diff --git a/.dev_scripts/build_docs.sh b/.dev_scripts/build_docs.sh new file mode 100644 index 00000000..43378168 --- /dev/null +++ b/.dev_scripts/build_docs.sh @@ -0,0 +1,7 @@ +cd docs +rm -rf build + +# update api rst +#rm -rf source/api/ +#sphinx-apidoc --module-first -o source/api/ ../modelscope/ +make html diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh new file mode 100644 index 00000000..dd868b5b --- /dev/null +++ b/.dev_scripts/ci_container_test.sh @@ -0,0 +1,40 @@ +if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then + # pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + git config --global --add safe.directory /twinkle + git config --global user.email tmp + git config --global user.name tmp.com + + # linter test + # use internal project for pre-commit due to the network problem + if [ `git remote -v | grep alibaba | wc -l` -gt 1 ]; then + pre-commit run -c .pre-commit-config_local.yaml --all-files + if [ $? -ne 0 ]; then + echo "linter test failed, please run 'pre-commit run --all-files' to check" + echo "From the repository folder" + echo "Run 'pre-commit install' install pre-commit hooks." + echo "Finally run linter with command: 'pre-commit run --all-files' to check." + echo "Ensure there is no failure!!!!!!!!" + exit -1 + fi + fi + + pip install decord einops -U -i https://mirrors.aliyun.com/pypi/simple/ + pip uninstall autoawq -y + pip uninstall lmdeploy -y + pip uninstall tensorflow -y + pip install optimum + + # test with install + pip install . +else + echo "Running case in release image, run case directly!" +fi +# remove torch_extensions folder to avoid ci hang. 
+rm -rf ~/.cache/torch_extensions +if [ $# -eq 0 ]; then + ci_command="pytest tests" +else + ci_command="$@" +fi +echo "Running case with command: $ci_command" +$ci_command diff --git a/.dev_scripts/dockerci.sh b/.dev_scripts/dockerci.sh new file mode 100644 index 00000000..3e41846c --- /dev/null +++ b/.dev_scripts/dockerci.sh @@ -0,0 +1,96 @@ +#!/bin/bash +MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache +CODE_DIR=$PWD +CODE_DIR_IN_CONTAINER=/twinkle +mkdir -p ~/.cache +MODELSCOPE_CACHE=~/.cache +IMAGE_NAME=modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope +IMAGE_VERSION=ci_image +MODELSCOPE_HOME_CACHE=~/.cache +CI_TEST=True +MODELSCOPE_SDK_DEBUG=True +CI_COMMAND='bash .dev_scripts/ci_container_test.sh pytest tests' +MODELSCOPE_SDK_DEBUG=True +echo "$USER" +gpus='0,1 2,3' +cpu_sets='0-15 16-31' +cpu_sets_arr=($cpu_sets) +is_get_file_lock=false +echo "ci command: $CI_COMMAND" +PR_CHANGED_FILES="${PR_CHANGED_FILES:-}" +echo "PR modified files: $PR_CHANGED_FILES" +PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#} +echo "PR_CHANGED_FILES: $PR_CHANGED_FILES" +idx=0 +for gpu in $gpus +do + exec {lock_fd}>"/tmp/gpu$gpu" || exit 1 + flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" 
>&2; idx=$((idx+1)); continue; } + echo "get gpu lock $gpu" + + CONTAINER_NAME="twinkle-ci-$idx" + let is_get_file_lock=true + + # pull image if there are update + docker pull ${IMAGE_NAME}:${IMAGE_VERSION} + if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then + echo 'debugging' + docker run --rm --name $CONTAINER_NAME --shm-size=16gb \ + --cpuset-cpus=${cpu_sets_arr[$idx]} \ + --gpus='"'"device=$gpu"'"' \ + -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \ + -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \ + -v $MODELSCOPE_HOME_CACHE/$idx:/root \ + -v /home/admin/pre-commit:/home/admin/pre-commit \ + -e CI_TEST=True \ + -e TEST_LEVEL=$TEST_LEVEL \ + -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \ + -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \ + -e MODELSCOPE_SDK_DEBUG=True \ + -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \ + -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ + -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ + -e TEST_LEVEL=$TEST_LEVEL \ + -e MODELSCOPE_ENVIRONMENT='ci' \ + -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ + -e MODEL_TAG_URL=$MODEL_TAG_URL \ + -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \ + -e PR_CHANGED_FILES=$PR_CHANGED_FILES \ + --workdir=$CODE_DIR_IN_CONTAINER \ + ${IMAGE_NAME}:${IMAGE_VERSION} \ + $CI_COMMAND + else + docker run --rm --name $CONTAINER_NAME --shm-size=16gb \ + --cpuset-cpus=${cpu_sets_arr[$idx]} \ + --gpus='"'"device=$gpu"'"' \ + -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \ + -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \ + -v $MODELSCOPE_HOME_CACHE/$idx:/root \ + -v /home/admin/pre-commit:/home/admin/pre-commit \ + -e CI_TEST=True \ + -e TEST_LEVEL=$TEST_LEVEL \ + -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \ + -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \ + -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \ + -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ + -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ + -e TEST_LEVEL=$TEST_LEVEL \ + -e MODELSCOPE_ENVIRONMENT='ci' \ + -e 
TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ + -e MODEL_TAG_URL=$MODEL_TAG_URL \ + -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \ + -e PR_CHANGED_FILES=$PR_CHANGED_FILES \ + --workdir=$CODE_DIR_IN_CONTAINER \ + ${IMAGE_NAME}:${IMAGE_VERSION} \ + $CI_COMMAND + fi + if [ $? -ne 0 ]; then + echo "Running test case failed, please check the log!" + exit -1 + fi + break +done +if [ "$is_get_file_lock" = false ] ; then + echo 'No free GPU!' + exit 1 +fi diff --git a/.dev_scripts/dockerci_npu.sh b/.dev_scripts/dockerci_npu.sh new file mode 100644 index 00000000..e0f9d253 --- /dev/null +++ b/.dev_scripts/dockerci_npu.sh @@ -0,0 +1,57 @@ +#!/bin/bash +MODELSCOPE_CACHE_DIR=/modelscope_cache +CODE_DIR=$PWD +MODELSCOPE_SDK_DEBUG=True +echo "$USER" +gpus='0,1 2,3' +is_get_file_lock=false +CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh pytest tests} +echo "ci command: $CI_COMMAND" +PR_CHANGED_FILES="${PR_CHANGED_FILES:-}" +echo "PR modified files: $PR_CHANGED_FILES" +PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#} +echo "PR_CHANGED_FILES: $PR_CHANGED_FILES" +idx=0 +for gpu in $gpus +do + exec {lock_fd}>"/tmp/gpu$gpu" || exit 1 + flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" 
>&2; idx=$((idx+1)); continue; } + echo "get gpu lock $gpu" + + let is_get_file_lock=true + + # 设置环境变量 + export CI_TEST=True + export TEST_LEVEL=$TEST_LEVEL + export MODELSCOPE_CACHE=${MODELSCOPE_CACHE:-$MODELSCOPE_CACHE_DIR} + export MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN + export HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT + export TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST + export TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV + export MODELSCOPE_ENVIRONMENT='ci' + export TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN + export MODEL_TAG_URL=$MODEL_TAG_URL + export MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN + export PR_CHANGED_FILES=$PR_CHANGED_FILES + export CUDA_VISIBLE_DEVICES=$gpu + + if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then + export MODELSCOPE_SDK_DEBUG=True + echo 'debugging' + fi + + # 切换到代码目录并执行命令 + cd $CODE_DIR + eval $CI_COMMAND + + if [ $? -ne 0 ]; then + echo "Running test case failed, please check the log!" + exit -1 + fi + break +done + +if [ "$is_get_file_lock" = false ] ; then + echo 'No free GPU!' + exit 1 +fi diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml new file mode 100644 index 00000000..9999b446 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml @@ -0,0 +1,49 @@ +name: "🐛 Bug Report" +description: Create a bug report to help us improve twinkle +labels: ["bug"] + +body: + - type: markdown + attributes: + value: | + Thank you for supporting twinkle and taking the time to submit this issue. + 感谢你对 twinkle 的支持和抽出时间提交相关 issue。 + + - type: checkboxes + id: checklist + attributes: + label: Checklist / 检查清单 + options: + - label: I have searched existing issues, and this is a new bug report. / 我已经搜索过现有的 issues,确认这是一个新的 bug report。 + required: true + + + - type: textarea + id: bug-description + validations: + required: true + attributes: + label: Bug Description / Bug 描述 + description: | + Please describe the issue you encountered. 
It's better to include error screenshots or stack trace information. + 请详细描述你遇到的问题,最好包含报错截图或报错栈信息。 + + + - type: textarea + id: reproduction-steps + validations: + required: true + attributes: + label: How to Reproduce / 如何复现 + description: | + Please provide steps to reproduce the issue, including twinkle version, runtime environment, and detailed reproduction steps. + 请提供复现问题的步骤,包括 twinkle 的版本、运行环境、详细的复现步骤等。 + + + - type: textarea + id: additional-information + attributes: + label: Additional Information / 补充信息 + description: | + Please provide any additional information here. + 在这里补充其他相关信息。 diff --git a/.github/ISSUE_TEMPLATE/2-feature-request.yml b/.github/ISSUE_TEMPLATE/2-feature-request.yml new file mode 100644 index 00000000..57633400 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature-request.yml @@ -0,0 +1,37 @@ +name: "🚀 Feature Request" +description: Submit a request for a new feature +labels: ["enhancement"] + +body: + - type: markdown + attributes: + value: | + Thank you for supporting twinkle and taking the time to submit this issue. + 感谢你对 twinkle 的支持和抽出时间提交相关 issue。 + + - type: checkboxes + id: checklist + attributes: + label: Checklist / 检查清单 + options: + - label: I have searched existing issues, and this is a new feature request. / 我已经搜索过现有的 issues,确认这是一个新的 Feature Request。 + required: true + + - type: textarea + id: feature-request-description + validations: + required: true + attributes: + label: Feature Request Description / Feature Request 描述 + description: | + Please provide a detailed description of the new feature you would like to see added. + 请详细描述您希望添加的新功能特性。 + + + - type: textarea + id: pull-request + attributes: + label: Pull Request / Pull Request 信息 + description: | + Have you already submitted or plan to submit a Pull Request? Please share your plans. 
+ 你是否已经提交或即将提交 Pull Request?请说明你的计划。 diff --git a/.github/ISSUE_TEMPLATE/3-question-discussion.yml b/.github/ISSUE_TEMPLATE/3-question-discussion.yml new file mode 100644 index 00000000..cc8ba339 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3-question-discussion.yml @@ -0,0 +1,28 @@ +name: "🤔 Question & Discussion" +description: Create an issue for questions and discussions +labels: ["question"] + +body: + - type: markdown + attributes: + value: | + Thank you for supporting twinkle and taking the time to submit this issue. + 感谢你对 twinkle 的支持和抽出时间提交相关 issue。 + + - type: checkboxes + id: checklist + attributes: + label: Checklist / 检查清单 + options: + - label: I have searched existing issues, and this is a new question or discussion topic. / 我已经搜索过现有的 issues,确认这是一个新的问题与讨论。 + required: true + + - type: textarea + id: question-description + validations: + required: true + attributes: + label: Question Description / 问题描述 + description: | + Please describe the question or topic you would like to discuss. + 请描述你想要讨论的问题或话题。 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..3ba13e0c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..a09bfad1 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,13 @@ +# PR type +- [ ] Bug Fix +- [ ] New Feature +- [ ] Document Updates +- [ ] More Models or Datasets Support + +# PR information + +Write the detail information belongs to this PR. + +## Experiment results + +Paste your experiment result here(if needed). 
diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 00000000..d549cbed --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,3 @@ +# Reporting Security Issues + +Usually security issues of a deep learning project come from non-standard 3rd packages or continuous running services. If you are suffering from security issues from our project, please consider reporting to us. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..bf104522 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,63 @@ +# Twinkle AI Coding Agent Guidelines + +These instructions help AI agents work productively in this repo. Focus on concrete repo patterns and workflows. + +## Big Picture +- **Goal:** Training and serving LLMs with multi-adapter LoRA, efficient data handling, and distributed execution across Ray and Torch. 
+- **Core Modules:** + - Infrastructure & distributed orchestration: [src/twinkle/infra/__init__.py](src/twinkle/infra/__init__.py) + - Device layout & platform abstraction: [src/twinkle/utils/platform.py](src/twinkle/utils/platform.py), [src/twinkle/utils/framework.py](src/twinkle/utils/framework.py) + - Model stack (Transformers + Multi-LoRA): [src/twinkle/model/multi_lora_transformers.py](src/twinkle/model/multi_lora_transformers.py) + - Sampler (vLLM integration): [src/twinkle/sampler/vllm_sampler.py](src/twinkle/sampler/vllm_sampler.py) + - Losses & metrics: [src/twinkle/loss](src/twinkle/loss), [src/twinkle/metric](src/twinkle/metric) + - Templates & preprocessing: [src/twinkle/template](src/twinkle/template), [src/twinkle/preprocessor](src/twinkle/preprocessor) + - Model/Processor HTTP services via Ray Serve: [src/twinkle/server/twinkle](src/twinkle/server/twinkle) + - Hub integrations (ModelScope/HF): [src/twinkle/hub/hub.py](src/twinkle/hub/hub.py) + +## Architecture & Patterns +- **Lazy import surface:** [src/twinkle/__init__.py](src/twinkle/__init__.py) exposes a small, lazy API (`_LazyModule`), import public symbols from here when possible. +- **Distributed mode selection:** `twinkle.infra.initialize()` toggles between local and Ray modes. Ray mode requires `TWINKLE_MODE=ray` or `initialize(mode='ray', ...)`. +- **Remote execution decorators:** + - `remote_class()` wraps classes for Ray placement; auto-injects `DeviceMesh` if missing. + - `remote_function(dispatch='slice', execute='all', collect='none')` patches methods for distributed dispatch/collect. + - See usage in [src/twinkle/model/multi_lora_transformers.py](src/twinkle/model/multi_lora_transformers.py) and [src/twinkle/sampler/vllm_sampler.py](src/twinkle/sampler/vllm_sampler.py). +- **Device topology:** Represented by `DeviceMesh`/`DeviceGroup`. Visualize with `twinkle.infra.get_device_placement()`; examples in [tests/infra/test_infra_graph.py](tests/infra/test_infra_graph.py). 
+- **Platform abstractions:** `GPU`/`NPU` selection via env and device discovery. Rank/world size read from env (`RANK`, `WORLD_SIZE`, etc.). See [src/twinkle/utils/platform.py](src/twinkle/utils/platform.py). +- **Hub usage:** `HubOperation` routes to HF or ModelScope by `hf://` or `ms://` prefixes. Dataset/model download/push helpers in [src/twinkle/hub/hub.py](src/twinkle/hub/hub.py). +- **Plugin loading:** Use `Plugin.load_plugin(id, Base)` for remote code from hubs; guarded by `trust_remote_code()` to prevent unsafe execution. See [src/twinkle/utils/plugin.py](src/twinkle/utils/plugin.py). +- **Multi-LoRA conventions:** + - `MultiLoraTransformersModel` wraps a base Transformers model via `MultiAdapter` to manage multiple LoRA adapters. + - FSDP is unsupported for Multi-LoRA (`fsdp_world_size == 1` enforced). Adapter params are strictly controlled to avoid training base weights. + - Adapter ops are routed through remote functions and grouped by DP process groups. + +## Developer Workflows +- **Install:** Python 3.11+. Install with Poetry or pip. + - Poetry: `poetry install --with transformers,ray` + - Pip (editable): `pip install -e .[transformers,ray]` +- **Run tests:** + - Unit tests: `python -m unittest tests/infra/test_infra_graph.py` +- **Local single-process dev:** + - Initialize infra: `twinkle.initialize(mode='local', seed=42)` + - Inspect device placement: call `twinkle.infra.get_device_placement()`. +- **Ray Serve demo (HTTP services):** + - Config and launcher: [cookbook/client/server.py](cookbook/client/server.py), [cookbook/client/server_config.yaml](cookbook/client/server_config.yaml) + - Start: + - `python cookbook/client/server.py` + - Endpoints print on startup (default `localhost:8000`). + - Model app binds `MultiLoraTransformersModel` and exposes routes like `/add_adapter_to_model`, `/forward`, `/calculate_loss`, etc. See [src/twinkle/server/twinkle/model.py](src/twinkle/server/twinkle/model.py). 
+- **vLLM inference:** Use `VLLMEngine` with engine args; LoRA weight sync via `patch.vllm_lora_weights`. See [src/twinkle/sampler/vllm_engine.py](src/twinkle/sampler/vllm_engine.py). + +## Conventions & Gotchas +- **Safety:** Remote plugin code requires `trust_remote_code()` true; avoid loading arbitrary strings into adapter configs (enforced in Multi-LoRA). +- **Env-driven ranks:** Many utilities read ranks/world size from env; set `WORLD_SIZE`, `RANK`, `LOCAL_RANK` when using torchrun. +- **Determinism:** `seed_everything(seed, full_determinism)` controls CUDA/NPU determinism; may set envs like `CUDA_LAUNCH_BLOCKING`. +- **Adapter lifecycle:** Server auto-removes inactive adapters (heartbeat required); per-token adapter limits are enforced. See cleanup in [src/twinkle/server/twinkle/model.py](src/twinkle/server/twinkle/model.py). +- **Templates:** Tokenization/encode via `Template` (e.g., `Qwen3Template`), producing `InputFeature` for model forward. See [src/twinkle/template/base.py](src/twinkle/template/base.py). + +## Examples +- **Visualize a custom mesh:** create `DeviceMesh` and call `get_device_placement()`; example in [tests/infra/test_infra_graph.py](tests/infra/test_infra_graph.py). +- **Add LoRA adapter via HTTP:** POST to `/add_adapter_to_model` with serialized `LoraConfig`; see server routes in [src/twinkle/server/twinkle/model.py](src/twinkle/server/twinkle/model.py). +- **Sample with vLLM:** Configure `vLLMSampler`, set `Template`/`Processor`, then `sample()` on `Trajectory` list; see [src/twinkle/sampler/vllm_sampler.py](src/twinkle/sampler/vllm_sampler.py). + +--- +Questions or gaps? Tell us where guidance is unclear (e.g., missing run scripts, Ray cluster setup), and we’ll refine this document. 
\ No newline at end of file diff --git a/.github/workflows/citest.yaml b/.github/workflows/citest.yaml new file mode 100644 index 00000000..bd560302 --- /dev/null +++ b/.github/workflows/citest.yaml @@ -0,0 +1,76 @@ +name: citest + +on: + push: + branches: + - master + - "release/**" + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_*.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + + pull_request: + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_*.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unittest: + # The type of runner that the job will run on + runs-on: [self-hosted] + timeout-minutes: 240 + steps: + - name: ResetFileMode + shell: bash + run: | + # reset filemode to allow action runner to delete files + # generated by root in docker + set -e + source ~/.bashrc + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Checkout + uses: actions/checkout@v3 + env: + GIT_CONFIG_PARAMETERS: "'core.hooksPath='" + with: + lfs: 'true' + submodules: 'false' + fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }} + - name: Get changed files + id: changed-files + run: | + if ${{ github.event_name == 'pull_request' }}; then + echo "PR_CHANGED_FILES=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_ENV + else + echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV + fi + - name: Checkout LFS objects + run: git lfs checkout + - name: Run unittest + shell: bash + run: | + set -e + bash .dev_scripts/dockerci.sh diff --git a/.github/workflows/citest_npu.yaml b/.github/workflows/citest_npu.yaml new 
file mode 100644 index 00000000..d48c7421 --- /dev/null +++ b/.github/workflows/citest_npu.yaml @@ -0,0 +1,75 @@ +name: citest-npu + +on: + push: + branches: + - master + - "release/**" + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_*.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + + pull_request: + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_*.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unittest: + # The type of runner that the job will run on + runs-on: [linux-aarch64-a2-1] + timeout-minutes: 240 + container: + image: 'ascendai/cann:8.3.rc2-910b-ubuntu22.04-py3.11' + steps: + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }} + - name: Get changed files + id: changed-files + run: | + if ${{ github.event_name == 'pull_request' }}; then + echo "PR_CHANGED_FILES=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_ENV + else + echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV + fi + - name: Run unittest + shell: bash + run: | + set -e + export IMAGE_NAME=ascendai/cann + export IMAGE_VERSION=8.3.rc2-910b-ubuntu22.04-py3.11 + export TEST_LEVEL=0 + mkdir -p ~/.cache + export 
MODELSCOPE_CACHE=~/.cache + export CI_COMMAND='bash .dev_scripts/ci_container_test.sh pytest tests' + bash .dev_scripts/dockerci_npu.sh diff --git a/.github/workflows/close_tale_issue.yaml b/.github/workflows/close_tale_issue.yaml new file mode 100644 index 00000000..46a713f1 --- /dev/null +++ b/.github/workflows/close_tale_issue.yaml @@ -0,0 +1,20 @@ +name: Close Stale Issues +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + close-stale: + runs-on: ubuntu-latest + steps: + - name: Close stale issues + uses: actions/stale@v8 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-stale: 90 + days-before-close: 7 + stale-issue-message: 'This issue has been inactive for over 3 months and will be automatically closed in 7 days. If this issue is still relevant, please reply to this message.' + close-issue-message: 'This issue has been automatically closed due to inactivity. If needed, it can be reopened.' + stale-issue-label: 'stale' + exempt-all-issue-labels: true diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..771ee4bc --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +name: Lint test + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: '3.11' + - name: Install pre-commit hook + run: | + pip install pre-commit + - name: Linting + run: pre-commit run --all-files diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 00000000..bf37a0b4 --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,29 @@ +name: release + +on: + push: + tags: + - 'v**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-publish + cancel-in-progress: true + +jobs: + build-n-publish: + runs-on: 
ubuntu-22.04 + #if: startsWith(github.event.ref, 'refs/tags') + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: '3.11' + - name: Install poetry + run: pip install poetry + - name: Build twinkle-kit + run: poetry build + - name: Publish package to PyPI + run: | + pip install twine + twine upload dist/* --skip-existing -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 3c7cc700..cc500d06 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ wheels/ /package /temp MANIFEST +.locks/ # PyInstaller # Usually these files are written by a python script from a template @@ -134,7 +135,6 @@ wandb/ benchmarks/ eval_output/ eval_outputs/ -transformers/ vlmeval/ my_model/ /data @@ -142,6 +142,7 @@ result/ images /custom/ megatron_output/ +.qoder # Pytorch *.pth @@ -149,3 +150,5 @@ megatron_output/ # ast template ast_index_file.py +test_cookbook/ +/test*.py \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 558ddc5a..2d104699 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,52 +1,38 @@ repos: - repo: https://github.com/pycqa/flake8.git - rev: 4.0.0 + rev: 7.3.0 hooks: - id: flake8 exclude: | (?x)^( - thirdparty/| examples/| - tests/run.py + cookbook/| + client_tools/| + src/twinkle_client| + )$ - repo: https://github.com/PyCQA/isort.git - rev: 4.3.21 + rev: 7.0.0 hooks: - id: isort exclude: | (?x)^( examples/| - tests/run.py| - swift/cli/sft.py + cookbook/| + client_tools/| + src/twinkle_client| )$ - - repo: https://github.com/pre-commit/mirrors-yapf.git - rev: v0.30.0 + - repo: https://github.com/google/yapf + rev: v0.43.0 hooks: - id: yapf exclude: | (?x)^( - thirdparty/| examples/| - tests/run.py + cookbook/| + client_tools/| + src/twinkle_client| )$ - repo: https://github.com/pre-commit/pre-commit-hooks.git - rev: v3.1.0 + rev: v6.0.0 hooks: - - id: trailing-whitespace - exclude: 
thirdparty/|tests/run.py - - id: check-yaml - exclude: thirdparty/|tests/run.py - - id: end-of-file-fixer - exclude: thirdparty/|tests/run.py - - id: requirements-txt-fixer - exclude: thirdparty/|tests/run.py - - id: double-quote-string-fixer - exclude: thirdparty/|tests/run.py - - id: check-merge-conflict - exclude: thirdparty/|tests/run.py - - id: fix-encoding-pragma - exclude: thirdparty/|tests/run.py - args: ["--remove"] - - id: mixed-line-ending - exclude: thirdparty/|tests/run.py - args: ["--fix=lf"] diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml index f6ef27d9..2bc55fcf 100644 --- a/.pre-commit-config_local.yaml +++ b/.pre-commit-config_local.yaml @@ -5,9 +5,10 @@ repos: - id: flake8 exclude: | (?x)^( - thirdparty/| examples/| - tests/run.py + cookbook/| + client_tools/| + src/twinkle_client| )$ - repo: /home/admin/pre-commit/isort rev: 4.3.21 @@ -16,8 +17,9 @@ repos: exclude: | (?x)^( examples/| - tests/run.py| - swift/cli/sft.py + cookbook/| + client_tools/| + src/twinkle_client| )$ - repo: /home/admin/pre-commit/mirrors-yapf rev: v0.30.0 @@ -25,28 +27,11 @@ repos: - id: yapf exclude: | (?x)^( - thirdparty/| examples/| - tests/run.py + cookbook/| + client_tools/| + src/twinkle_client| )$ - repo: /home/admin/pre-commit/pre-commit-hooks rev: v3.1.0 hooks: - - id: trailing-whitespace - exclude: thirdparty/|tests/run.py - - id: check-yaml - exclude: thirdparty/|tests/run.py - - id: end-of-file-fixer - exclude: thirdparty/ - - id: requirements-txt-fixer - exclude: thirdparty/|tests/run.py - - id: double-quote-string-fixer - exclude: thirdparty/|tests/run.py - - id: check-merge-conflict - exclude: thirdparty/|tests/run.py - - id: fix-encoding-pragma - exclude: thirdparty/|tests/run.py - args: ["--remove"] - - id: mixed-line-ending - exclude: thirdparty/|tests/run.py - args: ["--fix=lf"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9892a2d3..7011868a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,63 +1,67 @@ -# 
Contributor Guide +# Contributor Guidelines -_Welcome to offer PRs, bug reports, documentation supplements or other types of contributions to SWIFT!_ +*Welcome to contribute Feature PRs, Bug reports, documentation, or other types of contributions to twinkle!* ## Table of Contents + - [Code of Conduct](#-code-of-conduct) - [Contribution Process](#-contribution-process) -- [Hardware support](#-Hardware-support) +- [Resource Support](#-resource-support) ## 📖 Code of Conduct -Please refer to our [Code of Conduct documentation](./CODE_OF_CONDUCT.md). + +Please refer to our [Code of Conduct document](./CODE_OF_CONDUCT.md). ## 🔁 Contribution Process + ### What We Need -- New Technologies and New Models: SWIFT needs to support more open-source models and datasets, or new technologies that we have not paid attention to. If you are interested please submit a PR to us. -- Technical Propagation: If you are interested in technical propagation, you are welcome to help us write tutorials, documents or videos on any website, and send us the link. -- Community Contribution: You can write technical articles related to SWIFT, and submit them to us. After review and approval, we will publish them on the official ModelScope accounts (Zhihu, WeChat, etc.), with your name assigned. + +- New components: You can contribute excellent components to the twinkle project, or contribute them to the modelhub in the ModelScope/Hugging Face community following the component protocol, making them available for other developers to use +- New kernels: You can contribute low-level kernels to the twinkle project. These kernels can be integrated into models to achieve better training value + +Your contributions will help other developers. Please add your component name, location, and usage documentation link in the Community Components section of the README in your code PR. 
### Incentives -- we will issue electronic certificates to contributors on behalf of the ModelScope community, to encourage your selfless contributions. -- We will offer small souvenirs related to the ModelScope Community. -- We will provide free A10 computing power during the development period. For more details, please refer to [Hardware-support](#-Hardware-support) section. - -### Submitting PR (Pull Requests) - -Any feature development is carried out in the form of Fork and then PR on GitHub. -1. Fork: Go to the [ms-swift](https://github.com/modelscope/ms-swift) page and click the **Fork button**. After completion, a SWIFT code repository will be cloned under your personal organization. -2. Clone: Clone the code repository generated in the first step to your local machine and **create a new branch** for development. During development, please click the **Sync Fork button** in time to synchronize with the `main` branch to prevent code expiration and conflicts. -3. Submit PR: After development and testing, push the code to the remote branch. On GitHub, go to the **Pull Requests page**, create a new PR, select your code branch as the source branch, and the `modelscope/swift:main` branch as the target branch. - -4. Write Description: It is necessary to provide a good feature description in the PR, so that the reviewers know the content of your modification. -5. Review: We hope that the code to be merged is concise and efficient, so we may raise some questions and discuss them. Please note that any issues raised in the review are aimed at the code itself, not at you personally. Once all issues are discussed and resolved, your code will be approved. - -### Code Standards and Development Approach -SWIFT has conventional variable naming conventions and development approaches. Please follow these approaches as much as possible during development. -1. Variable names are separated by underscores, and class names are named with the first letter of each word capitalized. 
-2. All Python indentation uses four spaces instead of a tab. -3. Choose well-known open-source libraries, avoid using closed-source libraries or unstable open-source libraries, and avoid repeating the existing code. - -After the PR is submitted, SWIFT will perform two types of tests: -- Code Lint Test: A static code compliance check test. please make sure that you have performed code lint locally in advance. -```shell -pip install pre-commit # In the swift folder -pre-commit run --all-files # Fix the errors reported by pre-commit until all checks are successful -``` -- CI Tests: Smoke tests and unit tests, please refer to the next section. -### Running CI Tests -Before submitting the PR, please ensure that your development code is protected by test cases, such as smoke tests for new features, or unit tests for various edge cases. Reviewers will also pay attention to this during code review. At the same time, there will be dedicated services running CI Tests, running all test cases, and the code can only be merged after the test cases pass. +- We will issue electronic certificates to contributors on behalf of the ModelScope community to acknowledge your selfless contributions. +- We will give away ModelScope community merchandise and small gifts. + +### Submitting PRs (Pull Requests) + +All feature development is conducted on GitHub using a Fork-then-PR workflow. + +1. Fork: Go to the [twinkle](https://github.com/modelscope/twinkle) page and click the **Fork button**. This will clone a twinkle repository under your personal organization + +2. Clone: Clone the repository created in step one to your local machine and **create a new branch** for development. 
During development, please click the **Sync Fork button** regularly to sync with the `main` branch to prevent code from becoming outdated and causing conflicts -Additionally, since some important tests have been skipped due to long running time, to ensure that your logic is correct, you can run the test locally: -```shell -python tests/llm/test_run.py -``` -Please make sure this test can pass normally. +3. Submit PR: After development and testing are complete, push your code to the remote branch. On GitHub, click the **Pull Requests page** and create a new PR. Select your code branch as the source branch and `modelscope/twinkle:main` as the target branch -## ✅ Hardware support +4. Write Description: It is essential to provide a good feature description in your PR so that reviewers understand your changes + +5. Review: We want the merged code to be clean and efficient, so we may raise some questions for discussion. Please note that any questions raised during review are about the code itself, not about you personally. Once all issues have been discussed and resolved, your code will be approved + +### Code Standards and Development Practices + +twinkle has established conventions for variable naming and development practices. Please try to follow these conventions during development. + +1. Variable names use underscore separation; class names use PascalCase (capitalize the first letter of each word) +2. All Python indentation uses four spaces instead of one tab +3. Use well-known open-source libraries; avoid closed-source or unstable open-source libraries; avoid reinventing the wheel + +twinkle runs two types of tests after a PR is submitted: + +- Code Lint Tests: Static code analysis tests. To ensure this test passes, please run Code lint locally beforehand. Here's how: + + ```shell + pip install pre-commit + pre-commit run --all-files + # Fix any errors reported by pre-commit until all checks pass + ``` + +- CI Tests: Smoke tests and unit tests. 
Please refer to the next section + +### Running CI Tests -SWIFT will provide hardware support for developers, including free GPUs. If needed, please email us ([contact@modelscope.cn](mailto:contact@modelscope.cn)) or join our WeChat group: +Before submitting a PR, please ensure your development code is protected by test cases. For example, smoke tests for new features, or unit tests for various edge cases. Reviewers will also pay attention to this during code review. Additionally, a dedicated service will run CI Tests, executing all test cases. Code can only be merged after all test cases pass. -

- -

+Please ensure these tests pass successfully. \ No newline at end of file diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index d18ae6e3..cdbc4755 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -1,6 +1,6 @@ # 贡献者指引 -*欢迎帮SWIFT提供Feature PR、Bug反馈、文档补充或其他类型的贡献!* +*欢迎帮twinkle提供Feature PR、Bug反馈、文档补充或其他类型的贡献!* ## 目录 @@ -15,25 +15,26 @@ ## 🔁 贡献流程 ### 我们需要什么 -- 新技术和新模型:SWIFT需要支持更多的开源模型和数据集,或我们没有关注到的新技术,如果您对此有兴趣,可以提交PR给我们。 -- 技术布道:如果您对技术布道有兴趣,欢迎在任何网站上帮我们撰写教程文档或视频等,并将链接发给我们。 -- 社区供稿:您可以撰写和SWIFT有关的技术文章,并供稿给我们,我们审核通过后会在魔搭官方账号(知乎、公众号等)上进行发布,并属上您的名字。 + +- 新组件:您可以将优秀的组件贡献进twinkle项目,或按照组件协议贡献进ModelScope/Hugging Face社区的modelhub中,方便其他开发者使用 +- 新kernels:您可以将底层kernels贡献进twinkle项目中,这些kernels可以被模型集成,实现更好的训练价值 + +您的贡献会帮助到其他开发者,请在代码PR中在README的社区组件章节中增加您的组件名称、位置和使用方法文档链接。 ### 激励 - 我们会以魔搭社区的身份给贡献者颁发电子证书,以鼓励您的无私贡献。 - 我们会赠送相关魔搭社区相关周边小礼品。 -- 我们会赠送开发期间的免费A10算力,具体可以查看[资源支持](#-资源支持)章节。 ### 提交PR(Pull Requests) 任何feature开发都在github上以先Fork后PR的形式进行。 -1. Fork:进入[ms-swift](https://github.com/modelscope/ms-swift)页面后,点击**Fork按钮**执行。完成后会在您的个人组织下克隆出一个SWIFT代码库 +1. Fork:进入[twinkle](https://github.com/modelscope/twinkle)页面后,点击**Fork按钮**执行。完成后会在您的个人组织下克隆出一个twinkle代码库 2. Clone:将第一步产生的代码库clone到本地并**拉新分支**进行开发,开发中请及时点击**Sync Fork按钮**同步`main`分支,防止代码过期并冲突 -3. 提交PR:开发、测试完成后将代码推送到远程分支。在github上点击**Pull Requests页面**,新建一个PR,源分支选择您提交的代码分支,目标分支选择`modelscope/swift:main`分支 +3. 提交PR:开发、测试完成后将代码推送到远程分支。在github上点击**Pull Requests页面**,新建一个PR,源分支选择您提交的代码分支,目标分支选择`modelscope/twinkle:main`分支 4. 撰写描述:在PR中填写良好的feature描述是必要的,让Reviewers知道您的修改内容 @@ -41,19 +42,18 @@ ### 代码规范和开发方式 -SWIFT有约定俗成的变量命名方式和开发方式。在开发中请尽量遵循这些方式。 +twinkle有约定俗成的变量命名方式和开发方式。在开发中请尽量遵循这些方式。 1. 变量命名以下划线分割,类名以所有单词首字母大写方式命名 2. 所有的python缩进都是四个空格取代一个tab 3. 
选用知名的开源库,避免使用闭源库或不稳定的开源库,避免重复造轮子 -SWIFT在PR提交后会进行两类测试: +twinkle在PR提交后会进行两类测试: - Code Lint测试 对代码进行静态规范走查的测试,为保证改测试通过,请保证本地预先进行了Code lint。方法是: ```shell pip install pre-commit - # 在swift文件夹内 pre-commit run --all-files # 对pre-commit报的错误进行修改,直到所有的检查都是成功状态 ``` @@ -64,18 +64,4 @@ SWIFT在PR提交后会进行两类测试: 在提交PR前,请保证您的开发代码已经受到了测试用例的保护。例如,对新功能的冒烟测试,或者各种边缘case的单元测试等。在代码review时Reviewers也会关注这一点。同时,也会有服务专门运行CI Tests,运行所有的测试用例,测试用例通过后代码才可以合并。 -另外,由于运行时间过长,我们跳过了部分重要测试,为保证您的逻辑是正确的,可以在本地执行该测试: - -```shell -python tests/llm/test_run.py -``` - 请保证该测试可以正常通过。 - -## ✅ 资源支持 - -SWIFT会为开发者提供资源支持,包括免费的GPU算力。如果需要请邮件联系我们([contact@modelscope.cn](mailto:contact@modelscope.cn))或加入我们的微信群: - -

- -

diff --git a/README.md b/README.md index e69de29b..566b212f 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,304 @@ +

Twinkle: Training workbench to make your model glow

+ +

+ +

+

+by ModelScope +
+ English  |  中文  +

+ +

+ + + + + + +

+ +

+ English Documentation   |   中文文档   +

+ +## ✨ What is Twinkle? +Twinkle✨ is a lightweight, client-server training framework engineered +with modular, high-cohesion interfaces. Whether you are executing locally +with `torchrun`, or scaling training across Ray clusters, +Twinkle✨ eliminates infrastructure friction by encapsulating +complex training logic into standardized APIs. Beyond simple +abstraction, Twinkle✨ serves as a robust backend and gateway to enable serverless Training-as-a-Service (TaaS). +It offers interfaces that constitute a _superset_ of [Tinker](https://thinkingmachines.ai/tinker/) APIs, +thereby making it possible to access a Twinkle✨ training service via Tinker client or native Twinkle✨-client +which offers more functionalities. + +🧩 Decoupled Architecture: Standardized Interfaces, backward compatible with Tinker APIs.
+🚀 Multiple Runtime Modes: torchrun / Ray / HTTP.
+🔌 Multi-Training Backends: Transformers / Megatron.
+👥 Multi-Tenancy Training Service: Train multiple LoRAs that share one base model deployment.
+ +Note: Twinkle✨ is built by the team behind [ms-swift](https://github.com/modelscope/ms-swift), and +we expect the two projects to evolve together. We expect some fundamental components in Twinkle✨ will likely +be reused in [ms-swift](https://github.com/modelscope/ms-swift). + +## Installation + +### Install with package: + +```shell +pip install 'twinkle-kit' +``` + +### Installation from Source: + +```shell +git clone https://github.com/modelscope/twinkle.git +cd twinkle +pip install -e . --no-build-isolation +``` + +## Tutorials + +| Training Type | Model Framework | Cookbook Path | +| --------------------------------- | --------------- | ---------------------------------------- | +| FSDP finetuning | transformers | [Script](cookbook/transformers/fsdp2.py) | +| FSDP MoE finetuning | transformers | [Script](cookbook/transformers/fsdp2_moe.py) | +| EP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) | +| pp/tp/cp finetuning | megatron | [Script](cookbook/megatron/tp.py) | +| pp/tp/cp MoE finetuning | megatron | [Script](cookbook/megatron/tp_moe.py) | +| tinker client finetuning | megatron | [Script](cookbook/client/tinker/megatron) | +| tinker client finetuning/sampling | transformers | [Script](cookbook/client/tinker/transformer) | +| twinkle client finetuning | megatron | [Script](cookbook/client/twinkle/megatron) | +| twinkle client finetuning | transformer | [Script](cookbook/client/twinkle/transformer) | + +## Changelog + +- 🎉2026-02-10 Initial version of Twinkle✨ released, including SFT/PT/RL for text models and serverless training capabilities on [ModelScope](https://modelscope.cn).
+ +## Supported Hardware + +| Hardware Environment | Notes | +|----------------------|-----------------------------------------------------------------| +| Nvidia GPUs | ✅ Support for BF16/Flash-Attn may be incomplete on earlier GPUs | +| Ascend NPU | ✅ Some operators may not be supported | +| PPU | ✅ | +| CPU | Supports partial components like dataset, dataloader | + +## Supported Models +We will be adding support for more models as new models are released. The following table lists current models +supported on Twinkle✨ framework. However, the models supported on our serverless training backend may be a +much smaller subset. Please refer to the [doc](link) section for more information. + +| Model Type | Model ID on [ModelScope](https://modelscope.cn) | Requires | Megatron Support | HF Model ID | +| ------------------- |--------------------------------------------------------------------------------------------------------------------------| -------------------- |-----------| ---------------------------------------------------------------------------------------------------------- | +| qwen3 series | [Qwen/Qwen3-0.6B-Base](https://modelscope.cn/models/Qwen/Qwen3-0.6B-Base)~32B | transformers>=4.51 | ✅ | [Qwen/Qwen3-0.6B-Base](https://huggingface.co/Qwen/Qwen3-0.6B-Base) | +| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | transformers>=4.51 | ✅ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) | +| | [Qwen/Qwen3-30B-A3B](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B)~235B | transformers>=4.51 | ✅ | [Qwen/Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B) | +| qwen2 series | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) ~72B | transformers>=4.37 | ✅ | [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) | +| | [Qwen/Qwen2-72B](https://modelscope.cn/models/Qwen/Qwen2-72B)~72B | transformers>=4.37 | ✅ | 
[Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) | +| | [Qwen/Qwen2.5-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B-Instruct)~72B | transformers>=4.37 | ✅ | [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | +| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B)~72B | transformers>=4.37 | ✅ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) | +| qwen2_moe series | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | transformers>=4.40 | ✅ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) | +| chatglm2 series | [ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b](https://huggingface.co/zai-org/chatglm2-6b) | +| | [ZhipuAI/chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b-32k](https://huggingface.co/zai-org/chatglm2-6b-32k) | +| chatglm3 series | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) | +| | [ZhipuAI/chatglm3-6b-base](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-base) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-base](https://huggingface.co/zai-org/chatglm3-6b-base) | +| | [ZhipuAI/chatglm3-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-32k)~128k | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-32k](https://huggingface.co/zai-org/chatglm3-6b-32k) | +| chatglm4 series | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) | +| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) | +| 
glm_edge series | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) | +| | [ZhipuAI/glm-edge-4b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-4b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-4b-chat](https://huggingface.co/zai-org/glm-edge-4b-chat) | +| internlm2 series | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) | +| | [Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b) | transformers>=4.38 | ✘ | [internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | +| deepseek_v1 | [deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat) | transformers>=4.39.4 | ✔ | —— | +| | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite) | +| | [deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5) | +| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) | +| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) ~32B | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | + +For a more detailed model support list 👉 [Quick 
Start.md](https://github.com/modelscope/twinkle/blob/dev/docs/source/%E4%BD%BF%E7%94%A8%E6%8C%87%E5%BC%95/%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B.md) + +## Sample Code + +```python +from peft import LoraConfig +import twinkle +from twinkle import DeviceMesh, DeviceGroup +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +device_group = [DeviceGroup(name='default',ranks=8,device_type='cuda')] +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# local for torchrun +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle LLM', 'ModelScope Community')) + # Encode dataset + dataset.encode() + # Global batch size = 8, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8) + # Use a TransformersModel + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, + num_training_steps=len(dataloader)) + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) 
+ # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + print(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() +``` + +Launch training: + +```shell +python3 train.py +``` + +## Architecture Design + + + + **Twinkle✨** features a decoupled **Client-Server architecture** designed for maximum flexibility. + The client-side provides two distinct integration paths: + * **Twinkle✨ Native:** A conforming API that mirrors the server-side interface for seamless end-to-end integration. + * **Tinker Compatibility:** Full support for the native Tinker API, enabling developers to leverage Twinkle✨’s backend using Tinker client. + +This dual-path design ensures access to Twinkle✨’s training services using Tinker API, with a simple modification of the Tinker base URL. + +## Multi-Tenancy + +**Twinkle✨** supports simultaneous multi-tenant training on a shared base model. Leveraging a **LoRA Pool + Tenant Application** architecture, Twinkle enables up to **N tenants** to train in parallel with complete isolation. This design offers unprecedented flexibility: from the model's perspective, each tenant's session is distinct, supporting heterogeneous configurations including unique **data padding strategies, optimizers, and loss functions**—all running concurrently on the same base model. 
+ +*Note: This feature is currently optimized for [LoRA](https://github.com/huggingface/peft).* + + + +For example: + +- Tenant A: Load a local private dataset, LoRA rank=8, using base model for SFT +- Tenant B: Load open-source dataset from Hub remotely, LoRA rank=32, using base model for PT +- Tenant C: Use base model for GRPO loss calculation, using Sampler for sampling +- Tenant D: Use base model for logps inference + +These processes are executed concurrently on a single base model because the **Model and Sampler** +are integrated as **task-agnostic components** within the Twinkle✨ ecosystem. +Upon completion, checkpoints are automatically pushed to **ModelScope** or **HuggingFace** repositories +(private by default). On the server side, Twinkle✨ provides a robust multi-tenant suite +featuring **automated cluster management** and **dynamic scaling**, making it the +foundation for building customizable, enterprise-grade training services. + +> As a modular framework, Twinkle✨ also supports remote temporary exclusive training, i.e., training in full-parameter mode. + + +## 🛠️ Twinkle✨ Modular Ecosystem +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Dataset
Data loading and preprocessing

+
+

Template
Encoding and decoding

+
+

DataLoader
Data distribution and batching

+
+

Preprocessor
Data ETL

+
+

InputProcessor
Task-specific input processing

+
+

Model
Large models, supports multiple frameworks

+
+

Sampler
Sampler logic

+
+

Loss
Loss functions

+
+

Metric
Training metrics collection

+
+

Reward
Reward function

+
+

Advantage
Advantage function

+
+

CheckpointEngine
Weight synchronization

+
+

Patch
Patches for model fixes

+
+

Module
Components, e.g., Optimizer

+
+

Kernel
Operators

+
+

Server
Start backend cluster

+
+

Client
Client code

+
+

Infra
Isolate ray and torchrun differences

+
+

Plugin
Use hub components

+
+

Hub
Interface with HF/MS libraries

+
+
+ +## Community Components + +| Component Type | Component Link | Component Function | Author | +| -------------- | -------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | ---------------- | +| Patch | [qwen3_moe_transformers4_patch](https://www.modelscope.cn/models/twinkle-kit/qwen3_moe_transformers4_patch) | Fixes Qwen3 MoE model hang issue during FSDP2 training, effective for transformers==4.x | ModelScope Official | + diff --git a/README_ZH.md b/README_ZH.md new file mode 100644 index 00000000..f8405016 --- /dev/null +++ b/README_ZH.md @@ -0,0 +1,251 @@ +

Twinkle: Training Workbench for Industrial Neural-network Kit & LLM Engineering

+ +

+
+ +
+

+

+The ModelScope Community +
+ 中文  |  English  +

+ +

+ + + + + + +

+ +

+ English Documentation   |   中文文档   +

+ +
+ +## ✨ Twinkle是什么? + +大模型训练组件库。基于 PyTorch,更简洁、更灵活、生产就绪。 + +

+🧩 松耦合架构 · 标准化接口
+🚀 多运行模式 · torchrun / Ray / HTTP
+🔌 多框架兼容 · Transformers / Megatron
+👥 多租户支持 · 单基座模型部署 +

+ +
+ +## 安装 + +使用pip安装: + +```shell +pip install 'twinkle-kit' +``` + +## 源代码安装 + +```shell +git clone https://github.com/modelscope/twinkle.git +cd twinkle +pip install -e . --no-build-isolation +``` + +## 示例教程 + +| 训练类型 | 模型框架 | cookbook地址 | +| --------------------------------- | ------------ | ---------------------------------------- | +| FSDP finetuning | transformers | [脚本](cookbook/transformers/fsdp2.py) | +| FSDP MoE finetuning | transformers | [脚本](cookbook/transformers/fsdp2_moe.py) | +| EP MoE finetuning | transformers | [脚本](cookbook/transformers/ep_fsdp_qwen3_moe.py) | +| pp/tp/cp finetuning | megatron | [脚本](cookbook/megatron/tp.py) | +| pp/tp/cp MoE finetuning | megatron | [脚本](cookbook/megatron/tp_moe.py) | +| tinker client finetuning | megatron | [脚本](cookbook/client/tinker/megatron) | +| tinker client finetuning/sampling | transformers | [脚本](cookbook/client/tinker/transformer) | +| twinkle client finetuning | megatron | [脚本](cookbook/client/twinkle/megatron) | +| twinkle client finetuning | transformer | [脚本](cookbook/client/twinkle/transformer) | + +## 更新日志 + +- 🎉2026-02-10 twinkle-kit第一版编写完成,包含纯文本模型SFT/PT/RL和远程训练能力,并支持了[魔搭官方免费资源]() + +## 支持的硬件 + +| 硬件环境 | 备注 | +| --------------------------- | --------------------------------- | +| GPU A10/A100/H100/RTX系列等 | | +| GPU T4/V100等 | 不支持bfloat16、Flash-Attention | +| Ascend NPU | 部分算子不支持 | +| PPU | 支持 | +| CPU | 支持dataset、dataloader等部分组件 | + +## 支持的大语言模型 + +| Model Type | Model ID 举例 | Requires | Support Megatron | HF Model ID | +| ------------------- | ---------------------------------------------------------------------------------------------------------------------- | -------------------- | ---------------- | ---------------------------------------------------------------------------------------------------------- | +| qwen2 全系列 | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) ~72B | transformers>=4.37 | ✔ | 
[Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) | +| | [Qwen/Qwen2-72B](https://modelscope.cn/models/Qwen/Qwen2-72B)~72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) | +| | [Qwen/Qwen2.5-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B-Instruct)~72B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | +| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B)~72B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) | +| qwen2_moe 全系列 | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) | +| qwen3 全系列 | [Qwen/Qwen3-0.6B-Base](https://modelscope.cn/models/Qwen/Qwen3-0.6B-Base)~32B | transformers>=4.51 | ✔ | [Qwen/Qwen3-0.6B-Base](https://huggingface.co/Qwen/Qwen3-0.6B-Base) | +| qwen3_moe 全系列 | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) | +| | [Qwen/Qwen3-30B-A3B](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B)~235B | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B) | +| chatglm2 全系列 | [ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b](https://huggingface.co/zai-org/chatglm2-6b) | +| | [ZhipuAI/chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b-32k](https://huggingface.co/zai-org/chatglm2-6b-32k) | +| chatglm3 全系列 | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) | +| | 
[ZhipuAI/chatglm3-6b-base](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-base) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-base](https://huggingface.co/zai-org/chatglm3-6b-base) | +| | [ZhipuAI/chatglm3-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-32k)~128k | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-32k](https://huggingface.co/zai-org/chatglm3-6b-32k) | +| chatglm4 全系列 | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) | +| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) | +| glm_edge 全系列 | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) | +| | [ZhipuAI/glm-edge-4b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-4b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-4b-chat](https://huggingface.co/zai-org/glm-edge-4b-chat) | +| internlm2 全系列 | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) | +| | [Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b) | transformers>=4.38 | ✘ | [internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | +| deepseek_v1 | [deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat) | transformers>=4.39.4 | ✔ | —— | +| | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite) | +| | 
[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5) | +| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) | +| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) ~32B | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | + +更详细的模型支持列表 👉 [快速开始.md](https://github.com/modelscope/twinkle/blob/dev/docs/source/%E4%BD%BF%E7%94%A8%E6%8C%87%E5%BC%95/%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B.md) + +## 示例代码 + +```python +from peft import LoraConfig +import twinkle +from twinkle import DeviceMesh, DeviceGroup +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +device_group = [DeviceGroup(name='default',ranks=8,device_type='cuda')] +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# local for torchrun +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # Global batch size = 8, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8) + # Use a TransformersModel + model = 
TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, + num_training_steps=len(dataloader)) + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + print(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() +``` + +这样启动训练: + +```shell +python3 train.py +``` + +## 架构设计 + + + +twinkle的架构由client和server两部分构成,其中client端包含两个使用可能性: + +1. 符合twinkle调用API的客户端,其API和server端完全相同 +2. 对原生Tinker API的兼容 + +这使得开发者可以直接使用Tinker API调用twinkle部署起来的后端训练服务。 + +## 多租户支持 + +Twinkle支持多个租户同时使用一个基模型进行训练。这一行为目前仅限于[LoRA](https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/config.py#L323)。 +Twinkle采用了LoRA池+租户申请的技术方案。这个方案可以支持最大N个租户并行训练互不干扰,并且在模型角度来看,不同租户的训练流程可能不同,在基模中的数据padding方式、optimizer、Loss类型也可以不同。 + + + +例如: + +- 租户A:本机加载本地私有数据集,loRA rank=8,使用基模进行SFT +- 租户B:使用远端加载Hub端开源数据集,LoRA rank=32,使用基模进行PT +- 租户C:使用基模进行GRPO Loss计算,使用Sampler采样 +- 租户D:使用基模进行logps推理 + +这些过程可以同时发生在一个基模上,因为模型、Sampler本质上也是twinkle组件的一部分,可以做到任务无关。训练完成后,支持checkpoint推送HuggingFace/ModelScope的模型仓库,默认为私有。twinkle提供了完整的多租户训练解决方案,在server端支持集群化管理和动态扩缩容,可以进行简单定制化后作为企业级服务。 + +> 作为模块化框架,twinkle本身也可以支持远端临时的独占式训练,即全参数方式。 + + +## 支持的组件 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Dataset
数据加载和预处理
Template
编码和解码
DataLoader
数据分发和batch化
Preprocessor
数据ETL
InputProcessor
处理任务特定输入
Model
大模型,支持多种框架
Sampler
采样器
Loss
损失函数
Metric
训练指标集合
Reward
奖励函数
Advantage
优势函数
CheckpointEngine
权重同步
Patch
补丁,用于模型修复
Module
组件,例如Optimizer
Kernel
算子
Server
开启后端集群
Client
客户端代码
Infra
隔离ray和torchrun差异
Plugin
使用hub端组件
Hub
对接HF/MS模型仓库
+ +## 社区组件 + +| 组件类型 | 组件链接 | 组件作用 | 作者 | +| -------- | -------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------- | -------------- | +| Patch | [qwen3_moe_transformers4_patch](https://www.modelscope.cn/models/twinkle-kit/qwen3_moe_transformers4_patch) | 修复Qwen3 MoE模型在FSDP2训练时Hang的问题,对transformers==4.x生效 | ModelScope官方 | diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 00000000..cb01cbce --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,89 @@ +# 0.1版本release + +## 中文 + +### 基础能力 + +- [x] 支持transformers模型 +- [x] 支持megatron模型 +- [x] 支持vLLM采样器 +- [x] 支持dataset、dataloader、reward、advantage、权重同步等基本组件 +- [x] 支持数据集packing、padding_free、流式数据集 +- [x] 支持纯文本模型的PT/SFT +- [x] 支持纯文本模型的GRPO +- [x] 支持kernels +- [x] 兼容NPU生态 + +### 网络能力 + +- [x] 支持多LoRA租户 +- [x] 支持twinkle client训练 +- [x] 支持tinker API的兼容性 +- [x] 支持租户资源控制、水位控制 +- [x] 支持checkpoint的保存上传、下载 +- [x] 支持魔搭免费训练集群 + +## English + +### Core Capabilities + +- [x] Support for Transformers models +- [x] Support for Megatron models +- [x] Support for vLLM sampler +- [x] Support for basic components including dataset, dataloader, reward, advantage, and weight synchronization +- [x] Support for dataset packing, padding-free, and streaming datasets +- [x] Support for PT/SFT of text-only models +- [x] Support for GRPO of text-only models +- [x] Support for kernels +- [x] Compatibility with NPU ecosystem + +### Networking Capabilities + +- [x] Support for multi-LoRA tenants +- [x] Support for Twinkle client training +- [x] Support for Tinker API compatibility +- [x] Support for tenant resource control and watermark control +- [x] Support for checkpoint saving, uploading, and downloading +- [x] Support for ModelScope free training cluster + + +# 0.2版本待开发 + +## 中文 + +### 基础能力 + +- [ ] 支持多模态模型 +- [ ] 支持megatron VPP +- [ ] 支持liger kernel +- [ ] 支持transformers模型的ulysses/ring-attention +- [ ] 兼容transformers v5的tp、pp +- [ ] 
支持多轮RL +- [ ] 支持gym训练 +- [ ] 支持GAPO、GSPO算法 +- [ ] 支持GKD、on-policy-distill等蒸馏算法 +- [ ] 支持DPO对齐训练 +- [ ] 支持colocate RL训练 +- [ ] Preprocess支持batched + +### 网络能力 + +## English + +### Core Capabilities + +- [ ] Support for multimodal models +- [ ] Support for Megatron VPP +- [ ] Support for Liger kernel +- [ ] Support for Ulysses/Ring-Attention for Transformers models +- [ ] Compatibility with Transformers v5 TP and PP +- [ ] Support for multi-turn RL +- [ ] Support for Gym training +- [ ] Support for GAPO and GSPO algorithms +- [ ] Support for distillation algorithms such as GKD and on-policy distillation +- [ ] Support for DPO alignment training +- [ ] Support for colocate RL training +- [ ] Support for batched preprocessing + +### Networking Capabilities + diff --git a/assets/framework.jpg b/assets/framework.jpg new file mode 100644 index 00000000..140326f6 Binary files /dev/null and b/assets/framework.jpg differ diff --git a/assets/multi_lora.png b/assets/multi_lora.png new file mode 100644 index 00000000..cb8d89d0 Binary files /dev/null and b/assets/multi_lora.png differ diff --git a/assets/slogan.png b/assets/slogan.png new file mode 100644 index 00000000..c07888f4 Binary files /dev/null and b/assets/slogan.png differ diff --git a/client_tools/client_generator.py b/client_tools/client_generator.py new file mode 100644 index 00000000..4484577e --- /dev/null +++ b/client_tools/client_generator.py @@ -0,0 +1,881 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import ast +from pathlib import Path +from typing import Dict, List, Tuple, Set + +AUTO_GEN_WARNING = """# ============================================================================ +# WARNING: AUTO-GENERATED FILE - DO NOT MODIFY MANUALLY! +# ============================================================================ +# This file is automatically generated by client_tools/client_generator.py +# Any manual changes will be overwritten when the generator runs again. 
+# +# To update this file: +# 1. Modify the source files in src/twinkle/ +# 2. Run: python client_tools/client_generator.py +# ============================================================================ +""" + +def generate_processors(): + """Generate client wrappers for all classes with @remote_function methods.""" + + # Module mapping: module_name -> directory in src/twinkle + module_mapping = { + 'dataloader': 'dataloader', + 'dataset': 'dataset', + 'processor': 'processor', + 'reward': 'reward', + 'template': 'template', + 'weight_loader': 'weight_loader', + } + + # Map module names to processor types in the server + processor_type_mapping = { + 'dataloader': 'dataloader', + 'dataset': 'dataset', + 'processor': 'processor', + 'reward': 'reward', + 'template': 'template', + 'weight_loader': 'weight_loader', + } + + # Get the project root directory + project_root = Path(__file__).parent.parent + src_twinkle_path = project_root / 'src' / 'twinkle' + src_client_path = project_root / 'src' / 'twinkle_client' + + def get_method_signature(func_node: ast.FunctionDef) -> str: + """Extract method signature from AST node.""" + args = [] + + # Regular arguments + for i, arg in enumerate(func_node.args.args): + if arg.arg == 'self': + continue + + # Get argument name + arg_str = arg.arg + + # Get type annotation if available + if arg.annotation: + try: + arg_str += f": {ast.unparse(arg.annotation)}" + except: + pass + + # Get default value if available + defaults_offset = len(func_node.args.args) - len(func_node.args.defaults) + if i >= defaults_offset: + default_idx = i - defaults_offset + try: + default_val = ast.unparse(func_node.args.defaults[default_idx]) + arg_str += f" = {default_val}" + except: + pass + + args.append(arg_str) + + # *args + if func_node.args.vararg: + vararg_str = f"*{func_node.args.vararg.arg}" + if func_node.args.vararg.annotation: + try: + vararg_str += f": {ast.unparse(func_node.args.vararg.annotation)}" + except: + pass + 
args.append(vararg_str) + + # **kwargs + if func_node.args.kwarg: + kwarg_str = f"**{func_node.args.kwarg.arg}" + if func_node.args.kwarg.annotation: + try: + kwarg_str += f": {ast.unparse(func_node.args.kwarg.annotation)}" + except: + pass + args.append(kwarg_str) + + return ', '.join(args) + + def extract_typing_imports(signatures: List[str]) -> Set[str]: + """Extract required typing imports from signatures.""" + typing_patterns = { + 'Union[': 'Union', + 'Optional[': 'Optional', + 'List[': 'List', + 'Dict[': 'Dict', + 'Tuple[': 'Tuple', + 'Type[': 'Type', + 'Any': 'Any', + 'Callable': 'Callable', + 'Literal[': 'Literal', + 'Required[': 'Required', + 'Set[': 'Set', + 'TypedDict': 'TypedDict', + } + + all_text = ' '.join(signatures) + return {name for pattern, name in typing_patterns.items() if pattern in all_text} + + def extract_twinkle_imports(signatures: List[str]) -> Set[str]: + """Extract required twinkle imports from signatures.""" + twinkle_patterns = { + 'InputFeature': ['from twinkle.data_format import InputFeature'], + 'Trajectory': ['from twinkle.data_format import Trajectory'], + 'DataFilter': ['from twinkle.preprocessor import DataFilter'], + 'Preprocessor': ['from twinkle.preprocessor import Preprocessor'], + 'DatasetMeta': ['from twinkle.dataset import DatasetMeta'], + 'Dataset': ['from twinkle.dataset import Dataset'], + 'DeviceMesh': ['from twinkle import DeviceMesh'], + 'Template': ['from twinkle.template import Template'], + 'template.Template': ['from twinkle.template import Template', 'from twinkle import template'], + 'processor.InputProcessor': ['from twinkle.processor import InputProcessor', 'from twinkle import processor'], + 'InputProcessor': ['from twinkle.processor import InputProcessor'], + } + + all_text = ' '.join(signatures) + imports = set() + for pattern, stmts in twinkle_patterns.items(): + if pattern in all_text: + imports.update(stmts) + + return imports + + def parse_params_from_signature(signature: str) -> List[str]: + 
"""Parse parameter names from signature, handling nested brackets.""" + params = [] + current = '' + depth = 0 + + for char in signature + ',': + if char in '[(': + depth += 1 + elif char in '])': + depth -= 1 + + if char == ',' and depth == 0: + name = current.split(':')[0].split('=')[0].strip() + if name and name != 'self' and not name.startswith('*'): + params.append(name) + current = '' + else: + current += char + + return params + + def find_classes_with_remote_methods(file_path: Path) -> List[Tuple[str, str, List[Tuple[str, str]]]]: + """Find all classes that have @remote_function decorated methods.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + tree = ast.parse(f.read(), filename=str(file_path)) + except Exception as e: + print(f"Error parsing {file_path}: {e}") + return [] + + def has_remote_decorator(func: ast.FunctionDef) -> bool: + for dec in func.decorator_list: + if isinstance(dec, ast.Name) and dec.id == 'remote_function': + return True + if isinstance(dec, ast.Call): + func_node = dec.func + if isinstance(func_node, ast.Name) and func_node.id == 'remote_function': + return True + if isinstance(func_node, ast.Attribute) and func_node.attr == 'remote_function': + return True + return False + + def is_public_or_dunder(name: str) -> bool: + return (name.startswith('__') and name.endswith('__')) or not name.startswith('_') + + def get_base_name(node: ast.ClassDef) -> str: + if not node.bases: + return 'object' + base = node.bases[0] + if isinstance(base, ast.Name): + return base.id + if isinstance(base, ast.Attribute): + return base.attr + return 'object' + + classes_found = [] + for node in ast.walk(tree): + if not isinstance(node, ast.ClassDef): + continue + + methods = [ + (item.name, get_method_signature(item)) + for item in node.body + if isinstance(item, ast.FunctionDef) + and has_remote_decorator(item) + and is_public_or_dunder(item.name) + ] + + # Extract __init__ signature separately (it may not have @remote_function) + 
init_signature = '' + for item in node.body: + if isinstance(item, ast.FunctionDef) and item.name == '__init__': + init_signature = get_method_signature(item) + break + + if methods: + classes_found.append((node.name, get_base_name(node), methods, init_signature)) + + return classes_found + + def generate_client_class(class_name: str, base_class_name: str, + methods: List[Tuple[str, str]], module_name: str, + processor_type: str, source_filename: str, + has_base_file: bool, init_signature: str = '') -> str: + """Generate client wrapper class code.""" + + def build_imports() -> Tuple[List[str], str]: + # Include both method signatures and __init__ signature for import detection + signatures = [sig for _, sig in methods] + if init_signature: + signatures.append(init_signature) + + typing_imports = extract_typing_imports(signatures) + twinkle_imports = extract_twinkle_imports(signatures) + + lines = [] + if typing_imports: + lines.append(f"from typing import {', '.join(sorted(typing_imports))}") + lines.extend([ + "from twinkle_client.http import http_post, heartbeat_manager", + ]) + lines.extend(sorted(twinkle_imports)) + + if source_filename == 'base': + inheritance = "object" + elif base_class_name == 'IterableDataset': + lines.append("from torch.utils.data import IterableDataset") + inheritance = "IterableDataset" + elif has_base_file and base_class_name != 'object': + lines.append(f"from .base import {base_class_name}") + inheritance = base_class_name + else: + inheritance = "object" + + lines.append("") + return lines, inheritance + + def build_method(name: str, signature: str) -> str: + param_names = parse_params_from_signature(signature) + kwargs_dict = '{' + ', '.join(f"'{p}': {p}" for p in param_names) + '}' if param_names else '{}' + sig_part = f', {signature}' if signature else '' + if 'kwargs' in sig_part: + extra_args = '\n **kwargs' + else: + extra_args = '' + ret = 'self' if name == '__iter__' else 'response.json()["result"]' + + code = f''' + def 
{name}(self{sig_part}): + response = http_post( + url=f'{{self.server_url}}/processors/call', + json_data={{ + 'processor_id': self.processor_id, + 'function': '{name}', + **{kwargs_dict},{extra_args} + }} + ) + response.raise_for_status() + return {ret} + ''' + if name == '__iter__': + code += ''' + def __next__(self): + response = http_post( + url=f'{self.server_url}/processors/call', + json_data={ + 'processor_id': self.processor_id, + 'function': '__next__', + } + ) + response.raise_for_status() + return response.json()["result"] + ''' + return code + + import_lines, inheritance = build_imports() + + # Build __init__ method with actual signature + if init_signature: + # Extract parameter names from signature (excluding **kwargs) + param_names = parse_params_from_signature(init_signature) + init_params = f"self, {init_signature}" if init_signature else "self" + + # Check if signature has **kwargs + has_kwargs = '**' in init_signature + + # Extract the **kwargs name if present + kwargs_name = None + if has_kwargs: + # Find the **kwargs parameter name + for part in init_signature.split(','): + part = part.strip() + if part.startswith('**'): + # Extract name after **, before : or end + kwargs_name = part[2:].split(':')[0].strip() + break + + # Build kwargs dict for HTTP request + if param_names: + kwargs_items = ', '.join([f"'{p}': {p}" for p in param_names]) + if has_kwargs and kwargs_name: + # Include both named params and **kwargs + kwargs_dict = f"{{{kwargs_items}}}, **{kwargs_name}" + else: + kwargs_dict = f"{{{kwargs_items}}}" + else: + if has_kwargs and kwargs_name: + kwargs_dict = kwargs_name + else: + kwargs_dict = "{}" + else: + # Fallback to **kwargs if no __init__ found + init_params = "self, **kwargs" + kwargs_dict = "kwargs" + + class_template = f'''{AUTO_GEN_WARNING} +{chr(10).join(import_lines)} +class {class_name}({inheritance}): + """Client wrapper for {class_name} that calls server HTTP endpoints.""" + + def __init__({init_params}): + from 
twinkle_client.http import get_base_url + self.server_url = get_base_url() + + response = http_post( + url=f'{{self.server_url}}/processors/create', + json_data={{ + 'processor_type': '{processor_type}', + 'class_type': '{class_name}', + **{kwargs_dict} + }} + ) + response.raise_for_status() + self.processor_id = response.json()['processor_id'] + heartbeat_manager.register_processor(self.processor_id) + + def __del__(self): + try: + heartbeat_manager.unregister_processor(self.processor_id) + except: + pass + + ''' + + method_codes = [build_method(name, sig) for name, sig in methods] + + return class_template + '\n'.join(method_codes) + + def scan_modules(src_twinkle_path: Path, module_mapping: Dict[str, str]) -> Dict: + """Scan all modules for classes with @remote_function methods.""" + print("Scanning src/twinkle modules for classes with @remote_function methods...") + + module_files = {} + for module_name, module_dir in module_mapping.items(): + module_path = src_twinkle_path / module_dir + if not module_path.exists(): + continue + + print(f" Scanning {module_name}...") + for py_file in module_path.glob('*.py'): + if py_file.name.startswith('_'): + continue + + if classes := find_classes_with_remote_methods(py_file): + module_files.setdefault(module_name, {}).setdefault(py_file.stem, []).extend(classes) + + return module_files + + def write_client_files(module_files: Dict, src_client_path: Path, + processor_type_mapping: Dict[str, str]) -> None: + """Generate and write client files.""" + print("\nGenerating client classes...") + + for module_name, source_files in module_files.items(): + client_module_path = src_client_path / module_name + client_module_path.mkdir(parents=True, exist_ok=True) + + processor_type = processor_type_mapping.get(module_name, module_name) + has_base_file = 'base' in source_files + + for source_filename, classes in source_files.items(): + client_file = client_module_path / f'{source_filename}.py' + print(f" Writing {client_file}...") + + 
code = '\n\n'.join( + generate_client_class(class_name, base_class_name, methods, + module_name, processor_type, source_filename, has_base_file, init_signature) + for class_name, base_class_name, methods, init_signature in classes + ) + client_file.write_text(code, encoding='utf-8') + + def write_init_files(module_files: Dict, src_client_path: Path) -> None: + """Generate __init__.py files for each module.""" + print("\nGenerating __init__.py files...") + + for module_name, source_files in module_files.items(): + init_file = src_client_path / module_name / '__init__.py' + print(f" Writing {init_file}...") + + init_lines = [ + f"from .{source_filename} import {class_name}" + for source_filename, classes in sorted(source_files.items()) + for class_name, _, _, _ in classes + ] + init_content = AUTO_GEN_WARNING + '\n'.join(sorted(init_lines)) + '\n' + init_file.write_text(init_content, encoding='utf-8') + + module_files = scan_modules(src_twinkle_path, module_mapping) + write_client_files(module_files, src_client_path, processor_type_mapping) + write_init_files(module_files, src_client_path) + print("\nProcessor client generation complete!") + return module_files + + +def generate_models(): + """Generate client wrapper for Model management.""" + from pathlib import Path + + project_root = Path(__file__).parent.parent + src_client_path = project_root / 'src' / 'twinkle_client' + client_module_path = src_client_path / 'model' + client_module_path.mkdir(parents=True, exist_ok=True) + + model_code = AUTO_GEN_WARNING + '''from typing import Any, Optional, Union, Type, Dict, Literal, List +import uuid +from twinkle_client.http import http_post, heartbeat_manager +from twinkle import DeviceMesh +from twinkle.data_format import InputFeature, Trajectory + + +class MultiLoraTransformersModel: + """Client wrapper for TwinkleModel that calls server HTTP endpoints. + + This client manages adapters and sends training/inference requests to the model server. 
+ Each adapter has its own lifecycle managed through automatic heartbeats. + """ + + def __init__(self, model_id: str, **kwargs): + """Initialize model client.""" + from twinkle_client.http import get_base_url + self.server_url = get_base_url() + + self.model_id = model_id + if '://' in model_id: + model_id = model_id.split('://')[1] + self.server_url = f'{self.server_url}/models/{model_id}' + self.adapter_name = None + response = http_post( + url=f'{self.server_url}/create', + ) + response.raise_for_status() + + def _send_adapter_heartbeat(self): + """Internal method to send adapter heartbeat.""" + response = http_post( + url=f'{self.server_url}/heartbeat', + json_data={'adapter_name': self.adapter_name} + ) + response.raise_for_status() + + def add_adapter_to_model(self, adapter_name: str, config: Dict[str, Any], **kwargs): + """Add a new adapter to the model and start automatic heartbeat.""" + response = http_post( + url=f'{self.server_url}/add_adapter_to_model', + json_data={'adapter_name': adapter_name, 'config': config, **kwargs} + ) + response.raise_for_status() + + # Register adapter for automatic heartbeat after successful creation + self.adapter_name = adapter_name + heartbeat_manager.register_adapter( + self.adapter_name, + self._send_adapter_heartbeat + ) + + def __del__(self): + """Cleanup: unregister adapter from heartbeat manager.""" + try: + heartbeat_manager.unregister_adapter(self.adapter_name) + except: + pass + + def forward(self, inputs: Any, **kwargs): + """Execute forward pass on the model.""" + response = http_post( + url=f'{self.server_url}/forward', + json_data={'inputs': inputs, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def forward_only(self, inputs: Any, **kwargs): + """Execute forward pass without gradient computation.""" + response = http_post( + url=f'{self.server_url}/forward_only', + json_data={'inputs': inputs, 'adapter_name': self.adapter_name, **kwargs} + 
) + response.raise_for_status() + return response.json()['result'] + + def calculate_loss(self, **kwargs): + """Calculate loss from model outputs.""" + response = http_post( + url=f'{self.server_url}/calculate_loss', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def get_train_configs(self, **kwargs): + """Get training configs""" + response = http_post( + url=f'{self.server_url}/get_train_configs', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def backward(self, **kwargs): + """Execute backward pass.""" + response = http_post( + url=f'{self.server_url}/backward', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def forward_backward(self, inputs: Any, **kwargs): + """Execute combined forward and backward pass.""" + response = http_post( + url=f'{self.server_url}/forward_backward', + json_data={'inputs': inputs, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def step(self, **kwargs): + """Execute optimizer step.""" + response = http_post( + url=f'{self.server_url}/step', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def zero_grad(self, **kwargs): + """Zero out gradients.""" + response = http_post( + url=f'{self.server_url}/zero_grad', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def lr_step(self, **kwargs): + """Execute learning rate scheduler step.""" + response = http_post( + url=f'{self.server_url}/lr_step', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def set_loss(self, loss_cls: str, **kwargs): 
+ """Set the loss function.""" + response = http_post( + url=f'{self.server_url}/set_loss', + json_data={'loss_cls': loss_cls, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def clip_grad_norm(self, max_grad_norm: float=1.0, norm_type=2, **kwargs): + """Set the loss function.""" + response = http_post( + url=f'{self.server_url}/clip_grad_norm', + json_data={'max_grad_norm': max_grad_norm, 'norm_type': norm_type, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def set_optimizer(self, optimizer_cls: str, **kwargs): + """Set the optimizer.""" + response = http_post( + url=f'{self.server_url}/set_optimizer', + json_data={'optimizer_cls': optimizer_cls, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def set_lr_scheduler(self, scheduler_cls: str, **kwargs): + """Set the learning rate scheduler.""" + response = http_post( + url=f'{self.server_url}/set_lr_scheduler', + json_data={'scheduler_cls': scheduler_cls, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def save(self, name: str, **kwargs): + """Save model checkpoint.""" + response = http_post( + url=f'{self.server_url}/save', + json_data={'name': name, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def load(self, name: str, **kwargs): + """Load model checkpoint.""" + response = http_post( + url=f'{self.server_url}/load', + json_data={'name': name, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def set_template(self, template_cls: str, **kwargs): + """Set the template for data processing.""" + response = http_post( + url=f'{self.server_url}/set_template', + json_data={'template_cls': template_cls, 
'adapter_name': self.adapter_name, 'model_id': self.model_id, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def set_processor(self, processor_cls: str, **kwargs): + """Set the input processor.""" + response = http_post( + url=f'{self.server_url}/set_processor', + json_data={'processor_cls': processor_cls, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def calculate_metric(self, is_training: bool = True, **kwargs): + """Calculate metrics from model outputs.""" + response = http_post( + url=f'{self.server_url}/calculate_metric', + json_data={'is_training': is_training, 'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def get_state_dict(self, **kwargs): + """Get model state dictionary.""" + response = http_post( + url=f'{self.server_url}/get_state_dict', + json_data={'adapter_name': self.adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json()['result'] + + def upload_to_hub(self, checkpoint_dir: str, hub_model_id: str, hub_token: Optional[str] = None, async_upload: bool = True): + """Upload model checkpoint to hub. + + Args: + checkpoint_dir: The directory path of the checkpoint to upload. + hub_model_id: The hub model id. + hub_token: The hub token (optional). + async_upload: Whether to use async upload (default: True). 
+ """ + response = http_post( + url=f'{self.server_url}/upload_to_hub', + json_data={ + 'checkpoint_dir': checkpoint_dir, + 'hub_model_id': hub_model_id, + 'hub_token': hub_token, + 'async_upload': async_upload + } + ) + response.raise_for_status() + return response.json() +''' + + # Write the model client file + client_file = client_module_path / 'multi_lora_transformers.py' + print(f"Generating {client_file}...") + with open(client_file, 'w', encoding='utf-8') as f: + f.write(model_code) + + # Create/overwrite __init__.py + init_file = client_module_path / '__init__.py' + init_content = AUTO_GEN_WARNING + "from .multi_lora_transformers import MultiLoraTransformersModel\n" + print(f"Writing {init_file}...") + with open(init_file, 'w', encoding='utf-8') as f: + f.write(init_content) + + print("Model client generation complete!") + + +def generate_samplers(): + """Generate client wrapper for Sampler management.""" + from pathlib import Path + + project_root = Path(__file__).parent.parent + src_client_path = project_root / 'src' / 'twinkle_client' + client_module_path = src_client_path / 'sampler' + client_module_path.mkdir(parents=True, exist_ok=True) + + sampler_code = AUTO_GEN_WARNING + '''from typing import Any, Optional, List, Dict, Union +from twinkle_client.http import http_post, heartbeat_manager +from twinkle.sampler.base import Sampler +from peft import PeftConfig +from twinkle.data_format import Trajectory, InputFeature + + +class vLLMSampler(Sampler): + """Client wrapper for Sampler that calls server HTTP endpoints. + + This client manages sampling operations and adapter synchronization with the sampler server. + Each adapter has its own lifecycle managed through automatic heartbeats. 
+ """ + + def __init__(self, model_id: str, **kwargs): + """Create the sampler instance on server.""" + from twinkle_client.http import get_base_url + self.server_url = get_base_url() + + self.adapter_name = None + if '://' in model_id: + model_id = model_id.split('://')[1] + self.server_url = f'{self.server_url}/samplers/{model_id}' + response = http_post( + url=f'{self.server_url}/create', + json_data=kwargs + ) + response.raise_for_status() + + def _send_adapter_heartbeat(self): + """Internal method to send adapter heartbeat.""" + if not self.adapter_name: + return + response = http_post( + url=f'{self.server_url}/heartbeat', + json_data={'adapter_name': self.adapter_name} + ) + response.raise_for_status() + + def add_adapter_to_sampler(self, adapter_name: str, config: PeftConfig, **kwargs): + """Add a new adapter to the sampler and start automatic heartbeat.""" + if isinstance(config, PeftConfig): + config = config.__dict__ + response = http_post( + url=f'{self.server_url}/add_adapter_to_sampler', + json_data={'adapter_name': adapter_name, 'config': config, **kwargs} + ) + response.raise_for_status() + + # Register adapter for automatic heartbeat after successful creation + self.adapter_name = adapter_name + heartbeat_manager.register_adapter( + self.adapter_name, + self._send_adapter_heartbeat + ) + + return response.json() + + def __del__(self): + """Cleanup: unregister adapter from heartbeat manager.""" + try: + if self.adapter_name: + heartbeat_manager.unregister_adapter(self.adapter_name) + except: + pass + + def sample( + self, + inputs: Union[List[Trajectory], List[InputFeature]], + sampling_params: Optional[Dict[str, Any]] = None, + adapter_name: str = '', + adapter_uri: Optional[str] = None, + num_samples: int = 1, + ) -> Dict[str, Any]: + """Sample from the model. + + Args: + inputs: List of Trajectory or InputFeature to sample from. + sampling_params: Sampling parameters dict. + adapter_name: Adapter name for LoRA inference. 
+ adapter_uri: Adapter URI (twinkle:// path or local path) for LoRA inference. + num_samples: Number of completions to generate per prompt. + + Returns: + Dict with 'sequences' list, each containing tokens, logprobs, stop_reason. + """ + json_data = { + 'inputs': inputs, + 'sampling_params': sampling_params, + 'adapter_name': adapter_name, + 'num_samples': num_samples, + } + if adapter_uri is not None: + json_data['adapter_uri'] = adapter_uri + + response = http_post( + url=f'{self.server_url}/sample', + json_data=json_data + ) + response.raise_for_status() + return response.json() + + def sync_weights(self, state_dict: Dict[str, Any], adapter_name: str = ''): + """Synchronize weights to the sampler.""" + adapter = adapter_name or self.adapter_name + response = http_post( + url=f'{self.server_url}/sync_weights', + json_data={'state_dict': state_dict, 'adapter_name': adapter} + ) + response.raise_for_status() + return response.json() + + def set_template(self, template_cls: str, adapter_name: str = '', **kwargs): + """Set the template for encoding trajectories.""" + response = http_post( + url=f'{self.server_url}/set_template', + json_data={'template_cls': template_cls, 'adapter_name': adapter_name, **kwargs} + ) + response.raise_for_status() + return response.json() +''' + + # Write the sampler client file + client_file = client_module_path / 'vllm_sampler.py' + print(f"Generating {client_file}...") + with open(client_file, 'w', encoding='utf-8') as f: + f.write(sampler_code) + + # Create/overwrite __init__.py + init_file = client_module_path / '__init__.py' + init_content = AUTO_GEN_WARNING + "from .vllm_sampler import vLLMSampler\n" + print(f"Writing {init_file}...") + with open(init_file, 'w', encoding='utf-8') as f: + f.write(init_content) + + print("Sampler client generation complete!") + + +if __name__ == '__main__': + print("Starting client code generation...\n") + print("=" * 60) + + # Generate processor-based clients + print("\n[1/3] Generating 
processor-based clients...") + generate_processors() + + # Generate model client + print("\n" + "=" * 60) + print("\n[2/3] Generating model client...") + generate_models() + + # Generate sampler client + print("\n" + "=" * 60) + print("\n[3/3] Generating sampler client...") + generate_samplers() + + print("\n" + "=" * 60) + print("\n✓ All client code generation complete!\n") diff --git a/cookbook/client/tinker/megatron/server.py b/cookbook/client/tinker/megatron/server.py new file mode 100644 index 00000000..f04dfffa --- /dev/null +++ b/cookbook/client/tinker/megatron/server.py @@ -0,0 +1,21 @@ +# Twinkle Server Launcher - Tinker-Compatible Megatron Backend +# +# This script starts the Twinkle server with Tinker-compatible API support +# using the Megatron model backend. +# It reads the server_config.yaml in the same directory for all +# configuration (model, deployment settings, etc.). +# Run this script BEFORE running the client training script (lora.py). + +import os + +# Enable Ray debug mode for verbose logging during development +os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1' + +from twinkle.server import launch_server + +# Resolve the path to server_config.yaml relative to this script's location +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 'server_config.yaml') + +# Launch the Twinkle server — this call blocks until the server is shut down +launch_server(config_path=config_path) \ No newline at end of file diff --git a/cookbook/client/tinker/megatron/server_config.yaml b/cookbook/client/tinker/megatron/server_config.yaml new file mode 100644 index 00000000..08399d91 --- /dev/null +++ b/cookbook/client/tinker/megatron/server_config.yaml @@ -0,0 +1,108 @@ +# Twinkle Server Configuration - Tinker-Compatible Transformers Backend + +# Server protocol type: "tinker" enables the Tinker-compatible API +server_type: tinker + +# proxy_location: determines where the HTTP proxy runs. 
+# "EveryNode" means each Ray node runs its own proxy (good for multi-node). +proxy_location: EveryNode + +# HTTP listener settings +http_options: + host: 0.0.0.0 # Listen on all network interfaces + port: 8000 # Port number for the server + +# Applications: each entry defines a service component deployed on the server +applications: + + # 1. TinkerCompatServer - The central API server + # Handles client connections, training run tracking, checkpoint listing. + - name: server + route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible) + import_path: server # Python module to import + args: + + deployments: + - name: TinkerCompatServer + autoscaling_config: + min_replicas: 1 # Minimum number of replicas + max_replicas: 1 # Maximum number of replicas + target_ongoing_requests: 128 # Target concurrent requests per replica + ray_actor_options: + num_cpus: 0.1 # CPU resources allocated to this actor + runtime_env: + env_vars: + TWINKLE_TRUST_REMOTE_CODE: "0" + DEVICE_COUNT_PER_PHYSICAL_NODE: "8" + + # 3. Sampler Service - Runs inference / sampling using vLLM engine + # Used for generating text from the model (e.g., evaluating LoRA results). 
+  - name: sampler-Qwen3-30B-A3B-Instruct-2507
+    route_prefix: /api/v1/sampler/Qwen/Qwen3-30B-A3B-Instruct-2507
+    import_path: sampler
+    args:
+      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
+      nproc_per_node: 4 # Number of GPU processes per node
+      sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
+      engine_args: # vLLM engine-specific settings
+        max_model_len: 4096 # Maximum sequence length the engine supports
+        gpu_memory_utilization: 0.7 # Fraction of GPU memory to use (0.0-1.0)
+        enable_lora: true # Allow loading LoRA adapters during inference
+      device_group: # Logical device group for the sampler
+        name: sampler
+        gpus_per_worker: 1
+        ranks: [0,1,2,3] # GPU rank indices to use
+        device_type: cuda
+      device_mesh:
+        device_type: cuda
+        dp_size: 4
+    deployments:
+      - name: SamplerManagement
+        autoscaling_config:
+          min_replicas: 1
+          max_replicas: 1
+          target_ongoing_requests: 16
+        ray_actor_options:
+          num_cpus: 0.1
+        runtime_env:
+          env_vars:
+            TWINKLE_TRUST_REMOTE_CODE: "0"
+            DEVICE_COUNT_PER_PHYSICAL_NODE: "8"
+
+  # 2. Model Service - Hosts the base model for LoRA training.
+  # Adjust the ranks/parallelism settings below to match your GPU topology.
+  - name: models-Qwen3-30B-A3B-Instruct-2507
+    route_prefix: /api/v1/model/Qwen/Qwen3-30B-A3B-Instruct-2507
+    import_path: model
+    args:
+      use_megatron: true # Use the Megatron backend (instead of HuggingFace Transformers)
+      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
+      nproc_per_node: 4 # Number of GPU processes per node
+      device_group:
+        name: model
+        ranks: [4,5,6,7] # GPU rank indices
+        device_type: cuda
+      device_mesh:
+        device_type: cuda
+        dp_size: 2
+        tp_size: 2
+        ep_size: 2
+
+      queue_config:
+        rps_limit: 100 # Max requests per second
+        tps_limit: 10000 # Max tokens per second
+      adapter_config:
+        per_token_adapter_limit: 30 # Max concurrent LoRA adapters
+        adapter_timeout: 1800 # Seconds before idle adapter unload
+    deployments:
+      - name: ModelManagement
+        autoscaling_config:
+          min_replicas: 1
+          max_replicas: 1
+          target_ongoing_requests: 16
+        ray_actor_options:
+          num_cpus: 0.1
+        runtime_env:
+          env_vars:
+            TWINKLE_TRUST_REMOTE_CODE: "0"
+            DEVICE_COUNT_PER_PHYSICAL_NODE: "8"
diff --git a/cookbook/client/tinker/transformer/grpo.py b/cookbook/client/tinker/transformer/grpo.py
new file mode 100644
index 00000000..c08c6fe5
--- /dev/null
+++ b/cookbook/client/tinker/transformer/grpo.py
@@ -0,0 +1,292 @@
+# Tinker-Compatible Client - GRPO (Group Relative Policy Optimization) Training Example
+#
+# This script demonstrates GRPO reinforcement learning training using the
+# Tinker-compatible client API with save_weights_for_sampler for weight sync.
+# Instead of calling sync_weights directly, it periodically saves weights and
+# creates a sampling client for generation.
+#
+# Flow:
+# 1. Prepare Countdown dataset (client-side)
+# 2. Initialize Tinker-compatible training & sampling clients
+# 3. Training loop:
+#    a. Every SYNC_INTERVAL steps: save_weights_for_sampler → sampling_client
+#    b. Sample completions from the sampling client
+#    c. Compute rewards and advantages (client-side)
+#    d. Train on sampled data weighted by advantages
+#    e. 
Optimizer step +# +# The server must be running first (see server.py and server_config.yaml). +# Requires both model and sampler services to be configured. + +import gc +import numpy as np +from typing import List, Tuple + +from tinker import types +from twinkle_client import init_tinker_compat_client +from twinkle import get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.metric import CompletionRewardMetric +from modelscope import AutoTokenizer + +logger = get_logger() + +# ========== Configuration ========== +BASE_MODEL = 'Qwen/Qwen2.5-3B-Instruct' +NUM_GENERATIONS = 4 +MAX_NEW_TOKENS = 1024 +LEARNING_RATE = 1e-5 +MAX_STEPS = 100 +BATCH_SIZE = 2 +TEMPERATURE = 1.0 +SYNC_INTERVAL = 2 # Save weights for sampler every N steps +LORA_RANK = 8 + + +def create_countdown_dataset(): + """Create Countdown Game dataset for GRPO training.""" + logger.info("Loading Countdown dataset...") + + dataset = Dataset(DatasetMeta( + "ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(500))) + dataset.set_template( + "Template", model_id=f'ms://{BASE_MODEL}', max_length=8192) + dataset.map('CountdownProcessor') + dataset.encode(add_generation_prompt=True) + + logger.info(f"Dataset loaded with {len(dataset)} samples") + return dataset + + +def compute_rewards( + trajectories: List[dict], +) -> Tuple[List[float], List[float], List[float]]: + """Compute format and accuracy rewards for Countdown game.""" + from twinkle.reward import CountDownAccuracy, FormatReward + format_rewards = FormatReward()(trajectories, []) + accuracy_rewards = CountDownAccuracy()(trajectories, []) + total_rewards = [a + b for a, b in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + + +def main(): + logger.info("Starting GRPO training...") + + # Step 1: Prepare dataset and dataloader (client-side) + dataset = create_countdown_dataset() + dataloader = 
DataLoader(dataset=dataset, batch_size=BATCH_SIZE) + tokenizer = AutoTokenizer.from_pretrained( + BASE_MODEL, trust_remote_code=True) + + logger.info("Dataset and tokenizer initialized") + + # Step 2: Initialize the Tinker-compatible client + logger.info("Connecting to Tinker server...") + service_client = init_tinker_compat_client( + base_url='http://localhost:8000') + + logger.info("Creating LoRA training client...") + # Create a LoRA training client for GRPO + training_client = service_client.create_lora_training_client( + base_model=BASE_MODEL, + rank=LORA_RANK, + ) + + logger.info("Training client created successfully") + + # Step 3: Setup metrics and advantage function + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = types.SamplingParams( + max_tokens=MAX_NEW_TOKENS, + temperature=TEMPERATURE, + top_p=0.95, + ) + + # The sampling client is created on-demand via save_weights_for_sampler + sampling_client = None + + step = 0 + for batch in dataloader: + if step >= MAX_STEPS: + break + + metrics.reset() + prompts = batch if isinstance(batch, list) else [batch] + + # ========== 1. Save weights for sampler (instead of sync_weights) ========== + if step % SYNC_INTERVAL == 0: + logger.info(f"Step {step}: Saving weights for sampler...") + + sampling_client = ( + training_client.save_weights_and_get_sampling_client( + name=f'grpo-step-{step}')) + logger.info(f"Step {step}: Sampling client ready") + + if sampling_client is None: + logger.warning("No sampling client available, skipping step") + step += 1 + continue + + # ========== 2. 
Sample completions ========== + # Convert input features to token prompts for the sampling client + all_sequences = [] + for prompt_feature in prompts: + input_ids = prompt_feature['input_ids'] + if hasattr(input_ids, 'tolist'): + input_ids = input_ids.tolist() + prompt = types.ModelInput.from_ints(input_ids) + future = sampling_client.sample( + prompt=prompt, + sampling_params=sampling_params, + num_samples=NUM_GENERATIONS, + ) + result = future.result() + all_sequences.extend(result.sequences) + + if not all_sequences: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + # ========== 3. Build trajectories and collect logprobs ========== + trajectories = [] + old_logps_list = [] + completion_lengths = [] + + for seq in all_sequences: + decoded_text = tokenizer.decode(seq.tokens, skip_special_tokens=True) + trajectories.append({ + 'messages': [{'role': 'assistant', 'content': decoded_text}] + }) + old_logps_list.append( + [lp for lp in seq.logprobs] if seq.logprobs else []) + completion_lengths.append(len(seq.tokens)) + + # ========== 4. Compute rewards ========== + total_rewards, format_rewards, accuracy_rewards = compute_rewards( + trajectories) + metrics.accumulate( + None, None, + completion_lengths=completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }) + + # ========== 5. Compute advantages ========== + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ).tolist() + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0) + if frac_zero_std == 1.0: + logger.info( + f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + # ========== 6. Train the policies with GRPO loss ========== + # Train the policies with the Advantage-Regularized policy + # gradient (GRPO) loss function. + # + # The GRPO loss function requires: + # 1. 
logprobs: The log probabilities of the tokens under the current policy + # 2. advantages: The advantage values for each completion + # + # The training data is constructed with: + # - model_input: The full prompt + completion tokens + # - target_tokens: The shifted tokens for next-token prediction + # - logprobs: The log probabilities from the sampling step + # - advantages: The computed advantage values + training_data = [] + for i, seq in enumerate(all_sequences): + # Build a Datum from the completion tokens with logprobs and advantages + prompt_feature = prompts[i // NUM_GENERATIONS] + prompt_ids = prompt_feature['input_ids'] + if hasattr(prompt_ids, 'tolist'): + prompt_ids = prompt_ids.tolist() + + sampled_tokens = list(seq.tokens) + logprobs = seq.logprobs if seq.logprobs else [0.0] * len(sampled_tokens) + advantage = float(advantages[i]) + + ob_len = len(prompt_ids) - 1 + input_tokens = prompt_ids + sampled_tokens[:-1] + target_tokens = [0] * ob_len + sampled_tokens + padded_advantages = [0.0] * ob_len + [advantage] * len(sampled_tokens) + padded_logprobs = [0.0] * ob_len + logprobs + + # Verify lengths match + assert len(input_tokens) == len(target_tokens) == len(padded_logprobs) == len(padded_advantages), \ + f"Length mismatch: input={len(input_tokens)}, target={len(target_tokens)}, " \ + f"logprobs={len(padded_logprobs)}, advantages={len(padded_advantages)}" + + datum = types.Datum( + model_input=types.ModelInput.from_ints(input_tokens), + loss_fn_inputs={ + 'target_tokens': target_tokens, + 'logprobs': types.TensorData.from_numpy(np.array(padded_logprobs, dtype=np.float32)), + 'advantages': types.TensorData.from_numpy(np.array(padded_advantages, dtype=np.float32)), + }, + ) + training_data.append(datum) + + if not training_data: + logger.info( + f"Step {step}: No training data constructed, skipping") + step += 1 + continue + + # Forward-backward pass with importance_sampling (GRPO) loss + # The training data already contains logprobs and advantages for 
the GRPO loss + fwdbwd_future = training_client.forward_backward( + training_data, "importance_sampling") + optim_future = training_client.optim_step( + types.AdamParams(learning_rate=LEARNING_RATE)) + + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + # Compute metrics from the forward-backward result + # For importance_sampling, we get logprobs and elementwise_loss + logprobs_list = [] + elementwise_losses = [] + for output in fwdbwd_result.loss_fn_outputs: + if output.get('logprobs') is not None: + logprobs_list.append(output['logprobs'].to_numpy()) + if output.get('elementwise_loss') is not None: + elementwise_losses.append(output['elementwise_loss'].to_numpy()) + + # Compute average loss per token (weighted by advantages) + if elementwise_losses: + all_losses = np.concatenate(elementwise_losses) + avg_loss = np.mean(all_losses) if len(all_losses) > 0 else 0.0 + else: + avg_loss = 0.0 + + gc.collect() + + # ========== 7. Log ========== + log_dict = metrics.calculate() + log_dict['train/loss_per_token'] = float(avg_loss) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/num_training_samples'] = len(training_data) + logger.info(f"Step {step}: {log_dict}") + step += 1 + + # Save final checkpoint + save_future = training_client.save_state("grpo-countdown-final") + save_result = save_future.result() + logger.info(f"Saved final checkpoint to {save_result.path}") + + +if __name__ == '__main__': + main() diff --git a/cookbook/client/tinker/transformer/gsm8k.py b/cookbook/client/tinker/transformer/gsm8k.py new file mode 100644 index 00000000..b7f86c61 --- /dev/null +++ b/cookbook/client/tinker/transformer/gsm8k.py @@ -0,0 +1,425 @@ +# Tinker-Compatible Client - GSM8K GRPO Training Example +# +# This script demonstrates GSM8K math problem training using the +# Tinker-compatible client API with save_weights_for_sampler for weight sync. 
+# Instead of calling sync_weights directly, it periodically saves weights and +# creates a sampling client for generation. +# +# Flow: +# 1. Prepare GSM8K dataset (client-side) +# 2. Initialize Tinker-compatible training & sampling clients +# 3. Training loop: +# a. Every SYNC_INTERVAL steps: save_weights_for_sampler → sampling_client +# b. Sample completions from the sampling client +# c. Compute rewards and advantages (client-side) +# d. Train on sampled data weighted by advantages +# e. Optimizer step +# +# The server must be running first (see server.py and server_config.yaml). +# Requires both model and sampler services to be configured. + +import gc +import re +import numpy as np +from typing import List, Tuple + +from tinker import types +from twinkle_client import init_tinker_compat_client +from twinkle import get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.dataloader import DataLoader +from twinkle.preprocessor import Preprocessor +from twinkle.reward.base import Reward +from twinkle.data_format import Trajectory, InputFeature, Message +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.metric import CompletionRewardMetric +from modelscope import AutoTokenizer + +logger = get_logger() + +# ========== Configuration ========== +BASE_MODEL = 'Qwen/Qwen2.5-3B-Instruct' +NUM_GENERATIONS = 4 +MAX_NEW_TOKENS = 2048 +LEARNING_RATE = 1e-5 +MAX_STEPS = 100 +BATCH_SIZE = 2 +TEMPERATURE = 1.0 +SYNC_INTERVAL = 1 # Save weights for sampler every N steps +LORA_RANK = 8 +DATA_NUM = 1000 # Number of GSM8K samples to use + +SYSTEM_PROMPT = ( + "You are a helpful math assistant. Solve the problem step by step. " + "Show your reasoning in tags, then give the final " + "numerical answer after ####.\n" + "For example:\n ... reasoning ... \n#### 42" +) + + +class GSM8KProcessor(Preprocessor): + """Preprocessor for GSM8K dataset. 
+ + GSM8K fields: question (str), answer (str ending with '#### ') + Extracts the ground truth number and stores it in user_data for reward. + """ + + @staticmethod + def extract_ground_truth(answer_str: str) -> str: + """Extract the number after '####' from GSM8K answer.""" + match = re.search(r'####\s*([\-\d,\.]+)', answer_str) + if match: + return match.group(1).replace(',', '').strip() + return '' + + def __call__(self, row) -> Trajectory: + question = row['question'] + answer = row.get('answer', '') + ground_truth = self.extract_ground_truth(answer) + + messages = [ + Message(role='system', content=SYSTEM_PROMPT), + Message(role='user', content=question), + ] + return Trajectory( + messages=messages, + user_data=[('ground_truth', ground_truth)], + ) + + +# ========== GSM8K Reward Functions ========== +class GSM8KAccuracyReward(Reward): + """Accuracy reward for GSM8K: checks if the model's answer matches ground truth. + + Extracts the last '#### ' from model output and compares with ground truth. + Returns 1.0 for correct, 0.0 for incorrect. 
+ """ + + @staticmethod + def extract_answer(completion: str) -> str: + """Extract the last #### answer from model completion.""" + # Only check last 500 chars for efficiency + text = completion[-500:] if len(completion) > 500 else completion + matches = re.findall(r'####\s*([\-\d,\.\s]+)', text) + if matches: + return matches[-1].replace(',', '').replace(' ', '').strip() + return '' + + def __call__( + self, trajectories: List[Trajectory], ground_truths: List[Trajectory] + ) -> List[float]: + rewards = [] + for trajectory in trajectories: + messages = trajectory.get('messages', []) + # Get model completion (last assistant message) + completion = '' + for msg in reversed(messages): + if msg.get('role') == 'assistant': + completion = msg.get('content', '') + break + + # Get ground truth from user_data + gt = '' + user_data = trajectory.get('user_data', []) + if isinstance(user_data, list): + for item in user_data: + if isinstance(item, (list, tuple)) and len(item) == 2: + if item[0] == 'ground_truth': + gt = str(item[1]) + break + + predicted = self.extract_answer(completion) + + # Numeric comparison + correct = False + if predicted and gt: + try: + correct = abs(float(predicted) - float(gt)) < 1e-5 + except (ValueError, OverflowError): + correct = predicted == gt + + rewards.append(1.0 if correct else 0.0) + return rewards + + +class GSM8KFormatReward(Reward): + """Format reward: checks if output contains ... tag. + + Returns 1.0 if format is correct, 0.0 otherwise. 
+ """ + + def __call__( + self, trajectories: List[Trajectory], ground_truths: List[Trajectory] + ) -> List[float]: + rewards = [] + for trajectory in trajectories: + messages = trajectory.get('messages', []) + completion = '' + for msg in reversed(messages): + if msg.get('role') == 'assistant': + completion = msg.get('content', '') + break + has_think = bool( + re.search(r'.*?', completion, re.DOTALL) + ) + has_answer = bool(re.search(r'####\s*[\-\d,\.]+', completion)) + rewards.append(1.0 if (has_think and has_answer) else 0.0) + return rewards + + +def create_gsm8k_dataset(): + """Create GSM8K dataset.""" + meta = DatasetMeta( + "ms://modelscope/gsm8k", + subset_name='main', split='train', + data_slice=range(DATA_NUM), + ) + dataset = Dataset(meta) + dataset.set_template("Template", model_id=f'ms://{BASE_MODEL}', max_length=2048) + dataset.map(GSM8KProcessor()) + dataset.encode(add_generation_prompt=True) + return dataset + + +def compute_rewards( + trajectories: List[Trajectory], +) -> Tuple[List[float], List[float], List[float]]: + """Compute accuracy and format rewards for GSM8K.""" + accuracy_reward_fn = GSM8KAccuracyReward() + format_reward_fn = GSM8KFormatReward() + + accuracy_rewards = accuracy_reward_fn(trajectories, []) + format_rewards = format_reward_fn(trajectories, []) + total_rewards = [a + f for a, f in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + + +def main(): + logger.info("Starting GSM8K GRPO training...") + + # Step 1: Prepare dataset and dataloader (client-side) + dataset = create_gsm8k_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE) + tokenizer = AutoTokenizer.from_pretrained( + BASE_MODEL, trust_remote_code=True) + + logger.info("Dataset and tokenizer initialized") + + # Step 2: Initialize the Tinker-compatible client + logger.info("Connecting to Tinker server...") + service_client = init_tinker_compat_client( + base_url='http://localhost:8000') + + 
logger.info("Creating LoRA training client...") + # Create a LoRA training client for GRPO + training_client = service_client.create_lora_training_client( + base_model=BASE_MODEL, + rank=LORA_RANK, + ) + + logger.info("Training client created successfully") + + # Step 3: Setup metrics and advantage function + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = types.SamplingParams( + max_tokens=MAX_NEW_TOKENS, + temperature=TEMPERATURE, + top_p=0.95, + ) + + # The sampling client is created on-demand via save_weights_for_sampler + sampling_client = None + + step = 0 + for batch in dataloader: + if step >= MAX_STEPS: + break + + metrics.reset() + prompts = batch if isinstance(batch, list) else [batch] + + # ========== 1. Save weights for sampler (instead of sync_weights) ========== + if step % SYNC_INTERVAL == 0: + logger.info(f"Step {step}: Saving weights for sampler...") + + sampling_client = ( + training_client.save_weights_and_get_sampling_client( + name=f'gsm8k-step-{step}')) + logger.info(f"Step {step}: Sampling client ready") + + if sampling_client is None: + logger.warning("No sampling client available, skipping step") + step += 1 + continue + + # ========== 2. 
Sample completions ========== + # Convert input features to token prompts for the sampling client + all_sequences = [] + all_user_data = [] + for prompt_feature in prompts: + input_ids = prompt_feature['input_ids'] + if hasattr(input_ids, 'tolist'): + input_ids = input_ids.tolist() + prompt = types.ModelInput.from_ints(input_ids) + future = sampling_client.sample( + prompt=prompt, + sampling_params=sampling_params, + num_samples=NUM_GENERATIONS, + ) + result = future.result() + # Store both sequences and user data + for _ in range(NUM_GENERATIONS): + all_user_data.append(prompt_feature.get('user_data', [])) + all_sequences.extend(result.sequences) + + if not all_sequences: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + # ========== 3. Build trajectories and collect logprobs ========== + trajectories = [] + old_logps_list = [] + completion_lengths = [] + + for idx, seq in enumerate(all_sequences): + decoded_text = tokenizer.decode(seq.tokens, skip_special_tokens=True) + # Use the corresponding user data for this sequence + trajectories.append({ + 'messages': [ + {'role': 'system', 'content': SYSTEM_PROMPT}, + {'role': 'user', 'content': 'Math problem'}, # Placeholder + {'role': 'assistant', 'content': decoded_text} + ], + 'user_data': all_user_data[idx] + }) + old_logps_list.append( + [lp for lp in seq.logprobs] if seq.logprobs else []) + completion_lengths.append(len(seq.tokens)) + + # ========== 4. Compute rewards ========== + total_rewards, format_rewards, accuracy_rewards = compute_rewards( + trajectories) + metrics.accumulate( + None, None, + completion_lengths=completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }) + + # ========== 5. 
Compute advantages ========== + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ).tolist() + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0) + if frac_zero_std == 1.0: + logger.info( + f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + # ========== 6. Train the policies with GRPO loss ========== + # Train the policies with the Advantage-Regularized policy + # gradient (GRPO) loss function. + # + # The GRPO loss function requires: + # 1. logprobs: The log probabilities of the tokens under the current policy + # 2. advantages: The advantage values for each completion + # + # The training data is constructed with: + # - model_input: The full prompt + completion tokens + # - target_tokens: The shifted tokens for next-token prediction + # - logprobs: The log probabilities from the sampling step + # - advantages: The computed advantage values + training_data = [] + for i, seq in enumerate(all_sequences): + # Build a Datum from the completion tokens with logprobs and advantages + prompt_feature = prompts[i // NUM_GENERATIONS] + prompt_ids = prompt_feature['input_ids'] + if hasattr(prompt_ids, 'tolist'): + prompt_ids = prompt_ids.tolist() + + sampled_tokens = list(seq.tokens) + logprobs = seq.logprobs if seq.logprobs else [0.0] * len(sampled_tokens) + advantage = float(advantages[i]) + + ob_len = len(prompt_ids) - 1 + input_tokens = prompt_ids + sampled_tokens[:-1] + target_tokens = [0] * ob_len + sampled_tokens + padded_advantages = [0.0] * ob_len + [advantage] * len(sampled_tokens) + padded_logprobs = [0.0] * ob_len + logprobs + + # Verify lengths match + assert len(input_tokens) == len(target_tokens) == len(padded_logprobs) == len(padded_advantages), \ + f"Length mismatch: input={len(input_tokens)}, target={len(target_tokens)}, " \ + f"logprobs={len(padded_logprobs)}, advantages={len(padded_advantages)}" + + datum = types.Datum( + 
model_input=types.ModelInput.from_ints(input_tokens), + loss_fn_inputs={ + 'target_tokens': target_tokens, + 'logprobs': types.TensorData.from_numpy(np.array(padded_logprobs, dtype=np.float32)), + 'advantages': types.TensorData.from_numpy(np.array(padded_advantages, dtype=np.float32)), + }, + ) + training_data.append(datum) + + if not training_data: + logger.info( + f"Step {step}: No training data constructed, skipping") + step += 1 + continue + + # Forward-backward pass with importance_sampling (GRPO) loss + # The training data already contains logprobs and advantages for the GRPO loss + fwdbwd_future = training_client.forward_backward( + training_data, "importance_sampling") + optim_future = training_client.optim_step( + types.AdamParams(learning_rate=LEARNING_RATE)) + + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + # Compute metrics from the forward-backward result + # For importance_sampling, we get logprobs and elementwise_loss + logprobs_list = [] + elementwise_losses = [] + for output in fwdbwd_result.loss_fn_outputs: + if output.get('logprobs') is not None: + logprobs_list.append(output['logprobs'].to_numpy()) + if output.get('elementwise_loss') is not None: + elementwise_losses.append(output['elementwise_loss'].to_numpy()) + + # Compute average loss per token (weighted by advantages) + if elementwise_losses: + all_losses = np.concatenate(elementwise_losses) + avg_loss = np.mean(all_losses) if len(all_losses) > 0 else 0.0 + else: + avg_loss = 0.0 + + gc.collect() + + # ========== 7. 
Log ========== + log_dict = metrics.calculate() + log_dict['train/loss_per_token'] = float(avg_loss) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/num_training_samples'] = len(training_data) + logger.info(f"Step {step}: {log_dict}") + step += 1 + + # Save final checkpoint + save_future = training_client.save_state("gsm8k-grpo-final") + save_result = save_future.result() + logger.info(f"Saved final checkpoint to {save_result.path}") + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/cookbook/client/tinker/transformer/lora.py b/cookbook/client/tinker/transformer/lora.py new file mode 100644 index 00000000..44fc94c5 --- /dev/null +++ b/cookbook/client/tinker/transformer/lora.py @@ -0,0 +1,156 @@ +# Tinker-Compatible Client - Transformers LoRA Training Example +# +# This script demonstrates end-to-end LoRA fine-tuning using the Tinker- +# compatible client API (an alternative client protocol for the Twinkle server). +# It covers: connecting to the server, preparing data manually with tokenizers, +# running a training loop, saving checkpoints, and publishing to ModelScope. +# The server must be running first (see server.py and server_config.yaml). + +# Step 1: Load environment variables from a .env file (e.g., API tokens) +import dotenv +dotenv.load_dotenv('.env') + +import os +from twinkle_client import init_tinker_compat_client + +# Step 2: Initialize the Tinker-compatible client to communicate with the server. 
+# - base_url: the address of the running server +# - api_key: authentication token (loaded from environment variable) +service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN')) + +# Step 3: List models available on the server to verify the connection +print("Available models:") +for item in service_client.get_server_capabilities().supported_models: + print("- " + item.model_name) + + +# Step 4: Create a REST client for querying training runs and checkpoints. +# This is useful for inspecting previous training sessions or resuming training. +rest_client = service_client.create_rest_client() + +future = rest_client.list_training_runs(limit=50) +response = future.result() + +# You can resume from either: +# 1. A twinkle path: "twinkle://...//weights/" +# 2. A model id on hub: "/" +# Example: +# resume_path = "twinkle://20260131_170251-Qwen_Qwen2_5-0_5B-Instruct-7275126c/weights/pig-latin-lora-epoch-1" +# resume_path = "AlexEz/20260205_163645-Qwen_Qwen2_5-3B-Instruct-385d5c17_pig-latin-lora-epoch-1" +resume_path = "" + +print(f"Found {len(response.training_runs)} training runs") +for tr in response.training_runs: + print(tr.model_dump_json(indent=2)) + + chpts = rest_client.list_checkpoints(tr.training_run_id).result() + for chpt in chpts.checkpoints: + print(" " + chpt.model_dump_json(indent=2)) + # Uncomment the line below to resume from the last checkpoint: + # resume_path = chpt.tinker_path + +# Step 5: Create or resume a training client. +# If resume_path is set, it restores both model weights and optimizer state. 
+base_model = "Qwen/Qwen2.5-3B-Instruct" +if not resume_path: + training_client = service_client.create_lora_training_client( + base_model=base_model + ) +else: + print("Resuming from " + resume_path) + training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path) + +# Step 6: Prepare training data manually +# +# This example teaches the model to translate English into Pig Latin. +# Each example has an "input" (English phrase) and "output" (Pig Latin). +examples = [ + {"input": "banana split", "output": "anana-bay plit-say"}, + {"input": "quantum physics", "output": "uantum-qay ysics-phay"}, + {"input": "donut shop", "output": "onut-day op-shay"}, + {"input": "pickle jar", "output": "ickle-pay ar-jay"}, + {"input": "space exploration", "output": "ace-spay exploration-way"}, + {"input": "rubber duck", "output": "ubber-ray uck-day"}, + {"input": "coding wizard", "output": "oding-cay izard-way"}, +] + +from tinker import types +from modelscope import AutoTokenizer + +# Load the tokenizer locally (avoids a network call to HuggingFace) +tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) + +def process_example(example: dict, tokenizer) -> types.Datum: + """Convert a raw example dict into a Datum suitable for the training API. 
+ + The Datum contains: + - model_input: the token IDs fed into the LLM + - loss_fn_inputs: target tokens and per-token weights (0 = ignore, 1 = train) + """ + # Build a simple prompt template + prompt = f"English: {example['input']}\nPig Latin:" + + # Tokenize the prompt; weights=0 means the loss ignores these tokens + prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True) + prompt_weights = [0] * len(prompt_tokens) + + # Tokenize the completion; weights=1 means the loss is computed on these tokens + completion_tokens = tokenizer.encode(f" {example['output']}\n\n", add_special_tokens=False) + completion_weights = [1] * len(completion_tokens) + + # Concatenate prompt + completion + tokens = prompt_tokens + completion_tokens + weights = prompt_weights + completion_weights + + # Shift by one: input is tokens[:-1], target is tokens[1:] (next-token prediction) + input_tokens = tokens[:-1] + target_tokens = tokens[1:] + weights = weights[1:] + + return types.Datum( + model_input=types.ModelInput.from_ints(tokens=input_tokens), + loss_fn_inputs=dict(weights=weights, target_tokens=target_tokens) + ) + +# Process all examples into Datum objects +processed_examples = [process_example(ex, tokenizer) for ex in examples] + +# Visualize the first example to verify tokenization and weight alignment +datum0 = processed_examples[0] +print(f"{'Input':<20} {'Target':<20} {'Weight':<10}") +print("-" * 50) +for i, (inp, tgt, wgt) in enumerate(zip(datum0.model_input.to_ints(), datum0.loss_fn_inputs['target_tokens'].tolist(), datum0.loss_fn_inputs['weights'].tolist())): + print(f"{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}") + +# Step 7: Run the training loop +# +# For each epoch, iterate over multiple batches: +# - forward_backward: sends data to the server, computes loss & gradients +# - optim_step: updates model weights using Adam optimizer +import numpy as np +for epoch in range(2): + for batch in range(5): + # Send training data and 
get back logprobs (asynchronous futures) + fwdbwd_future = training_client.forward_backward(processed_examples, "cross_entropy") + optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4)) + + # Wait for results from the server + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + # Compute the weighted average log-loss per token for monitoring + print(f"Epoch {epoch}, Batch {batch}: ", end="") + logprobs = np.concatenate([output['logprobs'].tolist() for output in fwdbwd_result.loss_fn_outputs]) + weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples]) + print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}") + + # Save checkpoint (model weights + optimizer state) after each epoch + save_future = training_client.save_state(f"pig-latin-lora-epoch-{epoch}") + save_result = save_future.result() + print(f"Saved checkpoint for epoch {epoch} to {save_result.path}") + +# Step 8: Publish the final checkpoint to ModelScope Hub. +# NOTE: Requires a valid ModelScope token set as api_key when initializing the client. +# The published model name will be: {run_id}_{checkpoint_name} +rest_client.publish_checkpoint_from_tinker_path(save_result.path).result() +print("Published checkpoint") diff --git a/cookbook/client/tinker/transformer/sample.py b/cookbook/client/tinker/transformer/sample.py new file mode 100644 index 00000000..84faa689 --- /dev/null +++ b/cookbook/client/tinker/transformer/sample.py @@ -0,0 +1,43 @@ +# Tinker-Compatible Client - Sampling / Inference Example +# +# This script demonstrates how to use a previously trained LoRA checkpoint +# for text generation (sampling) via the Tinker-compatible client API. +# The server must be running first (see server.py and server_config.yaml). 
+ +from tinker import types +from twinkle_client import init_tinker_compat_client +from modelscope import AutoTokenizer + +# Step 1: Define the base model and connect to the server +base_model = "Qwen/Qwen2.5-3B-Instruct" +service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key="tml-EMPTY_TOKEN") + +# Step 2: Create a sampling client by loading weights from a saved checkpoint. +# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint. +# The server will load the base model and apply the LoRA adapter weights. +sampling_client = service_client.create_sampling_client( + model_path="twinkle://20260130_133245-Qwen_Qwen2_5-0_5B-Instruct-ffebd239/weights/pig-latin-lora-epoch-1", + base_model=base_model) + +# Step 3: Load the tokenizer locally to encode the prompt and decode the results +print(f"Using model {base_model}") +tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) + +# Step 4: Prepare the prompt and sampling parameters +prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:")) +params = types.SamplingParams( + max_tokens=20, # Maximum number of tokens to generate + temperature=0.0, # Greedy sampling (deterministic, always pick the top token) + stop=["\n"] # Stop generation when a newline character is produced +) + +# Step 5: Send the sampling request to the server. +# num_samples=8 generates 8 independent completions for the same prompt. 
+print("Sampling...") +future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8) +result = future.result() + +# Step 6: Decode and print the generated responses +print("Responses:") +for i, seq in enumerate(result.sequences): + print(f"{i}: {repr(tokenizer.decode(seq.tokens))}") diff --git a/cookbook/client/tinker/transformer/self_congnition.py b/cookbook/client/tinker/transformer/self_congnition.py new file mode 100644 index 00000000..e1f5b7a3 --- /dev/null +++ b/cookbook/client/tinker/transformer/self_congnition.py @@ -0,0 +1,140 @@ +# Tinker-Compatible Client - Self-Cognition Training & Evaluation Example +# +# This script demonstrates two workflows using the Tinker-compatible client: +# 1. train(): Fine-tune a model on a self-cognition dataset so it learns +# a custom identity (name, author). +# 2. eval(): Load a trained checkpoint and sample from it to verify +# that the model has learned the custom identity. +# The server must be running first (see server.py and server_config.yaml). 
+ +import numpy as np +from tqdm import tqdm +from tinker import types +from twinkle_client import init_tinker_compat_client +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.preprocessor import SelfCognitionProcessor +from twinkle.server.tinker.common import input_feature_to_datum +from modelscope import AutoTokenizer + +# The base model to fine-tune / evaluate +base_model = "Qwen/Qwen2.5-3B-Instruct" + + +def train(): + # Step 1: Prepare the dataset + + # Load the self-cognition dataset from ModelScope (first 500 examples) + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) + + # Apply the chat template matching the base model (max 256 tokens per sample) + dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=256) + + # Replace placeholder names with custom model/author identity + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False) + + # Tokenize and encode the dataset into model-ready input features + dataset.encode(batched=True, load_from_cache_file=False) + + # Wrap the dataset into a DataLoader that yields batches of size 8 + dataloader = DataLoader(dataset=dataset, batch_size=8) + + # Step 2: Initialize the training client + + # Connect to the Twinkle server running locally + service_client = init_tinker_compat_client(base_url='http://localhost:8000') + + # Create a LoRA training client for the base model (rank=16 for the LoRA adapter) + training_client = service_client.create_lora_training_client( + base_model=base_model, + rank=16 + ) + + # Step 3: Run the training loop + + for epoch in range(3): + print(f"Epoch {epoch}") + for step, batch in tqdm(enumerate(dataloader)): + # Convert each InputFeature into a Datum for the Tinker API + input_datum = [input_feature_to_datum(input_feature) for input_feature in batch] + + # Send data to server: forward + backward pass (computes gradients) + 
fwdbwd_future = training_client.forward_backward(input_datum, "cross_entropy") + + # Optimizer step: update model weights with Adam + optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4)) + + # Wait for both operations to complete + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + # Compute weighted average log-loss per token for monitoring + logprobs = np.concatenate([output['logprobs'].tolist() for output in fwdbwd_result.loss_fn_outputs]) + weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in input_datum]) + print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}") + + # Save a checkpoint after each epoch + save_future = training_client.save_state(f"twinkle-lora-{epoch}") + save_result = save_future.result() + print(f"Saved checkpoint to {save_result.path}") + + +def eval(): + # Step 1: Load the trained LoRA checkpoint for inference + + # Path to a previously saved LoRA checkpoint (twinkle:// URI) + weight_path = "twinkle://20260207_110850-Qwen_Qwen2_5-0_5B-Instruct-ce7e819f/weights/twinkle-lora-2" + + # Connect to the server and create a sampling client with the trained weights + service_client = init_tinker_compat_client(base_url='http://localhost:8000') + sampling_client = service_client.create_sampling_client( + model_path=weight_path, + base_model=base_model) + + # Load the tokenizer for encoding the prompt and decoding the output + tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) + + # Step 2: Prepare the chat prompt + + # Build a multi-turn conversation to test the model's self-cognition + inputs = [ + { + 'role': 'system', + 'content': 'You are a helpful assistant.' + }, + { + 'role': 'user', + 'content': 'what is your name?' 
+ } + ] + + # Apply the model's chat template to format the conversation + input_ids = tokenizer.apply_chat_template( + inputs, + tokenize=True, + add_generation_prompt=True # Adds the assistant prompt prefix + ) + + # Step 3: Generate responses + + prompt = types.ModelInput.from_ints(input_ids) + params = types.SamplingParams( + max_tokens=50, # Maximum tokens to generate + temperature=0.2, # Low temperature for more focused responses + stop=["\n"] # Stop at newline + ) + + # Sample 8 independent completions + print("Sampling...") + future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8) + result = future.result() + + # Decode and print each response + print("Responses:") + for i, seq in enumerate(result.sequences): + print(f"{i}: {repr(tokenizer.decode(seq.tokens))}") + + +if __name__ == "__main__": + # train() # Uncomment to run training + eval() # Run evaluation / inference diff --git a/cookbook/client/tinker/transformer/server.py b/cookbook/client/tinker/transformer/server.py new file mode 100644 index 00000000..f8669622 --- /dev/null +++ b/cookbook/client/tinker/transformer/server.py @@ -0,0 +1,21 @@ +# Twinkle Server Launcher - Tinker-Compatible Transformers Backend +# +# This script starts the Twinkle server with Tinker-compatible API support. +# It reads the server_config.yaml in the same directory for all +# configuration (model, sampler, deployment settings, etc.). +# Run this script BEFORE running any client scripts (lora.py, sample.py, etc.). 
+ +import os + +# Enable Ray debug mode for verbose logging during development +# os.environ['RAY_DEBUG'] = '1' +os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0' + +from twinkle.server import launch_server + +# Resolve the path to server_config.yaml relative to this script's location +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 'server_config.yaml') + +# Launch the Twinkle server — this call blocks until the server is shut down +launch_server(config_path=config_path) \ No newline at end of file diff --git a/cookbook/client/tinker/transformer/server_config.yaml b/cookbook/client/tinker/transformer/server_config.yaml new file mode 100644 index 00000000..fbdd29c0 --- /dev/null +++ b/cookbook/client/tinker/transformer/server_config.yaml @@ -0,0 +1,95 @@ +# Twinkle Server Configuration - Tinker-Compatible Transformers Backend + +# Server protocol type: "tinker" enables the Tinker-compatible API +server_type: tinker + +# proxy_location: determines where the HTTP proxy runs. +# "EveryNode" means each Ray node runs its own proxy (good for multi-node). +proxy_location: EveryNode + +# HTTP listener settings +http_options: + host: 0.0.0.0 # Listen on all network interfaces + port: 8000 # Port number for the server + +# Applications: each entry defines a service component deployed on the server +applications: + + # 1. TinkerCompatServer - The central API server + # Handles client connections, training run tracking, checkpoint listing. + - name: server + route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible) + import_path: server # Python module to import + args: + + deployments: + - name: TinkerCompatServer + autoscaling_config: + min_replicas: 1 # Minimum number of replicas + max_replicas: 1 # Maximum number of replicas + target_ongoing_requests: 128 # Target concurrent requests per replica + ray_actor_options: + num_cpus: 0.1 # CPU resources allocated to this actor + + # 2. 
Model Service - Hosts the base model for training (Transformers backend).
+  # This is the actual model worker that performs forward/backward passes.
+  - name: models-Qwen2.5-3B-Instruct
+    route_prefix: /api/v1/model/Qwen/Qwen2.5-3B-Instruct
+    import_path: model
+    args:
+      use_megatron: false  # Use HuggingFace Transformers backend
+      model_id: "ms://Qwen/Qwen2.5-3B-Instruct"  # ModelScope model identifier
+      nproc_per_node: 2  # Number of GPU processes per node
+      device_group:
+        name: model
+        ranks: [0, 1]  # GPU rank indices
+        device_type: cuda
+      device_mesh:
+        device_type: cuda
+        mesh: [0, 1]
+        mesh_dim_names: ['dp']  # 'dp' = data parallel
+      queue_config:
+        rps_limit: 100  # Max requests per second
+        tps_limit: 10000  # Max tokens per second
+      adapter_config:
+        per_token_adapter_limit: 30  # Max concurrent LoRA adapters
+        adapter_timeout: 1800  # Seconds before idle adapter unload
+    deployments:
+      - name: ModelManagement
+        autoscaling_config:
+          min_replicas: 1
+          max_replicas: 1
+          target_ongoing_requests: 16
+        ray_actor_options:
+          num_cpus: 0.1
+
+  # 3. Sampler Service - Runs inference / sampling using vLLM engine
+  # Used for generating text from the model (e.g., evaluating LoRA results). 
+ - name: sampler-Qwen2.5-3B-Instruct + route_prefix: /api/v1/sampler/Qwen/Qwen2.5-3B-Instruct + import_path: sampler + args: + model_id: "ms://Qwen/Qwen2.5-3B-Instruct" # ModelScope model identifier + nproc_per_node: 1 # Number of GPU processes per node + sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler) + engine_args: # vLLM engine-specific settings + max_model_len: 4096 # Maximum sequence length the engine supports + gpu_memory_utilization: 0.5 # Fraction of GPU memory to use (0.0-1.0) + enable_lora: true # Allow loading LoRA adapters during inference + device_group: # Logical device group for the sampler + name: sampler + ranks: [0] # GPU rank indices to use + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0] + mesh_dim_names: ['dp'] + deployments: + - name: SamplerManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + num_gpus: 1 # Sampler needs a full GPU for inference diff --git a/cookbook/client/twinkle/megatron/server.py b/cookbook/client/twinkle/megatron/server.py new file mode 100644 index 00000000..433c6309 --- /dev/null +++ b/cookbook/client/twinkle/megatron/server.py @@ -0,0 +1,20 @@ +# Twinkle Server Launcher - Megatron Backend +# +# This script starts the Twinkle server using Ray Serve with Megatron support. +# It reads the server_config.yaml in the same directory for all +# configuration (model, processor, deployment settings, etc.). +# Run this script BEFORE running the client training script (lora.py). 
+ +import os + +# Enable Ray debug mode for verbose logging during development +os.environ['RAY_DEBUG'] = '1' + +from twinkle.server import launch_server + +# Resolve the path to server_config.yaml relative to this script's location +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 'server_config.yaml') + +# Launch the Twinkle server — this call blocks until the server is shut down +launch_server(config_path=config_path) \ No newline at end of file diff --git a/cookbook/client/twinkle/megatron/server_config.yaml b/cookbook/client/twinkle/megatron/server_config.yaml new file mode 100644 index 00000000..ba40efc3 --- /dev/null +++ b/cookbook/client/twinkle/megatron/server_config.yaml @@ -0,0 +1,87 @@ +# Twinkle Server Configuration - Megatron Backend + +# Server protocol type: "twinkle" for the native Twinkle client protocol +server_type: twinkle + +# proxy_location: determines where the HTTP proxy runs. +# "EveryNode" means each Ray node runs its own proxy (good for multi-node). +proxy_location: EveryNode + +# HTTP listener settings +http_options: + host: 0.0.0.0 # Listen on all network interfaces + port: 8000 # Port number for the server + +# Applications: each entry defines a service component deployed on the server +applications: + + # 1. TwinkleServer - The central management server + # Handles client connections, training run tracking, checkpoint listing. + - name: server + route_prefix: /server # API endpoint prefix + import_path: server # Python module to import + args: + + deployments: + - name: TwinkleServer + autoscaling_config: + min_replicas: 1 # Minimum number of replicas + max_replicas: 1 # Maximum number of replicas + target_ongoing_requests: 128 # Target concurrent requests per replica + ray_actor_options: + num_cpus: 0.1 # CPU resources allocated to this actor + + # 2. 
Model Service - Hosts the base model for training (Megatron backend) + # This is the actual model worker that performs forward/backward passes. + - name: models-Qwen2.5-3B-Instruct + route_prefix: /models/Qwen/Qwen2.5-3B-Instruct # REST path for this model + import_path: model + args: + use_megatron: true # Use Megatron-LM backend (not HuggingFace) + mixed_precision: bf16 + model_id: "ms://Qwen/Qwen2.5-3B-Instruct" # ModelScope model identifier to load + nproc_per_node: 2 # Number of GPU processes per node + device_group: # Logical device group for this model + name: model + ranks: [0,1] # GPU rank indices to use + device_type: cuda + device_mesh: # Distributed training mesh configuration + device_type: cuda + mesh: [0,1] # Device indices in the mesh + mesh_dim_names: ['dp'] # Mesh dimension names: 'dp' = data parallel + adapter_config: + per_token_adapter_limit: 30 # Max concurrent LoRA adapters + adapter_timeout: 1800 # Seconds before idle adapter unload + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + # 3. Processor Service - Handles data preprocessing on CPU + # Runs tokenization, template application, and other CPU-bound tasks. 
+ - name: processor + route_prefix: /processors + import_path: processor + args: + nproc_per_node: 2 # Number of processor workers per node + ncpu_proc_per_node: 2 # Number of CPU processes per node + device_group: + name: model + ranks: 2 # CPU rank index + device_type: CPU + device_mesh: + device_type: CPU + mesh: [0,1] + mesh_dim_names: ['dp'] + deployments: + - name: ProcessorManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 \ No newline at end of file diff --git a/cookbook/client/twinkle/transformer/grpo.py b/cookbook/client/twinkle/transformer/grpo.py new file mode 100644 index 00000000..454453da --- /dev/null +++ b/cookbook/client/twinkle/transformer/grpo.py @@ -0,0 +1,237 @@ +# Twinkle Client - GRPO (Group Relative Policy Optimization) Training Example +# +# This script demonstrates GRPO reinforcement learning training using the +# Twinkle client API with model.save() + adapter_uri for weight sync. +# Instead of calling sync_weights directly, it periodically saves model weights +# and passes the checkpoint path to the sampler as adapter_uri. +# +# Flow: +# 1. Prepare Countdown dataset (client-side) +# 2. Initialize Twinkle client, model, and sampler +# 3. Configure model with GRPOLoss, optimizer, LR scheduler +# 4. Training loop: +# a. Every SYNC_INTERVAL steps: model.save() → get twinkle_path +# b. sampler.sample(inputs, adapter_uri=twinkle_path, num_samples=N) +# c. Compute rewards and advantages (client-side) +# d. model.forward_backward(inputs, advantages, old_logps) +# e. Optimizer step +# +# The server must be running first (see server.py and server_config.yaml). +# Requires both model and sampler services to be configured. 
+ +import dotenv +dotenv.load_dotenv('.env') + +import gc +import os +from typing import List, Tuple + +from peft import LoraConfig + +from twinkle import get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.dataset import DatasetMeta +from twinkle.metric import CompletionRewardMetric +from twinkle_client import init_twinkle_client +from twinkle_client.dataloader import DataLoader +from twinkle_client.dataset import Dataset +from twinkle_client.model import MultiLoraTransformersModel +from twinkle_client.sampler import vLLMSampler + +logger = get_logger() + +# ========== Configuration ========== +MODEL_ID = 'ms://Qwen/Qwen2.5-3B-Instruct' +NUM_GENERATIONS = 8 +MAX_NEW_TOKENS = 1024 +LEARNING_RATE = 1e-5 +MAX_STEPS = 10 +BATCH_SIZE = 4 +TEMPERATURE = 1.0 +SYNC_INTERVAL = 5 # Save weights for sampler every N steps +GRADIENT_ACCUMULATION_STEPS = 4 + + +def create_countdown_dataset(): + """Create Countdown Game dataset for GRPO training.""" + + dataset = Dataset(dataset_meta=DatasetMeta( + "ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(500))) + dataset.set_template( + 'Template', model_id=MODEL_ID, max_length=8192) + dataset.map('CountdownProcessor') + dataset.encode(add_generation_prompt=True, batched=True) + return dataset + + +def compute_rewards( + trajectories: List[dict], +) -> Tuple[List[float], List[float], List[float]]: + """Compute format and accuracy rewards for Countdown game.""" + from twinkle.reward import CountDownAccuracy, FormatReward + format_rewards = FormatReward()(trajectories, []) + accuracy_rewards = CountDownAccuracy()(trajectories, []) + total_rewards = [a + b for a, b in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + + +def train(): + # Step 1: Initialize the Twinkle client + client = init_twinkle_client( + base_url='http://127.0.0.1:8000', + api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'), + ) + + # Step 2: Prepare dataset and dataloader + dataset = 
create_countdown_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE) + + # Step 3: Configure the training model + model = MultiLoraTransformersModel(model_id=MODEL_ID) + + lora_config = LoraConfig( + target_modules='all-linear', + r=8, + lora_alpha=32, + lora_dropout=0.05, + ) + model.add_adapter_to_model( + 'default', lora_config, + gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS, + ) + + # Set GRPO loss (the key difference from SFT training) + model.set_loss('GRPOLoss', epsilon=0.2, beta=0.0) + + # Set optimizer and LR scheduler + model.set_optimizer('AdamW', lr=LEARNING_RATE) + model.set_lr_scheduler( + 'CosineWarmupScheduler', + num_warmup_steps=500, + num_training_steps=MAX_STEPS, + ) + + # Set processor and template for encoding inputs + model.set_processor('InputProcessor') + model.set_template('Template', model_id=MODEL_ID) + + # Step 4: Configure the sampler + sampler = vLLMSampler(model_id=MODEL_ID) + sampler.set_template('Template', model_id=MODEL_ID) + + # Step 5: Setup metrics and advantage function + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = { + 'max_tokens': MAX_NEW_TOKENS, + 'temperature': TEMPERATURE, + 'top_p': 0.95, + } + + # Track the current adapter path for sampling + current_adapter_uri = None + + step = 0 + for batch in dataloader: + if step >= MAX_STEPS: + break + + metrics.reset() + prompts = batch if isinstance(batch, list) else [batch] + + # ========== 1. Save weights and update adapter_uri ========== + # Instead of sync_weights, save the model checkpoint and pass + # the resulting path to the sampler as adapter_uri + if step % SYNC_INTERVAL == 0: + logger.info(f"Step {step}: Saving weights for sampler...") + twinkle_path = model.save( + name=f'grpo-sampler-step-{step}', + save_optimizer=False, + ) + current_adapter_uri = twinkle_path + logger.info( + f"Step {step}: Saved weights to {current_adapter_uri}") + + # ========== 2. 
Sample completions ========== + sample_response = sampler.sample( + inputs=prompts, + sampling_params=sampling_params, + adapter_uri=current_adapter_uri, + num_samples=NUM_GENERATIONS, + ) + + input_features = [] + old_logps_list = [] + completion_lengths = [] + + sequences = sample_response.get('sequences', []) + for seq in sequences: + input_features.append(seq.get('new_input_feature', seq)) + old_logps_list.append(seq.get('logprobs', [])) + completion_lengths.append(len(seq.get('tokens', []))) + + if not input_features: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + # ========== 3. Compute rewards ========== + total_rewards, format_rewards, accuracy_rewards = compute_rewards( + input_features) + metrics.accumulate( + None, None, + completion_lengths=completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }) + + # ========== 4. Compute advantages ========== + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ).tolist() + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0) + if frac_zero_std == 1.0: + logger.info( + f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + # ========== 5. Training step (GRPO) ========== + # forward_backward with GRPO loss: passes advantages and old_logps + # to the server-side GRPOLoss for proper policy optimization + model.forward_backward( + inputs=input_features, + advantages=advantages, + old_logps=old_logps_list, + ) + + # Gradient clipping and optimizer step + model.clip_grad_norm(1.0) + model.step() + model.zero_grad() + model.lr_step() + + gc.collect() + + # ========== 6. 
Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric()) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + logger.info(f"Step {step}: {log_dict}") + step += 1 + + # Save final checkpoint + twinkle_path = model.save( + name='grpo-countdown-final', save_optimizer=True) + logger.info(f"Saved final checkpoint: {twinkle_path}") + + +if __name__ == '__main__': + train() diff --git a/cookbook/client/twinkle/transformer/lora.py b/cookbook/client/twinkle/transformer/lora.py new file mode 100644 index 00000000..1824fbd4 --- /dev/null +++ b/cookbook/client/twinkle/transformer/lora.py @@ -0,0 +1,146 @@ +# Twinkle Client - Transformers LoRA Training Example +# +# This script demonstrates how to fine-tune a language model using LoRA +# (Low-Rank Adaptation) through the Twinkle client-server architecture. +# The server must be running first (see server.py and server_config.yaml). + +# Step 1: Load environment variables from a .env file (e.g., API tokens) +import dotenv +dotenv.load_dotenv('.env') + +import os +from peft import LoraConfig + +from twinkle import get_logger +from twinkle.dataset import DatasetMeta +from twinkle_client.dataloader import DataLoader +from twinkle_client.dataset import Dataset +from twinkle_client.model import MultiLoraTransformersModel +from twinkle_client import init_twinkle_client + +logger = get_logger() + +# Whether to use Megatron for training +use_megatron = True +# Step 2: Initialize the Twinkle client to communicate with the remote server. +# - base_url: the address of the running Twinkle server +# - api_key: authentication token (loaded from environment variable) +client = init_twinkle_client( + base_url='http://127.0.0.1:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN')) + +# Step 3: Query the server for existing training runs and their checkpoints. +# This is useful for resuming a previous training session. 
+runs = client.list_training_runs() + +resume_path = None +for run in runs: + logger.info(run.model_dump_json(indent=2)) + # List all saved checkpoints for this training run + checkpoints = client.list_checkpoints(run.training_run_id) + + for checkpoint in checkpoints: + logger.info(checkpoint.model_dump_json(indent=2)) + # Uncomment the line below to resume from a specific checkpoint: + # resume_path = checkpoint.twinkle_path + + +def train(): + # Step 4: Prepare the dataset + + # Load the self-cognition dataset from ModelScope + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) + + # Apply a chat template so the data matches the model's expected input format + dataset.set_template( + 'Template', model_id='ms://Qwen/Qwen2.5-3B-Instruct', max_length=512) + + # Replace placeholder names in the dataset with custom model/author names + dataset.map('SelfCognitionProcessor', init_args={ + 'model_name': 'twinkle模型', 'model_author': 'ModelScope社区'}) + + # Tokenize and encode the dataset into model-ready input features + dataset.encode(batched=True) + + # Wrap the dataset into a DataLoader that yields batches of size 4 + dataloader = DataLoader(dataset=dataset, batch_size=4) + + # Step 5: Configure the model + + # Create a multi-LoRA Transformers model pointing to the base model on ModelScope + model = MultiLoraTransformersModel( + model_id='ms://Qwen/Qwen2.5-3B-Instruct') + + # Define LoRA configuration: apply low-rank adapters to all linear layers + lora_config = LoraConfig( + target_modules='all-linear' + ) + + # Attach the LoRA adapter named 'default' to the model. + # gradient_accumulation_steps=2 means gradients are accumulated over 2 micro-batches + # before an optimizer step, effectively doubling the batch size. 
+ model.add_adapter_to_model( + 'default', lora_config, gradient_accumulation_steps=2) + + # Set the same chat template used during data preprocessing + model.set_template('Template') + + # Set the input processor (pads sequences on the right side) + model.set_processor('InputProcessor', padding_side='right') + + # Use cross-entropy loss for language modeling + model.set_loss('CrossEntropyLoss') + + # Use Adam optimizer with a learning rate of 1e-4 (Only support Adam optimizer if server use megatron) + model.set_optimizer('Adam', lr=1e-4) + + # Use a linear learning rate scheduler (Do not support LR scheduler if server use megatron) + if not use_megatron: + model.set_lr_scheduler('LinearLR') + + # Step 6: Optionally resume from a previous checkpoint + if resume_path: + logger.info(f'Resuming training from {resume_path}') + model.load(resume_path, load_optimizer=True) + + # Step 7: Run the training loop + logger.info(model.get_train_configs()) + + for epoch in range(3): + logger.info(f'Starting epoch {epoch}') + for step, batch in enumerate(dataloader): + # Forward pass + backward pass (computes gradients) + output = model.forward_backward(inputs=batch) + + # Log the loss every 2 steps (aligned with gradient accumulation) + if step % 2 == 0: + logger.info(f'Current is step {step // 2}, loss: {output}') + + # Clip gradients to prevent exploding gradients (max norm = 1.0) + model.clip_grad_norm(1.0) + + # Perform one optimizer step (update model weights) + model.step() + + # Reset gradients to zero for the next iteration + model.zero_grad() + + # Advance the learning rate scheduler by one step + model.lr_step() + + # Step 8: Save the trained checkpoint + twinkle_path = model.save(name=f'twinkle-epoch-{epoch}', save_optimizer=True) + logger.info(f"Saved checkpoint: {twinkle_path}") + + # Step 9: Upload the checkpoint to ModelScope Hub + # YOUR_USER_NAME = "your_username" + # hub_model_id = f'{YOUR_USER_NAME}/twinkle-self-cognition' + # model.upload_to_hub( + # 
checkpoint_dir=twinkle_path, + # hub_model_id=hub_model_id, + # async_upload=False + # ) + # logger.info(f"Uploaded checkpoint to hub: {hub_model_id}") + + +if __name__ == '__main__': + train() diff --git a/cookbook/client/twinkle/transformer/sampler.py b/cookbook/client/twinkle/transformer/sampler.py new file mode 100644 index 00000000..4a8cc7ee --- /dev/null +++ b/cookbook/client/twinkle/transformer/sampler.py @@ -0,0 +1,89 @@ +# Twinkle Client - Sampler (Inference) Example +# +# This script demonstrates how to run text generation inference +# through the Twinkle client-server architecture. +# The server must be running first (see server.py and server_config.yaml). +# +# This is the client/server equivalent of cookbook/legacy/sampler/sampler_demo.py. +# Instead of running everything locally, the sampler runs on the server side +# while the client sends requests over HTTP. + +# Step 1: Load environment variables from a .env file (e.g., API tokens) +import dotenv +dotenv.load_dotenv('.env') + +import os +from transformers import AutoTokenizer + +from twinkle import get_logger +from twinkle_client import init_twinkle_client +from twinkle_client.sampler import vLLMSampler + +logger = get_logger() + +MODEL_ID = 'Qwen/Qwen2.5-3B-Instruct' + +# Optional: adapter URI for LoRA inference +# This can be a twinkle:// path from a training run checkpoint +# or None to use the base model +# ADAPTER_URI = None +# Example: +ADAPTER_URI = "twinkle://20260208_224851-fa3cdd11-default/weights/twinkle-epoch-2" + + +def sample(): + # Step 2: Initialize the Twinkle client to communicate with the remote server. 
+ client = init_twinkle_client( + base_url='http://127.0.0.1:8000', + api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'), + ) + + # Step 3: Create the sampler client pointing to the model on the server + sampler = vLLMSampler(model_id=MODEL_ID) + + # Step 4: Set the chat template so the sampler can encode Trajectory inputs + sampler.set_template('Template', model_id=MODEL_ID) + + # Step 5: Prepare inputs as Trajectory dicts (messages format) + # Each trajectory is a conversation with system and user messages + trajectory = { + 'messages': [ + {'role': 'system', 'content': 'You are a helpful assistant.'}, + {'role': 'user', 'content': 'Who are you?'}, + ] + } + + num_prompts = 4 + num_samples = 2 # Generate 2 completions per prompt + + # Step 6: Configure sampling parameters + sampling_params = { + 'max_tokens': 128, + 'temperature': 1.0, + } + + # Step 7: Call the sampler + # - inputs: list of Trajectory dicts (will be encoded server-side using the template) + # - sampling_params: controls generation behavior + # - adapter_uri: optional LoRA adapter path for fine-tuned inference + # - num_samples: number of completions per prompt + response = sampler.sample( + inputs=[trajectory] * num_prompts, + sampling_params=sampling_params, + adapter_uri=ADAPTER_URI, + num_samples=num_samples, + ) + + # Step 8: Decode and print the results + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) + + logger.info(f"Generated {len(response['sequences'])} sequences " + f"({num_prompts} prompts x {num_samples} samples)") + + for i, seq in enumerate(response['sequences']): + text = tokenizer.decode(seq['tokens'], skip_special_tokens=True) + logger.info(f"Sequence {i}:\n {text}\n") + + +if __name__ == '__main__': + sample() diff --git a/cookbook/client/twinkle/transformer/server.py b/cookbook/client/twinkle/transformer/server.py new file mode 100644 index 00000000..92260007 --- /dev/null +++ b/cookbook/client/twinkle/transformer/server.py @@ -0,0 +1,20 @@ +# Twinkle 
Server Launcher - Transformers Backend
#
# This script starts the Twinkle server using Ray Serve.
# It reads the server_config.yaml in the same directory for all
# configuration (model, processor, deployment settings, etc.).
# Run this script BEFORE running the client training script (lora.py).

import os

# Enable Ray debug mode for verbose logging during development.
# NOTE(review): presumably should be removed/disabled for production runs —
# confirm the effect of RAY_DEBUG for the Ray version in use.
os.environ['RAY_DEBUG'] = '1'

from twinkle.server import launch_server

# Resolve the path to server_config.yaml relative to this script's location,
# so launching works regardless of the current working directory.
file_dir = os.path.abspath(os.path.dirname(__file__))
config_path = os.path.join(file_dir, 'server_config.yaml')

# Launch the Twinkle server — this call blocks until the server is shut down
launch_server(config_path=config_path)
\ No newline at end of file
diff --git a/cookbook/client/twinkle/transformer/server_config.yaml b/cookbook/client/twinkle/transformer/server_config.yaml
new file mode 100644
index 00000000..392e333e
--- /dev/null
+++ b/cookbook/client/twinkle/transformer/server_config.yaml
@@ -0,0 +1,118 @@
# Twinkle Server Configuration - Transformers Backend

# Server protocol type: "twinkle" for the native Twinkle client protocol
server_type: twinkle

# proxy_location: determines where the HTTP proxy runs.
# "EveryNode" means each Ray node runs its own proxy (good for multi-node).
proxy_location: EveryNode

# HTTP listener settings
http_options:
  host: 0.0.0.0  # Listen on all network interfaces
  port: 8000     # Port number for the server

# Applications: each entry defines a service component deployed on the server
applications:

  # 1. TwinkleServer - The central management server
  #    Handles client connections, training run tracking, checkpoint listing.
  - name: server
    route_prefix: /server  # API endpoint prefix
    import_path: server    # Python module to import
    # NOTE(review): 'args' is intentionally empty for this app — presumably
    # optional in the schema; confirm against the server loader.
    args:

    deployments:
      - name: TwinkleServer
        autoscaling_config:
          min_replicas: 1               # Minimum number of replicas
          max_replicas: 1               # Maximum number of replicas
          target_ongoing_requests: 128  # Target concurrent requests per replica
        ray_actor_options:
          num_cpus: 0.1  # CPU resources allocated to this actor

  # 2. Model Service - Hosts the base model for training
  #    This is the actual model worker that performs forward/backward passes.
  - name: models-Qwen2.5-3B-Instruct
    route_prefix: /models/Qwen/Qwen2.5-3B-Instruct  # REST path for this model
    import_path: model
    args:
      use_megatron: false  # Use HuggingFace Transformers (not Megatron)
      model_id: "ms://Qwen/Qwen2.5-3B-Instruct"  # ModelScope model identifier to load
      adapter_config:
        per_token_adapter_limit: 30  # Max LoRA adapters that can be active simultaneously
        adapter_timeout: 1800        # Seconds before an idle adapter is unloaded
      nproc_per_node: 2  # Number of GPU processes per node
      device_group:      # Logical device group for this model
        name: model
        ranks: [0,1]     # GPU rank indices to use
        device_type: cuda
      device_mesh:       # Distributed training mesh configuration
        device_type: cuda
        mesh: [0,1]                # Device indices in the mesh
        mesh_dim_names: ['dp']     # Mesh dimension names: 'dp' = data parallel
    deployments:
      - name: ModelManagement
        autoscaling_config:
          min_replicas: 1
          max_replicas: 1
          target_ongoing_requests: 16
        ray_actor_options:
          num_cpus: 0.1

  # 3. Processor Service - Handles data preprocessing on CPU
  #    Runs tokenization, template application, and other CPU-bound tasks.
  - name: processor
    route_prefix: /processors
    import_path: processor
    args:
      nproc_per_node: 2       # Number of processor workers per node
      ncpu_proc_per_node: 2   # Number of CPU processes per node
      device_group:
        name: model
        # NOTE(review): scalar 'ranks: 2' is inconsistent with the list form
        # used by every other device_group (e.g. ranks: [0,1]), and
        # 'device_type: CPU' is uppercase while the model/sampler entries use
        # lowercase 'cuda'. Confirm the expected schema before relying on it.
        ranks: 2      # CPU rank index
        device_type: CPU
      device_mesh:
        device_type: CPU
        mesh: [0,1]
        mesh_dim_names: ['dp']
    deployments:
      - name: ProcessorManagement
        autoscaling_config:
          min_replicas: 1
          max_replicas: 1
          target_ongoing_requests: 128
        ray_actor_options:
          num_cpus: 0.1

  # 4. Sampler Service - Handles text generation inference
  #    Uses vLLM for efficient batched generation with optional LoRA adapters.
  - name: sampler-Qwen2.5-3B-Instruct
    route_prefix: /samplers/Qwen/Qwen2.5-3B-Instruct  # REST path for this sampler
    import_path: sampler
    args:
      model_id: "ms://Qwen/Qwen2.5-3B-Instruct"  # ModelScope model identifier to load
      sampler_type: vllm   # Sampler backend (vllm or torch)
      nproc_per_node: 1    # Number of GPU processes per node
      engine_args:         # vLLM engine configuration
        gpu_memory_utilization: 0.4
        max_model_len: 1024
      adapter_config:      # Adapter lifecycle management
        per_token_adapter_limit: 30  # Max LoRA adapters per user
        adapter_timeout: 1800        # Seconds before idle adapter is unloaded
      device_group:
        name: sampler
        ranks: [0]         # GPU rank indices to use
        device_type: cuda
      device_mesh:
        device_type: cuda
        mesh: [0]
        mesh_dim_names: ['dp']
    deployments:
      - name: SamplerManagement
        autoscaling_config:
          min_replicas: 1
          max_replicas: 1
          target_ongoing_requests: 16
        ray_actor_options:
          num_cpus: 0.1
\ No newline at end of file
diff --git a/cookbook/legacy/components/dataset.py b/cookbook/legacy/components/dataset.py
new file mode 100644
index 00000000..85b29784
--- /dev/null
+++ b/cookbook/legacy/components/dataset.py
@@ -0,0 +1,4 @@
# Minimal example: construct a Twinkle Dataset from a ModelScope dataset id.
from twinkle.dataset import Dataset


dataset = Dataset('ms://swift/self-cognition')
\ No newline at end of file
diff --git
a/cookbook/legacy/grpo/dapo_math.py b/cookbook/legacy/grpo/dapo_math.py
new file mode 100644
index 00000000..4d4a2c8b
--- /dev/null
+++ b/cookbook/legacy/grpo/dapo_math.py
@@ -0,0 +1,582 @@
"""
DAPO-Math-17k GRPO training demo for Twinkle.

Uses the AI-ModelScope/DAPO-Math-17k dataset (~17k competition-level math
problems). All ground-truth answers in this dataset are **integers**, making
reward verification straightforward and reliable.

Reward = accuracy_reward only (no format reward).
  - From the last 300 chars of model output, extracts:
      1) ``Answer: `` (the prompt asks for this format)
      2) ``\\boxed{}`` (fallback)
  - Normalizes both prediction and ground truth to integers.
  - Returns 1.0 for correct, 0.0 for incorrect.

Designed for strong instruct models (e.g. Qwen3-30B-A3B-Instruct) that do
NOT use tags. The dataset prompt already contains step-by-step
instructions, so no additional system prompt is needed.

Reference reward implementations:
  - verl: verl/utils/reward_score/math_dapo.py (compute_score)
  - slime: slime/rollout/rm_hub/math_dapo_utils.py (compute_score)
"""
import gc
import os
import re
import time
from typing import List, Tuple, Dict, Any, Optional

from peft import LoraConfig

import twinkle
from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger
from twinkle.advantage import GRPOAdvantage
from twinkle.checkpoint_engine import CheckpointEngineManager
from twinkle.data_format import SamplingParams, SampleResponse
from twinkle.data_format import Trajectory, InputFeature, Message
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.model import TransformersModel
from twinkle.preprocessor import Preprocessor
from twinkle.processor import InputProcessor
from twinkle.reward.base import Reward
from twinkle.sampler import vLLMSampler
from twinkle.template import Template
from twinkle.metric import CompletionRewardMetric

logger = get_logger()

# ========== Configuration ==========
# Every knob is overridable via environment variables so the same script can
# be reused across machines without edits.
MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
USE_MEGATRON = bool(int(os.environ.get('USE_MEGATRON', '1')))

# GPU split: training workers vs vLLM sampler workers (disjoint ranks).
MODEL_GPUS = int(os.environ.get('MODEL_GPUS', 4))
SAMPLER_GPUS = int(os.environ.get('SAMPLER_GPUS', 4))
SAMPLER_TP = int(os.environ.get('SAMPLER_TP', SAMPLER_GPUS // 2))
NUM_GPUS = MODEL_GPUS + SAMPLER_GPUS

NUM_GENERATIONS = int(os.environ.get('NUM_GENERATIONS', 8))
MAX_NEW_TOKENS = int(os.environ.get('MAX_NEW_TOKENS', 4096))
LEARNING_RATE = float(os.environ.get('LR', 1e-5))
GRPO_EPSILON = float(os.environ.get('GRPO_EPSILON', 0.2))
GRPO_BETA = float(os.environ.get('GRPO_BETA', 0.0))
MAX_STEPS = int(os.environ.get('MAX_STEPS', 200))
BATCH_SIZE = int(os.environ.get('BATCH_SIZE', 2))
GRADIENT_ACCUMULATION_STEPS = int(os.environ.get('GRADIENT_ACCUMULATION_STEPS', 1))
TEMPERATURE = float(os.environ.get('TEMPERATURE', 1.0))
WEIGHT_SYNC_INTERVAL = int(os.environ.get('WEIGHT_SYNC_INTERVAL', 1))
ADAPTER_NAME = 'default'
DATA_NUM = int(os.environ.get('DATA_NUM', 17000))  # DAPO-Math-17k has ~17k samples

# SwanLab experiment tracking (requires SWANLAB_API_KEY when enabled).
USE_SWANLAB = bool(int(os.environ.get('USE_SWANLAB', '1')))
if USE_SWANLAB:
    import swanlab
    swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True)
    swanlab.init(project="twinkle-math", config={
        'model_id': MODEL_ID,
        'dataset': 'DAPO-Math-17k',
        'num_gpus': NUM_GPUS,
        'model_gpus': MODEL_GPUS,
        'sampler_gpus': SAMPLER_GPUS,
        'num_generations': NUM_GENERATIONS,
        'max_new_tokens': MAX_NEW_TOKENS,
        'learning_rate': LEARNING_RATE,
        'grpo_epsilon': GRPO_EPSILON,
        'grpo_beta': GRPO_BETA,
        'batch_size': BATCH_SIZE,
        'gradient_accumulation_steps': GRADIENT_ACCUMULATION_STEPS,
    })


# ========== DAPO Math Reward (adapted from verl/slime math_dapo) ==========
# All answers in DAPO-Math-17k are integers, so verification is simpler
# than general MATH problems.
+ +# --- Normalization constants (from verl/slime math_dapo_utils) --- +_SUBSTITUTIONS = [ + ("an ", ""), ("a ", ""), (".$", "$"), ("\\$", ""), (r"\ ", ""), + (" ", ""), ("mbox", "text"), (",\\text{and}", ","), + ("\\text{and}", ","), ("\\text{m}", "\\text{}"), +] +_REMOVED_EXPRESSIONS = [ + "square", "ways", "integers", "dollars", "mph", "inches", "hours", + "km", "units", "\\ldots", "sue", "points", "feet", "minutes", + "digits", "cents", "degrees", "cm", "gm", "pounds", "meters", + "meals", "edges", "students", "childrentickets", "multiples", + "\\text{s}", "\\text{.}", "\\text{\ns}", "\\text{}^2", + "\\text{}^3", "\\text{\n}", "\\text{}", r"\mathrm{th}", + r"^\circ", r"^{\circ}", r"\;", r",\!", "{,}", '"', "\\dots", + "<|im_end|>", "<|endoftext|>", +] + + +def _normalize_final_answer(answer: str) -> str: + """Normalize a math answer string for comparison. + + Adapted from verl/slime math_dapo_utils.normalize_final_answer. + """ + answer = str(answer) + answer = answer.split("=")[-1] + for before, after in _SUBSTITUTIONS: + answer = answer.replace(before, after) + for expr in _REMOVED_EXPRESSIONS: + answer = answer.replace(expr, "") + # Strip LaTeX wrappers + answer = re.sub(r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$", answer) + answer = re.sub(r"(\\text\{)(.*?)(\})", "\\2", answer) + answer = re.sub(r"(\\textbf\{)(.*?)(\})", "\\2", answer) + answer = re.sub(r"(\\overline\{)(.*?)(\})", "\\2", answer) + answer = re.sub(r"(\\boxed\{)(.*)(\})", "\\2", answer) + answer = re.sub(r"(frac)([^{])(.)", "frac{\\2}{\\3}", answer) + answer = re.sub(r"(sqrt)([^{])", "sqrt{\\2}", answer) + answer = answer.replace("$", "") + if answer.replace(",", "").isdigit(): + answer = answer.replace(",", "") + return answer.strip() + + +def _last_boxed_only_string(string: str) -> Optional[str]: + """Extract the last \\boxed{...} from a string.""" + idx = string.rfind("\\boxed{") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if 
string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + return string[idx: right_brace_idx + 1] if right_brace_idx is not None else None + + +def _remove_boxed(s: str) -> str: + """Remove \\boxed{} wrapper.""" + left = "\\boxed{" + if not (s.startswith(left) and s.endswith("}")): + return s + return s[len(left): -1] + + +def _extract_answer_minerva( + solution: str, + pattern: str = r"(?i)Answer\s*:\s*([^\n]+)", +) -> Optional[str]: + """Extract answer via 'Answer: ...' pattern (Minerva-style). + + This is the primary extraction method since the DAPO prompt asks: + "The last line of your response should be of the form Answer: $Answer" + """ + matches = re.findall(pattern, solution) + if matches: + return _normalize_final_answer(matches[-1]) + return None + + +def _extract_answer_boxed(solution: str) -> Optional[str]: + """Extract answer from \\boxed{} (fallback).""" + boxed = _last_boxed_only_string(solution) + if boxed is not None: + try: + return _remove_boxed(boxed) + except Exception: + pass + return None + + +def compute_dapo_score(completion: str, ground_truth: str) -> Dict[str, Any]: + """Compute DAPO-Math reward score for a single sample. + + Adapted from verl/utils/reward_score/math_dapo.py compute_score. + All DAPO-Math-17k answers are integers, so we normalize to int for + comparison. + + Returns dict with 'score' (1.0 or 0.0), 'acc' (bool), 'pred' (str). + """ + # Only look at the tail for efficiency + tail = completion[-300:] if len(completion) > 300 else completion + + # Normalize ground truth to integer string + try: + gt_normalized = str(int(float(ground_truth))) + except (ValueError, OverflowError): + gt_normalized = _normalize_final_answer(ground_truth) + + # Try "Answer: ..." 
extraction first (matches the prompt format)
    pred = _extract_answer_minerva(tail)
    if pred is not None:
        pred_normalized = _normalize_final_answer(pred)
        try:
            pred_int = str(int(float(pred_normalized)))
            correct = (pred_int == gt_normalized)
        except (ValueError, OverflowError):
            # Non-numeric after normalization: fall back to string equality.
            correct = (pred_normalized == gt_normalized)
        return {"score": 1.0 if correct else 0.0, "acc": correct, "pred": pred}

    # Fallback: try \boxed{}
    pred = _extract_answer_boxed(tail)
    if pred is not None:
        pred_normalized = _normalize_final_answer(pred)
        try:
            pred_int = str(int(float(pred_normalized)))
            correct = (pred_int == gt_normalized)
        except (ValueError, OverflowError):
            correct = (pred_normalized == gt_normalized)
        return {"score": 1.0 if correct else 0.0, "acc": correct, "pred": pred}

    # Nothing extractable: zero reward.
    return {"score": 0.0, "acc": False, "pred": None}


# ========== Preprocessor ==========
class DAPOMathProcessor(Preprocessor):
    """Preprocessor for DAPO-Math-17k dataset.

    Dataset fields:
      - data_source: "math_dapo"
      - prompt: list of messages [{"role": "user", "content": "..."}]
      - reward_model: {"ground_truth": "", "style": "..."}
      - ability: "MATH"
      - extra_info: {"index": "..."}

    The prompt already contains instructions ("Solve ... step by step.
    The last line of your response should be of the form Answer: $Answer"),
    so no additional system prompt is needed.
    """

    def __call__(self, row) -> Trajectory:
        # prompt is already a list of message dicts
        prompt_messages = row['prompt']
        ground_truth = row['reward_model']['ground_truth']

        messages = [
            Message(role=msg['role'], content=msg['content'])
            for msg in prompt_messages
        ]
        # ground_truth travels with the trajectory via user_data so the
        # reward function can recover it after generation.
        return Trajectory(
            messages=messages,
            user_data=[('ground_truth', str(ground_truth))],
        )


# ========== Reward Functions ==========
class DAPOMathAccuracyReward(Reward):
    """Accuracy reward for DAPO-Math-17k.

    Extracts the answer from model output and compares with ground truth.
    Uses the same verification logic as verl/slime math_dapo.
    Returns 1.0 for correct, 0.0 for incorrect.
    """

    # NOTE(review): the ground_truths parameter is never read — the ground
    # truth is carried in each trajectory's user_data instead; callers pass
    # []. Confirm this matches the Reward base-class contract.
    def __call__(
        self, trajectories: List[Trajectory], ground_truths: List[Trajectory]
    ) -> List[float]:
        rewards = []
        for trajectory in trajectories:
            # Completion = content of the last assistant message.
            messages = trajectory.get('messages', [])
            completion = ''
            for msg in reversed(messages):
                if msg.get('role') == 'assistant':
                    completion = msg.get('content', '')
                    break

            # Recover the ground truth stored by DAPOMathProcessor.
            gt = ''
            user_data = trajectory.get('user_data', [])
            if isinstance(user_data, list):
                for item in user_data:
                    if isinstance(item, (list, tuple)) and len(item) == 2:
                        if item[0] == 'ground_truth':
                            gt = str(item[1])
                            break

            if completion and gt:
                result = compute_dapo_score(completion, gt)
                rewards.append(result['score'])
            else:
                # Missing completion or ground truth: zero reward.
                rewards.append(0.0)
        return rewards


def create_dapo_math_dataset():
    """Create DAPO-Math-17k dataset.

    Downloads from ModelScope: AI-ModelScope/DAPO-Math-17k
    """
    meta = DatasetMeta(
        "ms://AI-ModelScope/DAPO-Math-17k",
        split='train',
        data_slice=range(DATA_NUM),
    )
    dataset = Dataset(meta)
    dataset.set_template("Template", model_id=MODEL_ID, max_length=2048)
    dataset.map(DAPOMathProcessor())
    dataset.encode(add_generation_prompt=True)
    return dataset


def compute_rewards(
    trajectories: List[Trajectory],
) -> Tuple[List[float], List[float]]:
    """Compute accuracy rewards for DAPO-Math.

    Returns (total_rewards, accuracy_rewards).
    No format reward — instruct model does not need thinking tags.
+ """ + accuracy_reward_fn = DAPOMathAccuracyReward() + accuracy_rewards = accuracy_reward_fn(trajectories, []) + return accuracy_rewards, accuracy_rewards + + +# ========== Main ========== +def main(): + device_groups = [ + DeviceGroup( + name='model', + ranks=list(range(MODEL_GPUS)), + device_type='GPU', + gpus_per_worker=1, + ), + DeviceGroup( + name='sampler', + ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', + gpus_per_worker=SAMPLER_TP, + ), + ] + if USE_MEGATRON: + PP_SIZE = 2 + model_mesh = DeviceMesh.from_sizes( + dp_size=MODEL_GPUS // PP_SIZE, pp_size=PP_SIZE, + ep_size=MODEL_GPUS // PP_SIZE, + ) + else: + model_mesh = DeviceMesh.from_sizes( + world_size=MODEL_GPUS, dp_size=MODEL_GPUS, + ) + assert SAMPLER_GPUS % SAMPLER_TP == 0 + sampler_mesh = DeviceMesh.from_sizes( + world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS // SAMPLER_TP, + tp_size=SAMPLER_TP, + ) + twinkle.initialize( + mode='ray', + nproc_per_node=NUM_GPUS, + groups=device_groups, + lazy_collect=False, + ) + + lora_config = LoraConfig( + target_modules='all-linear', + r=8, + lora_alpha=32, + lora_dropout=0.05, + ) + + # ── Model ───────────────────────────────────────────────────────── + if USE_MEGATRON: + from twinkle.model.megatron import MegatronModel + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + recompute_granularity='full', + recompute_num_layers=None, + ) + else: + model = TransformersModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + ) + + model.add_adapter_to_model( + ADAPTER_NAME, + lora_config, + gradient_accumulation_steps=1, + ) + if USE_MEGATRON: + model.set_optimizer( + 'default', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'default', + lr_decay_steps=MAX_STEPS, + max_lr=LEARNING_RATE, + ) + else: + model.set_optimizer( + 'AdamW', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'CosineAnnealingLR', T_max=MAX_STEPS, eta_min=0, + ) + model.set_loss( + 
'GRPOLoss', + epsilon=GRPO_EPSILON, + beta=GRPO_BETA, + ) + model.set_processor(InputProcessor) + model.set_template('Template', model_id=MODEL_ID) + + # ── Sampler ─────────────────────────────────────────────────────── + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'gpu_memory_utilization': 0.7, + 'max_model_len': 8192, + 'max_loras': 1, + 'max_lora_rank': 32, + 'enable_sleep_mode': False, + 'enable_lora': True, + "logprobs_mode": "processed_logprobs", + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + + # Global batch = prompts for one full gradient accumulation cycle + GLOBAL_BATCH_SIZE = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS + dataloader = DataLoader( + dataset=create_dapo_math_dataset, + batch_size=GLOBAL_BATCH_SIZE, + min_batch_size=GLOBAL_BATCH_SIZE, + device_mesh=model_mesh, + remote_group='model', + num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, + temperature=TEMPERATURE, + top_p=0.95, + ) + + # ── Training loop ───────────────────────────────────────────────── + optim_step = 0 + logger.info(get_device_placement()) + + for batch in dataloader: + if optim_step >= MAX_STEPS: + break + + step_start = time.perf_counter() + metrics.reset() + timings: Dict[str, float] = { + 'weight_sync': 0.0, + 'generate': 0.0, + 'reward': 0.0, + 'advantage': 0.0, + 'train': 0.0, + 'total': 0.0, + } + + global_prompts = batch if isinstance(batch, list) else [batch] + + t0 = time.perf_counter() + if optim_step % WEIGHT_SYNC_INTERVAL == 0: + ckpt_manager.sync_weights(merge_and_sync=False) + sampler.reset_prefix_cache() + timings['weight_sync'] = time.perf_counter() - t0 + + t1 = time.perf_counter() + sample_response = sampler.sample( + global_prompts*NUM_GENERATIONS, + sampling_params, + num_samples=1, + ) + 
timings['generate'] = time.perf_counter() - t1 + + all_input_data: List[Dict[str, Any]] = [] + all_old_logps: List[List[float]] = [] + all_completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + all_input_data.append(sequence.new_input_feature) + all_old_logps.append(sequence.logprobs) + all_completion_lengths.append(len(sequence.tokens)) + + if not all_input_data: + logger.warning( + f"Optim step {optim_step}: No valid samples, skipping" + ) + continue + + # ========== 3. Rewards ========== + t2 = time.perf_counter() + total_rewards, accuracy_rewards = compute_rewards(all_input_data) + timings['reward'] = time.perf_counter() - t2 + + metrics.accumulate( + None, + None, + generate_time=timings['generate'], + weight_sync_time=timings['weight_sync'], + completion_lengths=all_completion_lengths, + rewards={ + 'total': total_rewards, + 'accuracy': accuracy_rewards, + }, + ) + + # ========== 4. Advantages ========== + t3 = time.perf_counter() + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ) + advantages = advantages.tolist() + timings['advantage'] = time.perf_counter() - t3 + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + ) + + # ========== 5. Training ========== + t4 = time.perf_counter() + + model.forward_backward( + inputs=all_input_data, + advantages=advantages, + old_logps=all_old_logps, + ) + + model.clip_grad_and_step() + timings['train'] = time.perf_counter() - t4 + + gc.collect() + from twinkle import torch_util + torch_util.empty_cache() + + timings['total'] = time.perf_counter() - step_start + optim_step += 1 + + # ========== 6. 
Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric(is_training=True)) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/optim_step'] = optim_step + for k, v in timings.items(): + log_dict[f'time/{k}'] = round(v, 2) + + if USE_SWANLAB: + swanlab.log(log_dict) + logger.info(f"[Step {optim_step}/{MAX_STEPS}] {log_dict}") + + logger.info(f"Training completed. optim_steps={optim_step}") + model.save('grpo-math-checkpoint') + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/gsm8k.py b/cookbook/legacy/grpo/gsm8k.py new file mode 100644 index 00000000..9c2a509f --- /dev/null +++ b/cookbook/legacy/grpo/gsm8k.py @@ -0,0 +1,473 @@ +""" +GSM8K GRPO training demo for Twinkle. + +This demo trains a model on Grade School Math (GSM8K) using GRPO. +Reward = accuracy_reward (1.0 if answer correct, 0.0 otherwise) + + format_reward (1.0 if output contains ..., 0.0 otherwise) + +Expected to show reward improvement within ~30-80 steps. 

Reference configs:
  - Verl: run_qwen2_5-3b_gsm8k_grpo_lora.sh (lr=3e-6, n=5, max_resp=1024)
  - TRL: accuracy_reward (math_verify based)
  - Swift: countdown demo (lr=1e-5, n=8, gas=8)
"""
import gc
import os
import re
import time
from typing import List, Tuple, Dict, Any

from peft import LoraConfig

import twinkle
from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger
from twinkle.advantage import GRPOAdvantage
from twinkle.checkpoint_engine import CheckpointEngineManager
from twinkle.data_format import SamplingParams
from twinkle.data_format import Trajectory, Message
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.model import TransformersModel
from twinkle.preprocessor import Preprocessor
from twinkle.processor import InputProcessor
from twinkle.reward.base import Reward
from twinkle.sampler import vLLMSampler
from twinkle.template import Template
from twinkle.metric import CompletionRewardMetric

logger = get_logger()

# ========== Configuration ==========
# All knobs overridable via environment variables.
MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
USE_MEGATRON = bool(int(os.environ.get('USE_MEGATRON', '1')))

MODEL_GPUS = int(os.environ.get('MODEL_GPUS', 4))
SAMPLER_GPUS = int(os.environ.get('SAMPLER_GPUS', 4))
SAMPLER_TP = int(os.environ.get('SAMPLER_TP', SAMPLER_GPUS // 2))
NUM_GPUS = MODEL_GPUS + SAMPLER_GPUS

PP_SIZE = 2
NUM_GENERATIONS = int(os.environ.get('NUM_GENERATIONS', 8))
MAX_NEW_TOKENS = int(os.environ.get('MAX_NEW_TOKENS', 32768))
LEARNING_RATE = float(os.environ.get('LR', 1e-5))
GRPO_EPSILON = float(os.environ.get('GRPO_EPSILON', 0.2))
GRPO_BETA = float(os.environ.get('GRPO_BETA', 0.0))
MAX_STEPS = int(os.environ.get('MAX_STEPS', 200))
BATCH_SIZE = int(os.environ.get('BATCH_SIZE', 2))
GRADIENT_ACCUMULATION_STEPS = int(os.environ.get('GRADIENT_ACCUMULATION_STEPS', 1))
TEMPERATURE = float(os.environ.get('TEMPERATURE', 1.0))
WEIGHT_SYNC_INTERVAL = int(os.environ.get('WEIGHT_SYNC_INTERVAL', 1))
ADAPTER_NAME = 'default'
DATA_NUM = int(os.environ.get('DATA_NUM', 7473))  # GSM8K train split has 7473 samples

# SwanLab experiment tracking (requires SWANLAB_API_KEY when enabled).
USE_SWANLAB = bool(int(os.environ.get('USE_SWANLAB', '1')))
if USE_SWANLAB:
    import swanlab
    swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True)
    swanlab.init(project="twinkle-gsm8k", config={
        'model_id': MODEL_ID,
        'num_gpus': NUM_GPUS,
        'model_gpus': MODEL_GPUS,
        'sampler_gpus': SAMPLER_GPUS,
        'num_generations': NUM_GENERATIONS,
        'max_new_tokens': MAX_NEW_TOKENS,
        'learning_rate': LEARNING_RATE,
        'grpo_epsilon': GRPO_EPSILON,
        'grpo_beta': GRPO_BETA,
        'batch_size': BATCH_SIZE,
        'gradient_accumulation_steps': GRADIENT_ACCUMULATION_STEPS,
    })


# NOTE(review): "in tags" and " ... reasoning ... " read like stripped
# <think>...</think> markup — the prompt likely intended the literal tag
# text. Confirm against the original source before changing the string.
SYSTEM_PROMPT = (
    "You are a helpful math assistant. Solve the problem step by step. "
    "Show your reasoning in tags, then give the final "
    "numerical answer after ####.\n"
    "For example:\n ... reasoning ... \n#### 42"
)


class GSM8KProcessor(Preprocessor):
    """Preprocessor for GSM8K dataset.

    GSM8K fields: question (str), answer (str ending with '#### ')
    Extracts the ground truth number and stores it in user_data for reward.
    """

    @staticmethod
    def extract_ground_truth(answer_str: str) -> str:
        """Extract the number after '####' from GSM8K answer."""
        match = re.search(r'####\s*([\-\d,\.]+)', answer_str)
        if match:
            # Strip thousands separators so numeric comparison works later.
            return match.group(1).replace(',', '').strip()
        return ''

    def __call__(self, row) -> Trajectory:
        question = row['question']
        answer = row.get('answer', '')
        ground_truth = self.extract_ground_truth(answer)

        messages = [
            Message(role='system', content=SYSTEM_PROMPT),
            Message(role='user', content=question),
        ]
        # ground_truth travels with the trajectory so the reward can read it.
        return Trajectory(
            messages=messages,
            user_data=[('ground_truth', ground_truth)],
        )


# ========== GSM8K Reward Functions ==========
class GSM8KAccuracyReward(Reward):
    """Accuracy reward for GSM8K: checks if the model's answer matches ground truth.

    Extracts the last '#### ' from model output and compares with ground truth.
    Returns 1.0 for correct, 0.0 for incorrect.
    """

    @staticmethod
    def extract_answer(completion: str) -> str:
        """Extract the last #### answer from model completion."""
        # Only check last 500 chars for efficiency
        text = completion[-500:] if len(completion) > 500 else completion
        matches = re.findall(r'####\s*([\-\d,\.\s]+)', text)
        if matches:
            return matches[-1].replace(',', '').replace(' ', '').strip()
        return ''

    # NOTE(review): ground_truths parameter is unused — the ground truth is
    # carried in each trajectory's user_data; callers pass []. Confirm this
    # matches the Reward base-class contract.
    def __call__(
        self, trajectories: List[Trajectory], ground_truths: List[Trajectory]
    ) -> List[float]:
        rewards = []
        for trajectory in trajectories:
            messages = trajectory.get('messages', [])
            # Get model completion (last assistant message)
            completion = ''
            for msg in reversed(messages):
                if msg.get('role') == 'assistant':
                    completion = msg.get('content', '')
                    break

            # Get ground truth from user_data
            gt = ''
            user_data = trajectory.get('user_data', [])
            if isinstance(user_data, list):
                for item in user_data:
                    if isinstance(item, (list, tuple)) and len(item) == 2:
                        if item[0] == 'ground_truth':
                            gt = str(item[1])
                            break

            predicted = self.extract_answer(completion)

            # Numeric comparison with small tolerance; fall back to string
            # equality when either side fails to parse as a float.
            correct = False
            if predicted and gt:
                try:
                    correct = abs(float(predicted) - float(gt)) < 1e-5
                except (ValueError, OverflowError):
                    correct = predicted == gt

            rewards.append(1.0 if correct else 0.0)
        return rewards


class GSM8KFormatReward(Reward):
    """Format reward: checks if output contains ... tag.

    Returns 1.0 if format is correct, 0.0 otherwise.
    """

    def __call__(
        self, trajectories: List[Trajectory], ground_truths: List[Trajectory]
    ) -> List[float]:
        rewards = []
        for trajectory in trajectories:
            messages = trajectory.get('messages', [])
            completion = ''
            for msg in reversed(messages):
                if msg.get('role') == 'assistant':
                    completion = msg.get('content', '')
                    break
            # NOTE(review): the pattern r'.*?' matches the empty string, so
            # has_think is ALWAYS True and this check is a no-op. The
            # docstring says it verifies a thinking tag — the pattern likely
            # lost literal <think>/</think> markup. Confirm and restore.
            has_think = bool(
                re.search(r'.*?', completion, re.DOTALL)
            )
            has_answer = bool(re.search(r'####\s*[\-\d,\.]+', completion))
            rewards.append(1.0 if (has_think and has_answer) else 0.0)
        return rewards


def create_gsm8k_dataset():
    """Create GSM8K dataset (ModelScope modelscope/gsm8k, 'main' subset)."""
    meta = DatasetMeta(
        "ms://modelscope/gsm8k",
        subset_name='main', split='train',
        data_slice=range(DATA_NUM),
    )
    dataset = Dataset(meta)
    dataset.set_template("Template", model_id=MODEL_ID, max_length=2048)
    dataset.map(GSM8KProcessor())
    dataset.encode(add_generation_prompt=True)
    return dataset


def compute_rewards(
    trajectories: List[Trajectory],
) -> Tuple[List[float], List[float], List[float]]:
    """Compute accuracy and format rewards for GSM8K.

    Returns (total_rewards, format_rewards, accuracy_rewards), where
    total = accuracy + format per sample.
    """
    accuracy_reward_fn = GSM8KAccuracyReward()
    format_reward_fn = GSM8KFormatReward()

    accuracy_rewards = accuracy_reward_fn(trajectories, [])
    format_rewards = format_reward_fn(trajectories, [])
    total_rewards = [a + f for a, f in zip(accuracy_rewards, format_rewards)]
    return total_rewards, format_rewards, accuracy_rewards


# ========== Main ==========
def main():
    device_groups = [
        DeviceGroup(
            name='model',
            ranks=list(range(MODEL_GPUS)),
device_type='GPU', + gpus_per_worker=1, + ), + DeviceGroup( + name='sampler', + ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', + gpus_per_worker=SAMPLER_TP, + ), + ] + if USE_MEGATRON: + model_mesh = DeviceMesh.from_sizes( + dp_size=MODEL_GPUS//PP_SIZE, pp_size=PP_SIZE, ep_size=MODEL_GPUS // PP_SIZE, + ) + else: + model_mesh = DeviceMesh.from_sizes( + world_size=MODEL_GPUS, dp_size=MODEL_GPUS, + ) + assert SAMPLER_GPUS % SAMPLER_TP == 0 + sampler_mesh = DeviceMesh.from_sizes( + world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS // SAMPLER_TP, tp_size=SAMPLER_TP + ) + twinkle.initialize( + mode='ray', + nproc_per_node=NUM_GPUS, + groups=device_groups, + lazy_collect=False, + ) + + lora_config = LoraConfig( + target_modules="all-linear", + r=8, + lora_alpha=32, + lora_dropout=0.05, + ) + + if USE_MEGATRON: + from twinkle.model.megatron import MegatronModel + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + ) + else: + model = TransformersModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + ) + + model.add_adapter_to_model( + ADAPTER_NAME, + lora_config, + gradient_accumulation_steps=1, + ) + if USE_MEGATRON: + model.set_optimizer( + 'default', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'default', + lr_decay_steps=MAX_STEPS, + max_lr=LEARNING_RATE, + ) + else: + model.set_optimizer( + 'AdamW', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'CosineAnnealingLR', T_max=MAX_STEPS, eta_min=0, + ) + model.set_loss( + 'GRPOLoss', + epsilon=GRPO_EPSILON, + beta=GRPO_BETA, + ) + model.set_processor(InputProcessor) + model.set_template('Template', model_id=MODEL_ID) + + # ── Sampler (load real weights for meaningful generation) ───────── + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'gpu_memory_utilization': 0.7, + 'max_model_len': 8192, + 'max_loras': 1, + 'max_lora_rank': 32, + 'enable_sleep_mode': False, + 'enable_lora': True, + 
"logprobs_mode": "processed_logprobs", + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + + # Global batch = prompts for one full gradient accumulation cycle + GLOBAL_BATCH_SIZE = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS + dataloader = DataLoader( + dataset=create_gsm8k_dataset, + batch_size=GLOBAL_BATCH_SIZE, + min_batch_size=GLOBAL_BATCH_SIZE, + device_mesh=model_mesh, + remote_group='model', + num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, + temperature=TEMPERATURE, + top_p=0.95, + ) + + # ── Training loop ──────────────────────────────────────────────── + optim_step = 0 + logger.info(get_device_placement()) + + for batch in dataloader: + if optim_step >= MAX_STEPS: + break + + step_start = time.perf_counter() + metrics.reset() + timings: Dict[str, float] = { + 'weight_sync': 0.0, + 'generate': 0.0, + 'reward': 0.0, + 'advantage': 0.0, + 'train': 0.0, + 'total': 0.0, + } + + global_prompts = batch if isinstance(batch, list) else [batch] + + t0 = time.perf_counter() + if optim_step % WEIGHT_SYNC_INTERVAL == 0: + ckpt_manager.sync_weights(merge_and_sync=False) + sampler.reset_prefix_cache() + timings['weight_sync'] = time.perf_counter() - t0 + + t1 = time.perf_counter() + sample_response = sampler.sample( + global_prompts*NUM_GENERATIONS, + sampling_params, + num_samples=1, + ) + timings['generate'] = time.perf_counter() - t1 + + all_input_data: List[Dict[str, Any]] = [] + all_old_logps: List[List[float]] = [] + all_completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + all_input_data.append(sequence.new_input_feature) + all_old_logps.append(sequence.logprobs) + all_completion_lengths.append(len(sequence.tokens)) + + if not all_input_data: + logger.warning( + f"Optim step {optim_step}: No 
valid samples, skipping" + ) + continue + + # ========== 3. Rewards ========== + t2 = time.perf_counter() + total_rewards, format_rewards, accuracy_rewards = compute_rewards( + all_input_data + ) + timings['reward'] = time.perf_counter() - t2 + + metrics.accumulate( + None, + None, + generate_time=timings['generate'], + weight_sync_time=timings['weight_sync'], + completion_lengths=all_completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }, + ) + + # ========== 4. Advantages ========== + t3 = time.perf_counter() + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ) + advantages = advantages.tolist() + timings['advantage'] = time.perf_counter() - t3 + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + ) + + # ========== 5. Training ========== + t4 = time.perf_counter() + + model.forward_backward( + inputs=all_input_data, + advantages=advantages, + old_logps=all_old_logps, + ) + + model.clip_grad_and_step() + timings['train'] = time.perf_counter() - t4 + + gc.collect() + from twinkle import torch_util + torch_util.empty_cache() + + timings['total'] = time.perf_counter() - step_start + optim_step += 1 + + # ========== 6. Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric(is_training=True)) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/optim_step'] = optim_step + for k, v in timings.items(): + log_dict[f'time/{k}'] = round(v, 2) + + if USE_SWANLAB: + swanlab.log(log_dict) + logger.info(f"[Step {optim_step}/{MAX_STEPS}] {log_dict}") + + logger.info(f"Training completed. 
optim_steps={optim_step}") + model.save('grpo-gsm8k-checkpoint') + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/gsm8k_dense.py b/cookbook/legacy/grpo/gsm8k_dense.py new file mode 100644 index 00000000..30fb1d7a --- /dev/null +++ b/cookbook/legacy/grpo/gsm8k_dense.py @@ -0,0 +1,464 @@ +""" +GSM8K GRPO training demo for Twinkle. + +This demo trains a model on Grade School Math (GSM8K) using GRPO. +Reward = accuracy_reward (1.0 if answer correct, 0.0 otherwise) + + format_reward (1.0 if output contains ..., 0.0 otherwise) + +Expected to show reward improvement within ~30-80 steps. + +Reference configs: + - Verl: run_qwen2_5-3b_gsm8k_grpo_lora.sh (lr=3e-6, n=5, max_resp=1024) + - TRL: accuracy_reward (math_verify based) + - Swift: countdown demo (lr=1e-5, n=8, gas=8) +""" +import gc +import os +import re +import time +from typing import List, Tuple, Dict, Any + +from peft import LoraConfig + +import twinkle +from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.checkpoint_engine import CheckpointEngineManager +from twinkle.data_format import SamplingParams +from twinkle.data_format import Trajectory, Message +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import Preprocessor +from twinkle.processor import InputProcessor +from twinkle.reward.base import Reward +from twinkle.sampler import vLLMSampler +from twinkle.template import Template +from twinkle.metric import CompletionRewardMetric + +logger = get_logger() + +# ========== Configuration ========== +MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen2.5-3B-Instruct') +USE_MEGATRON = bool(int(os.environ.get('USE_MEGATRON', '1'))) + +MODEL_GPUS = int(os.environ.get('MODEL_GPUS', 4)) +SAMPLER_GPUS = int(os.environ.get('SAMPLER_GPUS',4)) +SAMPLER_TP = int(os.environ.get('SAMPLER_TP', 1)) 
+NUM_GPUS = MODEL_GPUS + SAMPLER_GPUS + +NUM_GENERATIONS = int(os.environ.get('NUM_GENERATIONS', 8)) +MAX_NEW_TOKENS = int(os.environ.get('MAX_NEW_TOKENS', 4096)) +LEARNING_RATE = float(os.environ.get('LR', 1e-5)) +GRPO_EPSILON = float(os.environ.get('GRPO_EPSILON', 0.2)) +GRPO_BETA = float(os.environ.get('GRPO_BETA', 0.0)) +MAX_STEPS = int(os.environ.get('MAX_STEPS', 200)) +BATCH_SIZE = int(os.environ.get('BATCH_SIZE', 4)) +GRADIENT_ACCUMULATION_STEPS = int(os.environ.get('GRADIENT_ACCUMULATION_STEPS', 1)) +TEMPERATURE = float(os.environ.get('TEMPERATURE', 1.0)) +WEIGHT_SYNC_INTERVAL = int(os.environ.get('WEIGHT_SYNC_INTERVAL', 1)) +ADAPTER_NAME = 'default' +DATA_NUM = int(os.environ.get('DATA_NUM', 7473)) # GSM8K train split has 7473 samples + +USE_SWANLAB = bool(int(os.environ.get('USE_SWANLAB', '1'))) +if USE_SWANLAB: + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + swanlab.init(project="twinkle-gsm8k", config={ + 'model_id': MODEL_ID, + 'num_gpus': NUM_GPUS, + 'model_gpus': MODEL_GPUS, + 'sampler_gpus': SAMPLER_GPUS, + 'num_generations': NUM_GENERATIONS, + 'max_new_tokens': MAX_NEW_TOKENS, + 'learning_rate': LEARNING_RATE, + 'grpo_epsilon': GRPO_EPSILON, + 'grpo_beta': GRPO_BETA, + 'batch_size': BATCH_SIZE, + 'gradient_accumulation_steps': GRADIENT_ACCUMULATION_STEPS, + }) + + +SYSTEM_PROMPT = ( + "You are a helpful math assistant. Solve the problem step by step. " + "Show your reasoning in tags, then give the final " + "numerical answer after ####.\n" + "For example:\n ... reasoning ... \n#### 42" +) + + +class GSM8KProcessor(Preprocessor): + """Preprocessor for GSM8K dataset. + + GSM8K fields: question (str), answer (str ending with '#### ') + Extracts the ground truth number and stores it in user_data for reward. 
+ """ + + @staticmethod + def extract_ground_truth(answer_str: str) -> str: + """Extract the number after '####' from GSM8K answer.""" + match = re.search(r'####\s*([\-\d,\.]+)', answer_str) + if match: + return match.group(1).replace(',', '').strip() + return '' + + def __call__(self, row) -> Trajectory: + question = row['question'] + answer = row.get('answer', '') + ground_truth = self.extract_ground_truth(answer) + + messages = [ + Message(role='system', content=SYSTEM_PROMPT), + Message(role='user', content=question), + ] + return Trajectory( + messages=messages, + user_data=[('ground_truth', ground_truth)], + ) + + +# ========== GSM8K Reward Functions ========== +class GSM8KAccuracyReward(Reward): + """Accuracy reward for GSM8K: checks if the model's answer matches ground truth. + + Extracts the last '#### ' from model output and compares with ground truth. + Returns 1.0 for correct, 0.0 for incorrect. + """ + + @staticmethod + def extract_answer(completion: str) -> str: + """Extract the last #### answer from model completion.""" + # Only check last 500 chars for efficiency + text = completion[-500:] if len(completion) > 500 else completion + matches = re.findall(r'####\s*([\-\d,\.\s]+)', text) + if matches: + return matches[-1].replace(',', '').replace(' ', '').strip() + return '' + + def __call__( + self, trajectories: List[Trajectory], ground_truths: List[Trajectory] + ) -> List[float]: + rewards = [] + for trajectory in trajectories: + messages = trajectory.get('messages', []) + # Get model completion (last assistant message) + completion = '' + for msg in reversed(messages): + if msg.get('role') == 'assistant': + completion = msg.get('content', '') + break + + # Get ground truth from user_data + gt = '' + user_data = trajectory.get('user_data', []) + if isinstance(user_data, list): + for item in user_data: + if isinstance(item, (list, tuple)) and len(item) == 2: + if item[0] == 'ground_truth': + gt = str(item[1]) + break + + predicted = 
self.extract_answer(completion) + + # Numeric comparison + correct = False + if predicted and gt: + try: + correct = abs(float(predicted) - float(gt)) < 1e-5 + except (ValueError, OverflowError): + correct = predicted == gt + + rewards.append(1.0 if correct else 0.0) + return rewards + + +class GSM8KFormatReward(Reward): + """Format reward: checks if output contains ... tag. + + Returns 1.0 if format is correct, 0.0 otherwise. + """ + + def __call__( + self, trajectories: List[Trajectory], ground_truths: List[Trajectory] + ) -> List[float]: + rewards = [] + for trajectory in trajectories: + messages = trajectory.get('messages', []) + completion = '' + for msg in reversed(messages): + if msg.get('role') == 'assistant': + completion = msg.get('content', '') + break + has_think = bool( + re.search(r'.*?', completion, re.DOTALL) + ) + has_answer = bool(re.search(r'####\s*[\-\d,\.]+', completion)) + rewards.append(1.0 if (has_think and has_answer) else 0.0) + return rewards + + +def create_gsm8k_dataset(): + """Create GSM8K dataset.""" + meta = DatasetMeta( + "ms://modelscope/gsm8k", + subset_name='main', split='train', + data_slice=range(DATA_NUM), + ) + dataset = Dataset(meta) + dataset.set_template("Template", model_id=MODEL_ID, max_length=2048) + dataset.map(GSM8KProcessor()) + dataset.encode(add_generation_prompt=True) + return dataset + + +def compute_rewards( + trajectories: List[Trajectory], +) -> Tuple[List[float], List[float], List[float]]: + """Compute accuracy and format rewards for GSM8K.""" + accuracy_reward_fn = GSM8KAccuracyReward() + format_reward_fn = GSM8KFormatReward() + + accuracy_rewards = accuracy_reward_fn(trajectories, []) + format_rewards = format_reward_fn(trajectories, []) + total_rewards = [a + f for a, f in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + + +# ========== Main ========== +def main(): + device_groups = [ + DeviceGroup( + name='model', + ranks=list(range(MODEL_GPUS)), + 
device_type='GPU', + gpus_per_worker=1, + ), + DeviceGroup( + name='sampler', + ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', + gpus_per_worker=SAMPLER_TP, + ), + ] + if USE_MEGATRON: + model_mesh = DeviceMesh.from_sizes( + world_size=MODEL_GPUS, dp_size=MODEL_GPUS, + ) + else: + model_mesh = DeviceMesh.from_sizes( + world_size=MODEL_GPUS, dp_size=MODEL_GPUS, + ) + assert SAMPLER_GPUS % SAMPLER_TP == 0 + sampler_mesh = DeviceMesh.from_sizes( + world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS // SAMPLER_TP, tp_size=SAMPLER_TP + ) + twinkle.initialize( + mode='ray', + nproc_per_node=NUM_GPUS, + groups=device_groups, + lazy_collect=False, + ) + + lora_config = LoraConfig( + target_modules="all-linear", + r=8, + lora_alpha=32, + lora_dropout=0.05, + ) + + if USE_MEGATRON: + from twinkle.model.megatron import MegatronModel + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + recompute_granularity='full', + recompute_num_layers=None, + ) + else: + model = TransformersModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + ) + + model.add_adapter_to_model( + ADAPTER_NAME, + lora_config, + gradient_accumulation_steps=1, + ) + if USE_MEGATRON: + model.set_optimizer( + 'default', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'default', + lr_decay_steps=MAX_STEPS, + max_lr=LEARNING_RATE, + ) + else: + model.set_optimizer( + 'AdamW', lr=LEARNING_RATE, + ) + model.set_lr_scheduler( + 'CosineAnnealingLR', T_max=MAX_STEPS, eta_min=0, + ) + model.set_loss( + 'GRPOLoss', + epsilon=GRPO_EPSILON, + beta=GRPO_BETA, + ) + model.set_processor(InputProcessor) + model.set_template('Template', model_id=MODEL_ID) + + # ── Sampler (load real weights for meaningful generation) ───────── + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'gpu_memory_utilization': 0.7, + 'max_model_len': 4096, + 'max_loras': 1, + 'max_lora_rank': 32, + + 'enable_sleep_mode': False, + 
'enable_lora': True, + "logprobs_mode": "processed_logprobs", + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + + # Global batch = prompts for one full gradient accumulation cycle + GLOBAL_BATCH_SIZE = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS + dataloader = DataLoader( + dataset=create_gsm8k_dataset, + batch_size=GLOBAL_BATCH_SIZE, + min_batch_size=GLOBAL_BATCH_SIZE, + device_mesh=model_mesh, + remote_group='model', + num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, + temperature=TEMPERATURE, + top_p=0.95, + ) + + optim_step = 0 + logger.info(get_device_placement()) + + for batch in dataloader: + if optim_step >= MAX_STEPS: + break + + step_start = time.perf_counter() + metrics.reset() + timings: Dict[str, float] = { + 'weight_sync': 0.0, + 'generate': 0.0, + 'reward': 0.0, + 'advantage': 0.0, + 'train': 0.0, + 'total': 0.0, + } + + global_prompts = batch if isinstance(batch, list) else [batch] + + t0 = time.perf_counter() + if optim_step % WEIGHT_SYNC_INTERVAL == 0: + ckpt_manager.sync_weights(merge_and_sync=False) + sampler.reset_prefix_cache() + timings['weight_sync'] = time.perf_counter() - t0 + + t1 = time.perf_counter() + sample_response = sampler.sample( + global_prompts*NUM_GENERATIONS, + sampling_params, + num_samples=1, + ) + timings['generate'] = time.perf_counter() - t1 + + all_input_data: List[Dict[str, Any]] = [] + all_old_logps: List[List[float]] = [] + all_completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + all_input_data.append(sequence.new_input_feature) + all_old_logps.append(sequence.logprobs) + all_completion_lengths.append(len(sequence.tokens)) + + t2 = time.perf_counter() + total_rewards, format_rewards, accuracy_rewards = compute_rewards( + all_input_data + ) + 
timings['reward'] = time.perf_counter() - t2 + + metrics.accumulate( + None, + None, + generate_time=timings['generate'], + weight_sync_time=timings['weight_sync'], + completion_lengths=all_completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }, + ) + + advantages = advantage_fn( + total_rewards, + num_generations=NUM_GENERATIONS, + scale='group', + ) + advantages = advantages.tolist() + + frac_zero_std = ( + 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + ) + + # ========== 5. Training ========== + t3 = time.perf_counter() + + + model.forward_backward( + inputs=all_input_data, + advantages=advantages, + old_logps=all_old_logps, + ) + + model.clip_grad_and_step() + timings['train'] = time.perf_counter() - t3 + + gc.collect() + from twinkle import torch_util + torch_util.empty_cache() + + timings['total'] = time.perf_counter() - step_start + optim_step += 1 + + # ========== 6. Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric(is_training=True)) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/optim_step'] = optim_step + for k, v in timings.items(): + log_dict[f'time/{k}'] = round(v, 2) + + if USE_SWANLAB: + swanlab.log(log_dict) + logger.info(f"[Step {optim_step}/{MAX_STEPS}] {log_dict}") + + logger.info(f"Training completed. 
optim_steps={optim_step}") + model.save('grpo-gsm8k-checkpoint') + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/lora.py b/cookbook/legacy/grpo/lora.py new file mode 100644 index 00000000..72dd54ee --- /dev/null +++ b/cookbook/legacy/grpo/lora.py @@ -0,0 +1,241 @@ +import gc +import os +import time +from typing import List, Tuple, Dict, Any + +from peft import LoraConfig + +import twinkle +from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.checkpoint_engine import CheckpointEngineManager +from twinkle.data_format import SamplingParams, SampleResponse +from twinkle.data_format import Trajectory, InputFeature +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.processor import InputProcessor +from twinkle.sampler import vLLMSampler +from twinkle.template import Template +from twinkle.metric import CompletionRewardMetric + +logger = get_logger() + +# ========== Configuration ========== +MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen2.5-3B-Instruct') +NUM_GPUS = int(os.environ.get('NUM_GPUS', 4)) +MODEL_GPUS = int(os.environ.get('MODEL_GPUS', NUM_GPUS // 2)) +SAMPLER_GPUS = NUM_GPUS - MODEL_GPUS +NUM_GENERATIONS = int(os.environ.get('NUM_GENERATIONS', 4)) +MAX_NEW_TOKENS = int(os.environ.get('MAX_NEW_TOKENS', 1024)) +LEARNING_RATE = float(os.environ.get('LR', 1e-5)) +GRPO_EPSILON = float(os.environ.get('GRPO_EPSILON', 0.2)) +GRPO_BETA = float(os.environ.get('GRPO_BETA', 0.0)) +MAX_STEPS = int(os.environ.get('MAX_STEPS', 2000)) +BATCH_SIZE = int(os.environ.get('BATCH_SIZE', 2)) +GRADIENT_ACCUMULATION_STEPS = int(os.environ.get('GRADIENT_ACCUMULATION_STEPS', 1)) +TEMPERATURE = float(os.environ.get('TEMPERATURE', 1.0)) +WEIGHT_SYNC_INTERVAL = int(os.environ.get('WEIGHT_SYNC_INTERVAL', 1)) +ADAPTER_NAME = 'default' +DATA_NUM = 500 +USE_MEGATRON = False + +# 
SwanLab is optional - only used if SWANLAB_API_KEY is set +USE_SWANLAB = 'SWANLAB_API_KEY' in os.environ +if USE_SWANLAB: + import swanlab + if USE_SWANLAB: + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + swanlab.init(project="ms-swift", config={ + 'model_id': MODEL_ID, + 'num_gpus': NUM_GPUS, + 'model_gpus': MODEL_GPUS, + 'sampler_gpus': SAMPLER_GPUS, + 'num_generations': NUM_GENERATIONS, + 'learning_rate': LEARNING_RATE, + 'grpo_beta': GRPO_BETA, + 'batch_size': BATCH_SIZE, + 'gradient_accumulation_steps': GRADIENT_ACCUMULATION_STEPS, + }) + else: + logger.info("SWANLAB_API_KEY not set, running without experiment tracking") + + +def create_countdown_dataset(): + """Create Countdown Game dataset.""" + from twinkle.preprocessor import CountdownProcessor + dataset = Dataset(DatasetMeta("ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(DATA_NUM))) + dataset.set_template("Template", model_id=MODEL_ID, max_length=8192) + dataset.map(CountdownProcessor()) + dataset.encode(add_generation_prompt=True) + return dataset + + +def compute_rewards(trajectories: List[Trajectory]) -> Tuple[List[float], List[float], List[float]]: + """Compute format and accuracy rewards.""" + from twinkle.reward import CountDownAccuracy, FormatReward + format_rewards = FormatReward()(trajectories, []) + accuracy_rewards = CountDownAccuracy()(trajectories, []) + total_rewards = [a+b for a, b in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + +def main(): + device_groups = [ + DeviceGroup(name='model', ranks=list(range(MODEL_GPUS)), + device_type='GPU', gpus_per_worker=1), + DeviceGroup(name='sampler', ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', gpus_per_worker=1), + ] + if USE_MEGATRON: + model_mesh = DeviceMesh.from_sizes(dp_size=MODEL_GPUS, tp_size=2, pp_size=2) + else: + model_mesh = DeviceMesh.from_sizes(world_size=MODEL_GPUS, dp_size=MODEL_GPUS) + sampler_mesh = 
DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS) + twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False) + logger.info(get_device_placement()) + + lora_config = LoraConfig( + target_modules="all-linear", r=8, lora_alpha=32, lora_dropout=0.05, + ) + + # ── Model (training) ────────────────────────────────────────────── + if USE_MEGATRON: + from twinkle.model.megatron import MegatronModel + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + recompute_granularity='selective', + recompute_num_layers=None, + ) + else: + model = TransformersModel( + model_id=MODEL_ID, device_mesh=model_mesh, remote_group='model', + ) + + + model.add_adapter_to_model( + ADAPTER_NAME, lora_config, + gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS, + ) + model.set_optimizer('AdamW', lr=LEARNING_RATE, adapter_name=ADAPTER_NAME) + model.set_lr_scheduler('LinearLR', adapter_name=ADAPTER_NAME) + model.set_loss('GRPOLoss', adapter_name=ADAPTER_NAME, + epsilon=GRPO_EPSILON, beta=GRPO_BETA) + model.set_processor(InputProcessor, adapter_name=ADAPTER_NAME) + model.set_template('Template', model_id=MODEL_ID, adapter_name=ADAPTER_NAME) + + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'load_format': 'dummy', + 'gpu_memory_utilization': 0.7, + 'max_model_len': 2048, + 'enable_sleep_mode': False, + 'enable_lora': True, + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + dataloader = DataLoader( + dataset=create_countdown_dataset, batch_size=BATCH_SIZE, min_batch_size=BATCH_SIZE, + device_mesh=model_mesh, remote_group='model', num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, 
top_p=0.95, + ) + step = 0 + + for batch in dataloader: + if step >= MAX_STEPS: + break + + metrics.reset() + + prompts = batch if isinstance(batch, list) else [batch] + + weight_sync_time = None + # ========== 1. Weight Sync ========== + if step % WEIGHT_SYNC_INTERVAL == 0: + sync_start = time.perf_counter() + ckpt_manager.sync_weights(adapter_name=ADAPTER_NAME) + weight_sync_time = time.perf_counter() - sync_start + + # ========== 2. Generate ========== + gen_start = time.perf_counter() + sample_response = sampler.sample(prompts, sampling_params, num_samples=NUM_GENERATIONS) + generate_time = time.perf_counter() - gen_start + + input_data : List[Dict[str, Any]] = [] + old_logps_list: List[List[float]] = [] + completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + input_data.append(sequence.new_input_feature) + old_logps_list.append(sequence.logprobs) + completion_lengths.append(len(sequence.tokens)) + + if not input_data: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + # ========== 4. Compute rewards ========== + total_rewards, format_rewards, accuracy_rewards = compute_rewards(input_data) + metrics.accumulate(None, None, + generate_time=generate_time, + weight_sync_time=weight_sync_time, + completion_lengths=completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }) + + # ========== 5. Compute advantages ========== + advantages = advantage_fn(total_rewards, num_generations=NUM_GENERATIONS, scale='group') + # Convert to list so dispatch='slice_dp' slices it in sync with inputs + advantages = advantages.tolist() + + frac_zero_std = 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + if frac_zero_std == 1.0: + logger.info(f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + # ========== 6. Training step ========== + # Pass InputFeature list directly (exact token alignment with sampler). 
+ # advantages and old_logps are lists, sliced in sync by dispatch. + model.forward_backward( + inputs=input_data, + adapter_name=ADAPTER_NAME, + advantages=advantages, + old_logps=old_logps_list, + ) + model.clip_grad_and_step(adapter_name=ADAPTER_NAME) + + from twinkle import torch_util + gc.collect() + torch_util.empty_cache() + + # ========== 7. Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric()) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + if USE_SWANLAB: + swanlab.log(log_dict) + logger.info(log_dict) + step += 1 + + model.save('grpo-countdown-checkpoint', adapter_name=ADAPTER_NAME) + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/lora_backup.py b/cookbook/legacy/grpo/lora_backup.py new file mode 100644 index 00000000..03920f64 --- /dev/null +++ b/cookbook/legacy/grpo/lora_backup.py @@ -0,0 +1,288 @@ +import gc +import os +import time +from typing import List, Tuple, Dict, Any + +from peft import LoraConfig + +import twinkle +from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.checkpoint_engine import CheckpointEngineManager +from twinkle.data_format import SamplingParams, SampleResponse +from twinkle.data_format import Trajectory, InputFeature +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.processor import InputProcessor +from twinkle.sampler import vLLMSampler +from twinkle.template import Template +from twinkle.metric import CompletionRewardMetric + +logger = get_logger() + +# ========== Configuration ========== +MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen2.5-3B-Instruct') +USE_MEGATRON = bool(int(os.environ.get('USE_MEGATRON', '0'))) + +MODEL_GPUS = int(os.environ.get('MODEL_GPUS', 4)) +SAMPLER_GPUS = int(os.environ.get('SAMPLER_GPUS', 2)) +NUM_GPUS = MODEL_GPUS + SAMPLER_GPUS + 
+NUM_GENERATIONS = int(os.environ.get('NUM_GENERATIONS', 8)) +MAX_NEW_TOKENS = int(os.environ.get('MAX_NEW_TOKENS', 2048)) +LEARNING_RATE = float(os.environ.get('LR', 1e-6)) +GRPO_EPSILON = float(os.environ.get('GRPO_EPSILON', 0.2)) +GRPO_BETA = float(os.environ.get('GRPO_BETA', 0.0)) +MAX_STEPS = int(os.environ.get('MAX_STEPS', 100)) +BATCH_SIZE = int(os.environ.get('BATCH_SIZE', 4)) +GRADIENT_ACCUMULATION_STEPS = int(os.environ.get('GRADIENT_ACCUMULATION_STEPS', 8)) +TEMPERATURE = float(os.environ.get('TEMPERATURE', 1.0)) +WEIGHT_SYNC_INTERVAL = int(os.environ.get('WEIGHT_SYNC_INTERVAL', 1)) +ADAPTER_NAME = 'default' +DATA_NUM = int(os.environ.get('DATA_NUM', 5000)) + +# SwanLab is optional - only used if SWANLAB_API_KEY is set +USE_SWANLAB = True +if USE_SWANLAB: + import swanlab + if USE_SWANLAB: + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + swanlab.init(project="ms-swift", config={ + 'model_id': MODEL_ID, + 'num_gpus': NUM_GPUS, + 'model_gpus': MODEL_GPUS, + 'sampler_gpus': SAMPLER_GPUS, + 'num_generations': NUM_GENERATIONS, + 'learning_rate': LEARNING_RATE, + 'grpo_beta': GRPO_BETA, + 'batch_size': BATCH_SIZE, + 'gradient_accumulation_steps': GRADIENT_ACCUMULATION_STEPS, + }) + else: + logger.info("SWANLAB_API_KEY not set, running without experiment tracking") + + +def create_countdown_dataset(): + """Create Countdown Game dataset.""" + from twinkle.preprocessor import CountdownProcessor + dataset = Dataset(DatasetMeta("ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(DATA_NUM))) + dataset.set_template("Template", model_id=MODEL_ID, max_length=8192) + dataset.map(CountdownProcessor()) + dataset.encode(add_generation_prompt=True) + return dataset + + +def compute_rewards(trajectories: List[Trajectory]) -> Tuple[List[float], List[float], List[float]]: + """Compute format and accuracy rewards.""" + from twinkle.reward import CountDownAccuracy, FormatReward + format_rewards = FormatReward()(trajectories, []) + accuracy_rewards = 
CountDownAccuracy()(trajectories, []) + total_rewards = [a+b for a, b in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + +def main(): + device_groups = [ + DeviceGroup(name='model', ranks=list(range(MODEL_GPUS)), + device_type='GPU', gpus_per_worker=1), + DeviceGroup(name='sampler', ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', gpus_per_worker=1), + ] + if USE_MEGATRON: + model_mesh = DeviceMesh.from_sizes(dp_size=MODEL_GPUS, tp_size=1, pp_size=1) + else: + model_mesh = DeviceMesh.from_sizes(world_size=MODEL_GPUS, dp_size=MODEL_GPUS) + sampler_mesh = DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS) + twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False) + logger.info(get_device_placement()) + + lora_config = LoraConfig( + target_modules="all-linear", r=8, lora_alpha=32, lora_dropout=0.05, + ) + + # ── Model (training) ────────────────────────────────────────────── + if USE_MEGATRON: + from twinkle.model.megatron import MegatronModel + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + recompute_granularity='selective', + recompute_num_layers=None, + ) + else: + model = TransformersModel( + model_id=MODEL_ID, device_mesh=model_mesh, remote_group='model', + ) + + # gradient_accumulation_steps=1: externally managed micro-batch loop + model.add_adapter_to_model( + ADAPTER_NAME, lora_config, + gradient_accumulation_steps=1, + ) + if USE_MEGATRON: + model.set_optimizer('default', lr=LEARNING_RATE, adapter_name=ADAPTER_NAME) + model.set_lr_scheduler('default', lr_decay_steps=MAX_STEPS, + max_lr=LEARNING_RATE, adapter_name=ADAPTER_NAME) + else: + model.set_optimizer('AdamW', lr=LEARNING_RATE, adapter_name=ADAPTER_NAME) + model.set_lr_scheduler('LinearLR', adapter_name=ADAPTER_NAME) + model.set_loss('GRPOLoss', adapter_name=ADAPTER_NAME, + epsilon=GRPO_EPSILON, beta=GRPO_BETA) 
+ model.set_processor(InputProcessor, adapter_name=ADAPTER_NAME) + model.set_template('Template', model_id=MODEL_ID, adapter_name=ADAPTER_NAME) + + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'load_format': 'dummy', + 'gpu_memory_utilization': 0.7, + 'max_model_len': 2048, + 'enable_sleep_mode': False, + 'enable_lora': True, + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + + # Use global batch size so each dataloader iteration yields enough + # prompts for one full gradient accumulation cycle. + GLOBAL_BATCH_SIZE = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS + dataloader = DataLoader( + dataset=create_countdown_dataset, batch_size=GLOBAL_BATCH_SIZE, + min_batch_size=GLOBAL_BATCH_SIZE, + device_mesh=model_mesh, remote_group='model', num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, top_p=0.95, + ) + + # ── Training loop ──────────────────────────────────────────────── + # Each dataloader iteration yields a global batch (BATCH_SIZE * GRAD_ACC prompts). + # We sample all at once, then split into micro-batches for forward_backward. + optim_step = 0 + + for batch in dataloader: + if optim_step >= MAX_STEPS: + break + + step_start = time.perf_counter() + metrics.reset() + timings: Dict[str, float] = { + 'weight_sync': 0.0, 'generate': 0.0, 'reward': 0.0, + 'advantage': 0.0, 'train': 0.0, 'total': 0.0, + } + + global_prompts = batch if isinstance(batch, list) else [batch] + + # ========== 1. Weight Sync (once per optim step) ========== + t0 = time.perf_counter() + if optim_step % WEIGHT_SYNC_INTERVAL == 0: + ckpt_manager.sync_weights(adapter_name=ADAPTER_NAME) + timings['weight_sync'] = time.perf_counter() - t0 + + # ========== 2. 
Generate (once per optim step, full global batch) ========== + t1 = time.perf_counter() + sample_response = sampler.sample( + global_prompts, sampling_params, num_samples=NUM_GENERATIONS, + ) + timings['generate'] = time.perf_counter() - t1 + + # Collect all sampled data + all_input_data: List[Dict[str, Any]] = [] + all_old_logps: List[List[float]] = [] + all_completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + all_input_data.append(sequence.new_input_feature) + all_old_logps.append(sequence.logprobs) + all_completion_lengths.append(len(sequence.tokens)) + + if not all_input_data: + logger.warning(f"Optim step {optim_step}: No valid samples, skipping") + continue + + # ========== 3. Compute rewards (once per optim step) ========== + t2 = time.perf_counter() + total_rewards, format_rewards, accuracy_rewards = compute_rewards(all_input_data) + timings['reward'] = time.perf_counter() - t2 + + metrics.accumulate(None, None, + generate_time=timings['generate'], + weight_sync_time=timings['weight_sync'], + completion_lengths=all_completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 'accuracy': accuracy_rewards, + }) + + # ========== 4. Compute advantages (once per optim step) ========== + t3 = time.perf_counter() + advantages = advantage_fn( + total_rewards, num_generations=NUM_GENERATIONS, scale='group', + ) + advantages = advantages.tolist() + timings['advantage'] = time.perf_counter() - t3 + + frac_zero_std = 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + + # ========== 5. Training (split into micro-batches) ========== + t4 = time.perf_counter() + # Each prompt generates NUM_GENERATIONS sequences, so one micro-batch + # is BATCH_SIZE prompts * NUM_GENERATIONS sequences. 
+ micro_batch_seqs = BATCH_SIZE * NUM_GENERATIONS + + for micro_idx in range(GRADIENT_ACCUMULATION_STEPS): + start = micro_idx * micro_batch_seqs + end = start + micro_batch_seqs + mb_inputs = all_input_data[start:end] + mb_old_logps = all_old_logps[start:end] + mb_advantages = advantages[start:end] + + if not mb_inputs: + break + + # Skip micro-batch if all advantages are zero + if all(abs(a) < 1e-8 for a in mb_advantages): + logger.info(f"Optim step {optim_step}, micro {micro_idx}: " + f"All advantages zero, skipping") + continue + + model.forward_backward( + inputs=mb_inputs, + adapter_name=ADAPTER_NAME, + advantages=mb_advantages, + old_logps=mb_old_logps, + ) + + model.clip_grad_and_step(adapter_name=ADAPTER_NAME) + timings['train'] = time.perf_counter() - t4 + + timings['total'] = time.perf_counter() - step_start + optim_step += 1 + + # ========== 6. Log ========== + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric(is_training=True)) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + log_dict['train/optim_step'] = optim_step + for k, v in timings.items(): + log_dict[f'time/{k}'] = round(v, 2) + + if USE_SWANLAB: + swanlab.log(log_dict) + logger.info(f"[Step {optim_step}/{MAX_STEPS}] {log_dict}") + + logger.info(f"Training completed. optim_steps={optim_step}") + model.save('grpo-countdown-checkpoint', adapter_name=ADAPTER_NAME) + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/lora_gpu.py b/cookbook/legacy/grpo/lora_gpu.py new file mode 100644 index 00000000..51f56800 --- /dev/null +++ b/cookbook/legacy/grpo/lora_gpu.py @@ -0,0 +1,692 @@ +""" +GRPO LoRA Training Script for GPU (CUDA) + +This script tests the twinkle RL training capabilities on GPU: +1. TransformersModel backend +2. vLLMSampler / TorchSampler integration +3. GRPOLoss and advantage computation +4. Weight synchronization between model and sampler + +Based on lora_npu.py, adapted for CUDA GPU environment. 
+ +Usage: + # Basic test with Transformers backend (local mode, no Ray) + CUDA_VISIBLE_DEVICES=0 TWINKLE_MODE=local python lora_gpu.py + + # Test with multiple GPUs (Ray mode) + CUDA_VISIBLE_DEVICES=0,1 TWINKLE_MODE=ray python lora_gpu.py + + # Use vLLMSampler (requires more GPU memory) + TWINKLE_USE_TORCH_SAMPLER=0 python lora_gpu.py + + # Debug mode + TWINKLE_DEBUG=1 python lora_gpu.py + +Environment Variables: + TWINKLE_MODEL_ID: Model path (default: Qwen/Qwen3-0.6B) + TWINKLE_MAX_LENGTH: Max sequence length (default: 2048) + TWINKLE_MAX_STEPS: Max training steps (default: 3) + TWINKLE_USE_REF_MODEL: Use reference model for KL (default: 0) + TWINKLE_USE_TORCH_SAMPLER: Use TorchSampler instead of vLLMSampler (default: 1) + TWINKLE_DEBUG: Enable debug logging (default: 0) + TWINKLE_MODE: 'local' or 'ray' (default: local) + +Test Results (as of 2026-01-30): + - TransformersModel + TorchSampler: PASS + - vLLMSampler sampling: PASS + - vLLMSampler LoRA weight sync: IN PROGRESS (needs more debugging) +""" +import numpy as np +from peft import LoraConfig +import os +import sys + +# Add twinkle src to path for development +twinkle_src = os.path.join(os.path.dirname(__file__), '..', '..', 'src') +if os.path.exists(twinkle_src): + sys.path.insert(0, twinkle_src) + +import twinkle +from twinkle import DeviceMesh, get_device_placement +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.infra import DeviceGroup, remote_function, remote_class +from twinkle.model import TransformersModel +from twinkle.reward import MathReward +from twinkle.sampler import vLLMSampler, TorchSampler +from twinkle.data_format.sampling import SamplingParams +from twinkle.weight_loader import NativeLoader +from twinkle.advantage import GRPOAdvantage + +# Environment variable setup +os.environ.setdefault('TRUST_REMOTE_CODE', '1') +os.environ.setdefault('TWINKLE_SEED', '42') +os.environ.setdefault('TWINKLE_FULL_DETERMINISM', '1') + +# Training configuration +use_ref_model = 
os.environ.get('TWINKLE_USE_REF_MODEL', '0') != '0' +use_torch_sampler = os.environ.get('TWINKLE_USE_TORCH_SAMPLER', '1') != '0' # Default to TorchSampler for easier testing +num_generations = 8 +kl_beta = 0.0 +max_length = int(os.environ.get('TWINKLE_MAX_LENGTH', '2048')) +model_path = os.environ.get('TWINKLE_MODEL_ID', 'Qwen/Qwen3-0.6B') +debug_mode = os.environ.get('TWINKLE_DEBUG', '0') != '0' +run_mode = os.environ.get('TWINKLE_MODE', 'local') # 'local' or 'ray' + +# Global device meshes (will be set in train()) +actor_device_mesh = None +ref_device_mesh = None + + +def build_template_kwargs(include_model_id: bool = False): + kwargs = {} + if include_model_id: + kwargs['model_id'] = model_path + if max_length > 0: + kwargs['max_length'] = max_length + kwargs['truncation_strategy'] = 'right' + return kwargs + + +def parse_device_config(): + """Parse GPU device configuration from environment.""" + visible_devices_env = os.environ.get('CUDA_VISIBLE_DEVICES') + if visible_devices_env: + visible_devices = [d for d in visible_devices_env.split(',') if d.strip()] + nproc_per_node = len(visible_devices) + else: + # Try to detect available GPUs + try: + import torch + nproc_per_node = torch.cuda.device_count() + except: + nproc_per_node = 1 + + def _parse_ranks_env(name: str): + raw = os.environ.get(name) + if not raw: + return None + ranks = [int(v.strip()) for v in raw.split(',') if v.strip()] + return ranks or None + + actor_ranks = _parse_ranks_env('TWINKLE_ACTOR_RANKS') + ref_ranks = _parse_ranks_env('TWINKLE_REF_RANKS') + + if actor_ranks is None: + actor_size = int(os.environ.get('TWINKLE_ACTOR_SIZE', str(nproc_per_node))) + actor_ranks = list(range(actor_size)) + + if ref_ranks is None and use_ref_model: + ref_size = int(os.environ.get('TWINKLE_REF_SIZE', '1')) + ref_start = (max(actor_ranks) + 1) if actor_ranks else 0 + ref_ranks = list(range(ref_start, ref_start + ref_size)) + + return nproc_per_node, actor_ranks, ref_ranks + + +def 
create_device_groups(actor_ranks, ref_ranks): + """Create device groups for actor and reference models.""" + device_groups = [ + DeviceGroup( + name='actor', + ranks=actor_ranks, + device_type='cuda', # GPU + ), + ] + + if use_ref_model and ref_ranks: + device_groups.append( + DeviceGroup( + name='ref', + ranks=ref_ranks, + device_type='cuda', + ) + ) + + return device_groups + + +def create_device_meshes(actor_ranks, ref_ranks): + """Create device meshes for actor and reference models.""" + actor_mesh = DeviceMesh( + device_type='cuda', + mesh=np.array([len(actor_ranks)]), + mesh_dim_names=('dp',), + ) + + ref_mesh = DeviceMesh( + device_type='cuda', + mesh=np.array([len(ref_ranks) if ref_ranks is not None else 0]), + mesh_dim_names=('dp',), + ) + + return actor_mesh, ref_mesh + + +def get_eos_token_ids(): + """Get EOS token IDs from tokenizer.""" + try: + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + eos_ids = tokenizer.eos_token_id + if eos_ids is None: + return [] + elif isinstance(eos_ids, (list, tuple)): + return list(eos_ids) + else: + return [eos_ids] + except Exception as e: + print(f'[WARN] Failed to get EOS token IDs: {e}') + return [] + + +def get_sampling_params(eos_token_ids) -> SamplingParams: + """Create SamplingParams for generation.""" + return SamplingParams( + max_tokens=128, + temperature=1.0, + top_p=0.95, + ) + + +def debug_print_rollout(step, trajectories, ground_truths, rewards=None): + """Debug helper that prints rollout intermediates.""" + if not debug_mode: + return + + # Extract prediction content (last message of the first sample) + try: + pred_msg = trajectories[0]['messages'][-1]['content'] if trajectories else None + except (KeyError, IndexError, TypeError): + pred_msg = None + + # Extract prompt and ground truth (first sample) + try: + prompt_msg = ground_truths[0]['messages'][0]['content'] if ground_truths else None + gt_msg = 
ground_truths[0]['messages'][-1]['content'] if ground_truths else None + except (KeyError, IndexError, TypeError): + prompt_msg, gt_msg = None, None + + # Print prompt, prediction, and ground truth + print( + f'[DEBUG][step {step}] prompt={prompt_msg[:100] if prompt_msg else None}... | ' + f'pred={pred_msg[:100] if pred_msg else None}... | ' + f'gt={gt_msg[:100] if gt_msg else None}...', + flush=True, + ) + + # Print reward statistics + if rewards is not None and isinstance(rewards, (list, tuple)) and rewards: + rewards_np = np.array(rewards, dtype=np.float32) + print( + f'[DEBUG][step {step}] rewards: n={len(rewards)}, ' + f'min={rewards_np.min():.4f}, mean={rewards_np.mean():.4f}, max={rewards_np.max():.4f}', + flush=True, + ) + + +@remote_class() +class ActorGroup: + """Actor group containing sampler and model for RL training.""" + + def __init__(self, engine_args=None, lora_config=None, adapter_name=None, **kwargs): + global actor_device_mesh + + if use_torch_sampler: + self.sampler = TorchSampler( + model_path, + device_mesh=actor_device_mesh, + ) + else: + if engine_args is None: + raise ValueError("engine_args is required for vLLMSampler.") + self.sampler = vLLMSampler( + model_path, + engine_args=engine_args, + device_mesh=actor_device_mesh, + ) + self.sampler.add_adapter_to_sampler(adapter_name, lora_config) + self.sampler.set_template('Template', adapter_name=adapter_name, **build_template_kwargs(include_model_id=True)) + + self.model = TransformersModel( + model_id=model_path, + remote_group='actor', + device_mesh=actor_device_mesh + ) + self.model.add_adapter_to_model(adapter_name, lora_config) + + self.model.set_loss( + 'GRPOLoss', + epsilon=0.2, + beta=kl_beta, + num_generations=num_generations, + ) + + self.model.set_optimizer('AdamW', lr=1e-6) + self.model.set_lr_scheduler('LinearLR') + self.model.set_template('Template', **build_template_kwargs(include_model_id=False)) + self.model.set_processor('GRPOLossProcessor') + + self.adapter_name = 
adapter_name + self.lora_config = lora_config + + @remote_function(collect='flatten') + def sample(self, batch, sampling_params: SamplingParams = None): + return self.sampler.sample(batch, sampling_params=sampling_params, adapter_name=self.adapter_name) + + @remote_function() + def forward(self, inputs, **kwargs): + outputs = self.model.forward(inputs=inputs, **kwargs) + return outputs['logits'] + + @remote_function() + def forward_only(self, inputs, **kwargs): + outputs = self.model.forward_only(inputs=inputs, **kwargs) + return outputs['logits'] + + @remote_function() + def forward_backward(self, inputs, trajectories, ref_logits=None, old_logits=None, **kwargs): + if old_logits is None: + old_logits = self.model.forward_only(inputs=inputs, **kwargs)['logits'] + return self.model.forward_backward( + inputs=inputs, + trajectories=trajectories, + ref_logits=ref_logits, + old_logits=old_logits, + **kwargs, + ) + + @remote_function() + def step(self): + return self.model.step() + + @remote_function() + def zero_grad(self): + return self.model.zero_grad() + + @remote_function() + def lr_step(self): + return self.model.lr_step() + +def create_dataset(): + """Create math dataset for RL training.""" + dataset = Dataset(DatasetMeta('ms://modelscope/competition_math')) + dataset.set_template('Template', **build_template_kwargs(include_model_id=True)) + dataset.map('CompetitionMathGRPOProcessor') + dataset.check(batched=True) + return dataset + + +def create_simple_dataset(): + """Create a simple synthetic dataset for testing (no external dependencies).""" + # Simple math-like prompts for testing + # Multiple samples per batch to test advantage computation with num_generations > 1 + samples = [ + { + 'messages': [ + {'role': 'system', 'content': 'You are a helpful math assistant. 
Respond with only the final answer in the form \\boxed{...}.'}, + {'role': 'user', 'content': 'What is 2 + 2?'}, + {'role': 'assistant', 'content': ''}, + ], + 'user_data': [('solution', '\\boxed{4}')], + }, + { + 'messages': [ + {'role': 'system', 'content': 'You are a helpful math assistant. Respond with only the final answer in the form \\boxed{...}.'}, + {'role': 'user', 'content': 'What is 3 * 5?'}, + {'role': 'assistant', 'content': ''}, + ], + 'user_data': [('solution', '\\boxed{15}')], + }, + { + 'messages': [ + {'role': 'system', 'content': 'You are a helpful math assistant. Respond with only the final answer in the form \\boxed{...}.'}, + {'role': 'user', 'content': 'What is 10 - 3?'}, + {'role': 'assistant', 'content': ''}, + ], + 'user_data': [('solution', '\\boxed{7}')], + }, + { + 'messages': [ + {'role': 'system', 'content': 'You are a helpful math assistant. Respond with only the final answer in the form \\boxed{...}.'}, + {'role': 'user', 'content': 'What is 6 / 2?'}, + {'role': 'assistant', 'content': ''}, + ], + 'user_data': [('solution', '\\boxed{3}')], + }, + ] + return samples + + +def train_local(): + """Local mode training - single process, no Ray.""" + global actor_device_mesh, ref_device_mesh + + import torch + + print(f'[INFO] Starting GRPO training on GPU (LOCAL mode)') + print(f'[INFO] Model: {model_path}') + print(f'[INFO] Use torch sampler: {use_torch_sampler}') + print(f'[INFO] Max length: {max_length}') + + # Setup device + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + print(f'[INFO] Device: {device}') + + # Create simple device mesh for single GPU + actor_device_mesh = DeviceMesh( + device_type='cuda', + mesh=np.array([1]), + mesh_dim_names=('dp',), + ) + ref_device_mesh = actor_device_mesh + + # Initialize twinkle in local mode + twinkle.initialize(mode='local', nproc_per_node=1) + + # Use simple dataset for testing + samples = create_simple_dataset() + + eos_token_ids = get_eos_token_ids() + + 
lora_config = LoraConfig( + target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'], + r=8, + lora_alpha=16, + ) + + # Create sampler + print(f'[INFO] Creating sampler...', flush=True) + if use_torch_sampler: + from twinkle.sampler import TorchSampler + sampler = TorchSampler( + model_path, + device_mesh=actor_device_mesh, + ) + else: + from twinkle.sampler import vLLMSampler + engine_args = { + 'model': model_path, + 'enable_lora': True, + 'max_loras': 1, + 'max_lora_rank': 64, + 'max_model_len': max_length, + 'gpu_memory_utilization': 0.5, + 'trust_remote_code': True, + } + sampler = vLLMSampler( + model_path, + engine_args=engine_args, + device_mesh=actor_device_mesh, + ) + + sampler.add_adapter_to_sampler('default', lora_config) + sampler.set_template('Template', adapter_name='default', **build_template_kwargs(include_model_id=True)) + + # Create model + print(f'[INFO] Creating model...', flush=True) + model = TransformersModel( + model_id=model_path, + device_mesh=actor_device_mesh, + trust_remote_code=True, + ) + model.add_adapter_to_model('default', lora_config) + + model.set_loss( + 'GRPOLoss', + epsilon=0.2, + beta=kl_beta, + num_generations=num_generations, + ) + + model.set_optimizer('AdamW', lr=1e-6) + model.set_lr_scheduler('LinearLR') + model.set_template('Template', **build_template_kwargs(include_model_id=True)) + model.set_processor('GRPOLossProcessor') + + # Create reward function + reward = MathReward() + + # Create weight loader for syncing + weight_loader = NativeLoader() + + step = 0 + max_steps = int(os.environ.get('TWINKLE_MAX_STEPS', '10')) + + print(f'[INFO] Starting training loop for {max_steps} steps') + + # Training loop + for batch_idx in range(max_steps): + step += 1 + print(f'\n[step {step}] ========== Starting iteration ==========', flush=True) + + # Use batch of 2 samples (num_generations=2 means 2 samples per prompt) + # This allows proper advantage computation + batch_start = (batch_idx * num_generations) % len(samples) + 
batch_list = [] + for i in range(num_generations): + sample_idx = (batch_start + i) % len(samples) + batch_list.append(samples[sample_idx].copy()) + ground_truths = [b.copy() for b in batch_list] + + sampling_params = get_sampling_params(eos_token_ids) + + # Sample from policy + print(f'[step {step}] Sampling...', flush=True) + sample_response = sampler.sample(batch_list, sampling_params=sampling_params, adapter_name='default') + + # Convert sample response to trajectories + trajectories = [] + for i, seq in enumerate(sample_response.sequences): + # Decode the tokens to get the response text + response_text = sampler.decode_response(seq.tokens, adapter_name='default') + + # Create trajectory with the sampled response + src_batch = batch_list[i % len(batch_list)] + traj = { + 'messages': [ + src_batch['messages'][0], # Keep system message if present + src_batch['messages'][1] if len(src_batch['messages']) > 1 else {'role': 'user', 'content': ''}, + {'role': 'assistant', 'content': response_text}, + ], + 'user_data': src_batch.get('user_data', []), + } + trajectories.append(traj) + + print(f'[step {step}] Sampled {len(trajectories)} trajectories', flush=True) + if debug_mode and trajectories: + print(f'[step {step}] Sample response: {trajectories[0]["messages"][-1]["content"][:200]}...', flush=True) + + # Compute rewards + print(f'[step {step}] Computing rewards...', flush=True) + rewards = reward(trajectories, ground_truths) + print(f'[step {step}] Rewards: {rewards}', flush=True) + + # Compute advantages + advantage_fn = GRPOAdvantage() + # For single sample, use batch normalization + if len(rewards) < num_generations: + advantages = advantage_fn(rewards, num_generations=1, scale='batch') + else: + advantages = advantage_fn(rewards, num_generations=num_generations) + + for trajectory, advantage in zip(trajectories, advantages.tolist()): + trajectory['advantages'] = advantage + + print(f'[step {step}] Advantages: {advantages.tolist()}', flush=True) + + # Debug print 
+ debug_print_rollout(step, trajectories, ground_truths, rewards=rewards) + + # Get old logits (for importance sampling ratio) + print(f'[step {step}] Computing old logits...', flush=True) + old_outputs = model.forward_only(inputs=trajectories) + old_logits = old_outputs['logits'] if isinstance(old_outputs, dict) else old_outputs.logits + + # Forward-backward pass + print(f'[step {step}] Forward-backward...', flush=True) + loss = model.forward_backward( + inputs=trajectories, + trajectories=trajectories, + old_logits=old_logits, + ref_logits=None, + ) + + print(f'[step {step}] loss: {loss}', flush=True) + + # Optimizer step + model.step() + model.zero_grad() + model.lr_step() + + # Sync weights to sampler + print(f'[step {step}] Syncing weights...', flush=True) + weight_loader(model, sampler, 'default') + + if max_steps and step >= max_steps: + break + + print(f'\n[INFO] Training completed after {step} steps') + + +def train_ray(): + """Ray mode training - distributed with Ray.""" + global actor_device_mesh, ref_device_mesh + + nproc_per_node, actor_ranks, ref_ranks = parse_device_config() + + print(f'[INFO] Starting GRPO training on GPU (RAY mode)') + print(f'[INFO] Model: {model_path}') + print(f'[INFO] Actor ranks: {actor_ranks}') + print(f'[INFO] Ref ranks: {ref_ranks}') + print(f'[INFO] Use torch sampler: {use_torch_sampler}') + print(f'[INFO] Use ref model: {use_ref_model}') + + device_groups = create_device_groups(actor_ranks, ref_ranks) + actor_device_mesh, ref_device_mesh = create_device_meshes(actor_ranks, ref_ranks) + + # Initialize twinkle with ray mode + twinkle.initialize(mode='ray', groups=device_groups, nproc_per_node=nproc_per_node) + + # Use simple dataset for testing + samples = create_simple_dataset() + + eos_token_ids = get_eos_token_ids() + + engine_args = { + 'model': model_path, + 'enable_lora': True, + 'max_loras': 1, + 'max_lora_rank': 64, + 'max_model_len': max_length, + 'gpu_memory_utilization': 
float(os.environ.get('TWINKLE_VLLM_GPU_MEMORY_UTILIZATION', '0.9')), + 'trust_remote_code': True, + } + + lora_config = LoraConfig( + target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'], + r=8, + lora_alpha=16, + ) + + actor_group = ActorGroup( + None if use_torch_sampler else engine_args, + remote_group='actor', + lora_config=lora_config, + adapter_name='default', + ) + + ref_model = None + if use_ref_model: + ref_model = TransformersModel( + model_id=model_path, + remote_group='ref', + device_mesh=ref_device_mesh + ) + ref_model.set_processor('InputProcessor') + ref_model.set_template('Template', **build_template_kwargs()) + + reward = MathReward() + + print('Device placement:', get_device_placement()) + + step = 0 + max_steps = int(os.environ.get('TWINKLE_MAX_STEPS', '5')) + + # Training loop + for batch_idx in range(max_steps): + step += 1 + print(f'[step {step}] Starting iteration', flush=True) + + # Use samples cyclically + batch = samples[batch_idx % len(samples)] + if isinstance(batch, dict): + batch_list = [batch] + else: + batch_list = list(batch) + ground_truths = batch_list.copy() + + sampling_params = get_sampling_params(eos_token_ids) + + # Sample from policy + print(f'[step {step}] Sampling...', flush=True) + trajectories = actor_group.sample(batch_list, sampling_params) + if callable(trajectories): + trajectories = trajectories() + print(f'[step {step}] Sampled {len(trajectories)} trajectories', flush=True) + + # Get reference logits if using ref model + ref_logits = None + if use_ref_model and ref_model is not None: + ref_outputs = ref_model.forward_only(inputs=trajectories) + if callable(ref_outputs) and getattr(ref_outputs, '_is_lazy_collect', False): + ref_outputs = ref_outputs() + if isinstance(ref_outputs, list): + ref_logits = [o['logits'] if isinstance(o, dict) else o.logits for o in ref_outputs] + else: + ref_logits = ref_outputs['logits'] if isinstance(ref_outputs, dict) else ref_outputs.logits + + # Compute rewards + print(f'[step 
{step}] Computing rewards...', flush=True) + rewards = reward(trajectories, ground_truths) + if callable(rewards): + rewards = rewards() + print(f'[step {step}] Rewards: {rewards}', flush=True) + + # Compute advantages + advantage_fn = GRPOAdvantage() + advantages = advantage_fn(rewards, num_generations=num_generations) + for trajectory, advantage in zip(trajectories, advantages.tolist()): + trajectory['advantages'] = advantage + + # Debug print + debug_print_rollout(step, trajectories, ground_truths, rewards=rewards) + + # Forward-backward pass + print(f'[step {step}] Forward-backward...', flush=True) + loss = actor_group.forward_backward(trajectories, trajectories, ref_logits) + if callable(loss): + loss = loss() + + print(f'[step {step}] loss: {loss}', flush=True) + + # Optimizer step + actor_group.step() + actor_group.zero_grad() + actor_group.lr_step() + + if max_steps and step >= max_steps: + break + + print(f'[INFO] Training completed after {step} steps') + + +def train(): + """Main training entry point.""" + if run_mode == 'local': + train_local() + else: + train_ray() + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/grpo/lora_npu.py b/cookbook/legacy/grpo/lora_npu.py new file mode 100644 index 00000000..d3cf520a --- /dev/null +++ b/cookbook/legacy/grpo/lora_npu.py @@ -0,0 +1,441 @@ +import numpy as np +from peft import LoraConfig +import os +import twinkle +from twinkle import DeviceMesh, get_device_placement +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.infra import DeviceGroup, remote_function, remote_class +from twinkle.model import TransformersModel +from twinkle.reward import MathReward +from twinkle.sampler import vLLMSampler, TorchSampler +from twinkle.data_format.sampling import SamplingParams, SampleResponse +from twinkle.weight_loader import NativeLoader +from twinkle.advantage import compute_advantages + +# Environment variable setup 
+os.environ.setdefault('TRUST_REMOTE_CODE', '1') +os.environ.setdefault('TWINKLE_SEED', '42') +os.environ.setdefault('TWINKLE_FULL_DETERMINISM', '1') +os.environ.setdefault('RAY_TMPDIR', os.path.expanduser('~/tmp/ray')) + +# Training configuration +use_ref_model = os.environ.get('TWINKLE_USE_REF_MODEL', '1') != '0' +use_torch_sampler = os.environ.get('TWINKLE_USE_TORCH_SAMPLER', '0') != '0' +num_generations = 2 +kl_beta = 0.0 +max_length = int(os.environ.get('TWINKLE_MAX_LENGTH', '4096')) +model_path = os.environ.get('TWINKLE_MODEL_ID', 'Qwen/Qwen3-0.6B') +debug_mode = os.environ.get('TWINKLE_DEBUG', '0') != '0' + + +def build_template_kwargs(include_model_id: bool = False): + kwargs = {} + if include_model_id: + kwargs['model_id'] = model_path + if max_length > 0: + kwargs['max_length'] = max_length + kwargs['truncation_strategy'] = 'right' + return kwargs + + +def parse_device_config(): + visible_devices_env = os.environ.get('ASCEND_RT_VISIBLE_DEVICES') + if visible_devices_env: + visible_devices = [d for d in visible_devices_env.split(',') if d.strip()] + nproc_per_node = len(visible_devices) + else: + nproc_per_node = 8 + + def _parse_ranks_env(name: str): + raw = os.environ.get(name) + if not raw: + return None + ranks = [int(v.strip()) for v in raw.split(',') if v.strip()] + return ranks or None + + actor_ranks = _parse_ranks_env('TWINKLE_ACTOR_RANKS') + ref_ranks = _parse_ranks_env('TWINKLE_REF_RANKS') + + if actor_ranks is None: + actor_size = int(os.environ.get('TWINKLE_ACTOR_SIZE', '6')) + actor_ranks = list(range(actor_size)) + + if ref_ranks is None and use_ref_model: + ref_size = int(os.environ.get('TWINKLE_REF_SIZE', '2')) + ref_start = (max(actor_ranks) + 1) if actor_ranks else 0 + ref_ranks = list(range(ref_start, ref_start + ref_size)) + + return nproc_per_node, actor_ranks, ref_ranks + + +def create_device_groups(actor_ranks, ref_ranks): + device_groups = [ + DeviceGroup( + name='actor', + ranks=actor_ranks, + device_type='npu', + ), + ] + + if 
use_ref_model and ref_ranks: + device_groups.append( + DeviceGroup( + name='ref', + ranks=ref_ranks, + device_type='npu', + ) + ) + + return device_groups + + +def create_device_meshes(actor_ranks, ref_ranks): + actor_device_mesh = DeviceMesh( + device_type='npu', + mesh=np.array([len(actor_ranks)]), + mesh_dim_names=('dp',), + ) + + ref_device_mesh = DeviceMesh( + device_type='npu', + mesh=np.array([len(ref_ranks) if ref_ranks is not None else 0]), + mesh_dim_names=('dp',), + ) + + return actor_device_mesh, ref_device_mesh + + +def get_eos_token_ids(): + try: + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(model_path) + eos_ids = tokenizer.eos_token_id + if eos_ids is None: + return [] + elif isinstance(eos_ids, (list, tuple)): + return list(eos_ids) + else: + return [eos_ids] + except Exception: + return [] + + +def get_sampling_params(eos_token_ids) -> SamplingParams: + """Create SamplingParams for generation.""" + return SamplingParams( + max_tokens=128, + temperature=1, + top_p=0.95, + ) + + +def build_trajectories_from_sample_response(sample_response: SampleResponse, batch_list, tokenizer): + """Convert sampler output into GRPO trajectories.""" + if not sample_response or not getattr(sample_response, 'sequences', None): + return [] + if not batch_list: + return [] + + trajectories = [] + for i, seq in enumerate(sample_response.sequences): + src_batch = batch_list[i % len(batch_list)] + src_messages = [dict(msg) for msg in src_batch.get('messages', [])] + if src_messages and src_messages[-1].get('role') == 'assistant': + # Remove reference answer and append sampled assistant reply. 
+ src_messages = src_messages[:-1] + + response_text = tokenizer.decode(seq.tokens, skip_special_tokens=True) if tokenizer is not None else '' + trajectories.append({ + 'messages': src_messages + [{'role': 'assistant', 'content': response_text}], + 'user_data': list(src_batch.get('user_data', [])), + }) + return trajectories + + +def debug_print_rollout(step, trajectories, ground_truths, rewards=None): + """Debug helper that prints rollout intermediates (sampling, rewards, etc.). + + Set the TWINKLE_DEBUG environment variable to '1' to enable this output. + The output covers: + 1. Prompt, model prediction, and ground truth for the first sample + 2. Reward statistics (min, mean, max) + + Args: + step: Current training step + trajectories: List of sampled trajectories + ground_truths: List of ground truth records + rewards: Optional list of reward values (prints stats when provided) + + Environment Variables: + TWINKLE_DEBUG: Set to '1' to enable debug logging + + Example: + # Enable debug mode + TWINKLE_DEBUG=1 python lora_npu.py + """ + if not debug_mode: + return + + # Extract prediction content (last message of the first sample) + try: + pred_msg = trajectories[0]['messages'][-1]['content'] if trajectories else None + except (KeyError, IndexError, TypeError): + pred_msg = None + + # Extract prompt and ground truth (first sample) + try: + prompt_msg = ground_truths[0]['messages'][0]['content'] if ground_truths else None + gt_msg = ground_truths[0]['messages'][-1]['content'] if ground_truths else None + except (KeyError, IndexError, TypeError): + prompt_msg, gt_msg = None, None + + # Print prompt, prediction, and ground truth + print( + f'[DEBUG][step {step}] prompt={prompt_msg} | pred={pred_msg} | gt={gt_msg}', + flush=True, + ) + + # Print reward statistics + if rewards is not None and isinstance(rewards, (list, tuple)) and rewards: + rewards_np = np.array(rewards, dtype=np.float32) + print( + f'[DEBUG][step {step}] rewards: n={len(rewards)}, ' + 
f'min={rewards_np.min():.4f}, mean={rewards_np.mean():.4f}, max={rewards_np.max():.4f}', + flush=True, + ) + + +def _collect_sample_responses(results): + """Custom collect function to merge multiple SampleResponse objects.""" + if not results: + return SampleResponse(sequences=[]) + if len(results) == 1: + return results[0] + all_sequences = [] + for resp in results: + if resp is not None and hasattr(resp, 'sequences'): + all_sequences.extend(resp.sequences) + return SampleResponse(sequences=all_sequences) + + +@remote_class() +class ActorGroup: + + def __init__(self, engine_args=None, lora_config=None, adapter_name=None, **kwargs): + if use_torch_sampler: + self.sampler = TorchSampler( + model_path, + device_mesh=actor_device_mesh, + ) + else: + if engine_args is None: + raise ValueError("engine_args is required for vLLMSampler.") + self.sampler = vLLMSampler( + model_path, + engine_args=engine_args, + device_mesh=actor_device_mesh, + ) + self.sampler.add_adapter_to_sampler(adapter_name, lora_config) + # Fix: use 'Template' instead of 'Qwen3Template' - Qwen3Template was never exported in twinkle.template + self.sampler.set_template('Template', adapter_name=adapter_name, **build_template_kwargs(include_model_id=True)) + + self.model = TransformersModel( + model_id=model_path, + remote_group='actor', + device_mesh=actor_device_mesh + ) + self.model.add_adapter_to_model(adapter_name, lora_config) + + self.model.set_loss( + 'GRPOLoss', + epsilon=0.2, + beta=kl_beta, + num_generations=num_generations, + ) + + self.model.set_optimizer('AdamW', lr=1e-6) + self.model.set_lr_scheduler('LinearLR') + self.model.set_template('Template', **build_template_kwargs(include_model_id=False)) + self.model.set_processor('GRPOLossProcessor') + + self.weight_loader = NativeLoader() + self.adapter_name = adapter_name + self.lora_config = lora_config + + @remote_function(collect=_collect_sample_responses) + def sample(self, batch, sampling_params: SamplingParams = None): + return 
self.sampler.sample(batch, sampling_params=sampling_params, adapter_name=self.adapter_name) + + @remote_function() + def forward(self, inputs, **kwargs): + outputs = self.model.forward(inputs=inputs, **kwargs) + return outputs['logits'] + + @remote_function() + def forward_only(self, inputs, **kwargs): + outputs = self.model.forward_only(inputs=inputs, **kwargs) + return outputs['logits'] + + @remote_function() + def forward_backward(self, inputs, trajectories, ref_logits=None, old_logits=None, **kwargs): + if old_logits is None: + old_logits = self.model.forward_only(inputs=inputs, **kwargs)['logits'] + return self.model.forward_backward( + inputs=inputs, + trajectories=trajectories, + ref_logits=ref_logits, + old_logits=old_logits, + **kwargs, + ) + + @remote_function() + def step(self): + return self.model.step() + + @remote_function() + def zero_grad(self): + return self.model.zero_grad() + + @remote_function() + def lr_step(self): + return self.model.lr_step() + + @remote_function() + def sync_weights(self): + self.weight_loader(self.model, self.sampler, self.adapter_name) + + +def create_dataset(): + dataset = Dataset(DatasetMeta('ms://modelscope/competition_math')) + dataset.set_template('Template', **build_template_kwargs(include_model_id=True)) + dataset.map('CompetitionMathGRPOProcessor') + dataset.check(batched=True) + return dataset + + +def train(): + raise NotImplementedError("Not implemented") + nproc_per_node, actor_ranks, ref_ranks = parse_device_config() + + device_groups = create_device_groups(actor_ranks, ref_ranks) + global actor_device_mesh, ref_device_mesh + actor_device_mesh, ref_device_mesh = create_device_meshes(actor_ranks, ref_ranks) + + twinkle.initialize(mode='ray', groups=device_groups, nproc_per_node=nproc_per_node) + + dataloader = DataLoader( + create_dataset, + remote_group='actor', + device_mesh=actor_device_mesh + ) + + eos_token_ids = get_eos_token_ids() + try: + from transformers import AutoTokenizer + tokenizer = 
AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + except Exception: + tokenizer = None + + engine_args = { + 'model': model_path, + 'enable_lora': True, + 'max_loras': 1, + 'max_lora_rank': 64, + 'max_model_len': max_length, + 'gpu_memory_utilization': float(os.environ.get('TWINKLE_VLLM_GPU_MEMORY_UTILIZATION', '0.9')), + } + + lora_config = LoraConfig( + target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'] + ) + + actor_group = ActorGroup( + None if use_torch_sampler else engine_args, + remote_group='actor', + lora_config=lora_config, + adapter_name='default', + ) + + ref_model = None + if use_ref_model: + ref_model = TransformersModel( + model_id=model_path, + remote_group='ref', + device_mesh=ref_device_mesh + ) + ref_model.set_processor('InputProcessor') + ref_model.set_template('Template', **build_template_kwargs()) + + reward = MathReward() + + print('Device placement:', get_device_placement()) + + step = 0 + max_steps = int(os.environ.get('TWINKLE_MAX_STEPS', '20')) + + for batch in dataloader: + step += 1 + print(f'[step {step}] batch ready', flush=True) + + if isinstance(batch, dict): + batch_list = [batch] + else: + batch_list = list(batch) + sampling_params = get_sampling_params(eos_token_ids) + + sample_response = actor_group.sample(batch_list, sampling_params) + if callable(sample_response): + sample_response = sample_response() + trajectories = build_trajectories_from_sample_response(sample_response, batch_list, tokenizer) + if not trajectories: + print(f'[step {step}] empty sampled trajectories, skip.', flush=True) + continue + + # Expand ground truths to align with sampled trajectory count. 
+ ground_truths = [batch_list[i % len(batch_list)] for i in range(len(trajectories))] + + ref_logits = None + if use_ref_model: + ref_outputs = ref_model.forward_only(inputs=trajectories) + if callable(ref_outputs) and getattr(ref_outputs, '_is_lazy_collect', False): + ref_outputs = ref_outputs() + if isinstance(ref_outputs, list): + ref_logits = [o['logits'] if isinstance(o, dict) else o.logits for o in ref_outputs] + else: + ref_logits = ref_outputs['logits'] if isinstance(ref_outputs, dict) else ref_outputs.logits + + rewards = reward(trajectories, ground_truths) + if callable(rewards): + rewards = rewards() + + effective_num_generations = num_generations if len(rewards) % num_generations == 0 else 1 + scale = 'group' if effective_num_generations > 1 else 'batch' + advantages = compute_advantages( + rewards, + num_generations=effective_num_generations, + scale=scale, + ) + for trajectory, advantage in zip(trajectories, advantages.tolist()): + trajectory['advantages'] = float(advantage) + + # Debug: print reward statistics (enable via TWINKLE_DEBUG=1) + debug_print_rollout(step, trajectories, ground_truths, rewards=rewards) + + loss = actor_group.forward_backward(trajectories, trajectories, ref_logits) + if callable(loss): + loss = loss() + + print(f'[step {step}] loss: {loss}', flush=True) + + actor_group.step() + actor_group.zero_grad() + actor_group.lr_step() + + if max_steps and step >= max_steps: + break + + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/grpo/lora_pr.py b/cookbook/legacy/grpo/lora_pr.py new file mode 100644 index 00000000..3b921afa --- /dev/null +++ b/cookbook/legacy/grpo/lora_pr.py @@ -0,0 +1,139 @@ +import gc +import time +from typing import List, Tuple +from peft import LoraConfig +import twinkle +from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger +from twinkle.advantage import GRPOAdvantage +from twinkle.checkpoint_engine import CheckpointEngineManager +from twinkle.data_format import 
SamplingParams, Trajectory, InputFeature +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.metric import CompletionRewardMetric +from twinkle.model import TransformersModel +from twinkle.processor import InputProcessor +from twinkle.sampler import vLLMSampler +from twinkle.template import Template +from twinkle import torch_util + +logger = get_logger() + + +def create_countdown_dataset(): + from twinkle.preprocessor import CountdownProcessor + dataset = Dataset(DatasetMeta("ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(50000))) + dataset.set_template("Template", model_id='ms://Qwen/Qwen2.5-3B-Instruct', max_length=8192) + dataset.map(CountdownProcessor()) + dataset.encode() + return dataset + + +def compute_rewards(trajectories: List[Trajectory]) -> Tuple[List[float], List[float], List[float]]: + from twinkle.reward import CountDownAccuracy, FormatReward + format_rewards = FormatReward()(trajectories, []) + accuracy_rewards = CountDownAccuracy()(trajectories, []) + total_rewards = [a+b for a, b in zip(accuracy_rewards, format_rewards)] + return total_rewards, format_rewards, accuracy_rewards + +def main(): + device_groups = [ + DeviceGroup(name='model', ranks=4, device_type='GPU', gpus_per_worker=1), + DeviceGroup(name='sampler', ranks=4, device_type='GPU', gpus_per_worker=1), + ] + model_mesh = DeviceMesh.from_sizes(dp_size=4) + sampler_mesh = DeviceMesh.from_sizes(dp_size=4) + twinkle.initialize(mode='ray', nproc_per_node=8, groups=device_groups, lazy_collect=False) + logger.info(get_device_placement()) + lora_config = LoraConfig(target_modules="all-linear", r=8, lora_alpha=32, lora_dropout=0.05) + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-3B-Instruct', device_mesh=model_mesh, remote_group='model') + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=4,) + sampler = vLLMSampler( + model_id='ms://Qwen/Qwen2.5-3B-Instruct', + engine_args={ + 'load_format': 
'dummy', + 'gpu_memory_utilization': 0.7, + 'max_model_len': 2048, + 'enable_sleep_mode': False, + 'enable_lora': True, + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id='ms://Qwen/Qwen2.5-3B-Instruct') + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + dataloader = DataLoader( + dataset=create_countdown_dataset, batch_size=4, min_batch_size=4, + device_mesh=model_mesh, remote_group='model', num_workers=0, + ) + advantage_fn = GRPOAdvantage() + metrics = CompletionRewardMetric() + + sampling_params = SamplingParams(max_tokens=1024, temperature=1.0, top_p=0.95) + step = 0 + model.set_optimizer('AdamW', lr=1e-5) + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=500, num_training_steps=2000) + model.set_loss('GRPOLoss', epsilon=0.2, beta=0.0) + model.set_processor(InputProcessor) + model.set_template('Template', model_id='ms://Qwen/Qwen2.5-3B-Instruct') + + for batch in dataloader: + if step >= 2000: + break + + metrics.reset() + prompts = batch if isinstance(batch, list) else [batch] + if step % 1 == 0: + ckpt_manager.sync_weights(adapter_name='default') + sample_response = sampler.sample(prompts, sampling_params, num_samples=8) + trajectories: List[Trajectory] = [] + input_features: List[InputFeature] = [] + old_logps_list: List[List[float]] = [] + completion_lengths: List[int] = [] + + for sequence in sample_response.sequences: + input_features.append(sequence.new_input_feature) + trajectories.append(sequence.new_input_feature) + old_logps_list.append(sequence.logprobs) + completion_lengths.append(len(sequence.tokens)) + + if not trajectories: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + total_rewards, format_rewards, accuracy_rewards = compute_rewards(trajectories) + metrics.accumulate(None, None, + completion_lengths=completion_lengths, + rewards={ + 'total': total_rewards, + 'format': format_rewards, + 
'accuracy': accuracy_rewards, + }) + + advantages = advantage_fn(total_rewards, num_generations=8, scale='group').tolist() + frac_zero_std = 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + if frac_zero_std == 1.0: + logger.info(f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + model.forward_backward( + inputs=input_features, + advantages=advantages, + old_logps=old_logps_list, + ) + model.clip_grad_and_step() + gc.collect() + torch_util.empty_cache() + log_dict = metrics.calculate() + log_dict.update(model.calculate_metric()) + log_dict['train/frac_reward_zero_std'] = frac_zero_std + logger.info(log_dict) + step += 1 + + model.save('grpo-countdown-checkpoint') + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/grpo/megatron_lora.py b/cookbook/legacy/grpo/megatron_lora.py new file mode 100644 index 00000000..951f0a1b --- /dev/null +++ b/cookbook/legacy/grpo/megatron_lora.py @@ -0,0 +1,378 @@ +""" +GRPO Training Cookbook - MegatronModel with LoRA (Standalone Mode) + +Tests MegatronModel RL training with the same Countdown Game task as lora.py. 
+ +Usage: + python cookbook/grpo/megatron_lora.py +""" + +import os +import re +import time +import numpy as np +from typing import List, Dict, Tuple +from dataclasses import dataclass, field + +# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" + +from peft import LoraConfig + +import twinkle +from twinkle import DeviceMesh, DeviceGroup, get_device_placement, get_logger +from twinkle.data_format import Trajectory, Message, InputFeature +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.processor import InputProcessor +from twinkle.sampler import vLLMSampler +from twinkle.data_format.sampling import SamplingParams, SampleResponse +from twinkle.advantage import GRPOAdvantage +from twinkle.checkpoint_engine import CheckpointEngineManager +from twinkle.template import Template + +from transformers import AutoTokenizer +from twinkle.hub import HubOperation + +logger = get_logger() + +# ========== Configuration ========== +MODEL_ID = os.environ.get('MODEL_ID', 'ms://Qwen/Qwen2.5-3B-Instruct') +MODEL_GPUS = 4 +SAMPLER_GPUS = 2 +NUM_GPUS = MODEL_GPUS + SAMPLER_GPUS +NUM_GENERATIONS = 4 +MAX_NEW_TOKENS = 1024 +LEARNING_RATE = 1e-5 +GRPO_EPSILON = 0.2 +GRPO_BETA = 0.0 +MAX_STEPS = 20 +BATCH_SIZE = 2 +GRADIENT_ACCUMULATION_STEPS = 1 +TEMPERATURE = 1.0 +WEIGHT_SYNC_INTERVAL = 1 +ADAPTER_NAME = 'default' + + +# ========== Metrics ========== +@dataclass +class TrainingMetrics: + generate_time: float = 0.0 + weight_sync_time: float = 0.0 + rewards: List[float] = field(default_factory=list) + format_rewards: List[float] = field(default_factory=list) + accuracy_rewards: List[float] = field(default_factory=list) + completion_lengths: List[int] = field(default_factory=list) + loss: float = 0.0 + grad_norm: float = 0.0 + + def reset(self): + self.generate_time = 0.0 + self.weight_sync_time = 0.0 + self.rewards = [] + self.format_rewards = [] + self.accuracy_rewards = [] + 
self.completion_lengths = [] + self.loss = 0.0 + self.grad_norm = 0.0 + + +# ========== Rewards ========== +def format_reward(completion: str) -> float: + has_think = bool(re.search(r".*?", completion, re.DOTALL)) + has_answer = bool(re.search(r".*?", completion, re.DOTALL)) + return 1.0 if (has_think and has_answer) else 0.0 + + +def countdown_accuracy_reward(completion: str, target: int, nums: List[int]) -> float: + try: + match = re.search(r'(.*?)<\/answer>', completion) + if match is None: + return 0.0 + equation = match.group(1).strip() + if '=' in equation: + equation = equation.split('=')[0] + used_numbers = [int(n) for n in re.findall(r'\d+', equation)] + if sorted(used_numbers) != sorted(nums): + return 0.0 + if not re.match(r'^[\d+\-*/().\s]+$', equation): + return 0.0 + result = eval(equation, {'__builtins__': None}, {}) + return 1.0 if abs(float(result) - float(target)) < 1e-5 else 0.0 + except Exception: + return 0.0 + + +# ========== Dataset ========== +def create_countdown_dataset(): + from twinkle.preprocessor import CountdownProcessor + dataset = Dataset(DatasetMeta("ms://zouxuhong/Countdown-Tasks-3to4", data_slice=range(50000))) + dataset.set_template("Template", model_id=MODEL_ID, max_length=8192) + dataset.map(CountdownProcessor()) + return dataset + + +# ========== Sample Processing ========== +def process_samples( + prompts: List[Trajectory], + sample_response: SampleResponse, + tokenizer, + num_generations: int, + template: Template, +) -> Tuple[List[Trajectory], List[InputFeature], List[List[float]], List[int]]: + """Process sampled responses — same logic as lora.py.""" + trajectories: List[Trajectory] = [] + input_features: List[InputFeature] = [] + old_logps_list: List[List[float]] = [] + completion_lengths: List[int] = [] + + sequences = sample_response.sequences + prompt_ids_cache: Dict[int, List[int]] = {} + + for i, prompt in enumerate(prompts): + if i not in prompt_ids_cache: + prompt_messages = [ + dict(msg) for msg in 
prompt.get('messages', []) + if not (msg.get('role') == 'assistant' + and not msg.get('content', '').strip()) + ] + encoded = tokenizer.apply_chat_template( + prompt_messages, tokenize=True, add_generation_prompt=True, + ) + if hasattr(encoded, 'tolist'): + encoded = encoded.tolist() + prompt_ids_cache[i] = list(encoded) + + prompt_ids = prompt_ids_cache[i] + + for j in range(num_generations): + seq_idx = i * num_generations + j + if seq_idx >= len(sequences): + break + + seq = sequences[seq_idx] + response_tokens = list(seq.tokens) + response_logprobs = seq.logprobs if seq.logprobs else [] + response_text = tokenizer.decode(response_tokens, skip_special_tokens=True) + + messages = [ + msg for msg in prompt.get('messages', []) + if not (msg.get('role') == 'assistant' + and not msg.get('content', '').strip()) + ] + messages.append(Message(role='assistant', content=response_text)) + trajectories.append(Trajectory( + messages=messages, + user_data=prompt.get('user_data', []), + )) + + input_ids = prompt_ids + response_tokens + labels = [-100] * len(prompt_ids) + response_tokens + input_feature = InputFeature( + input_ids=np.array(input_ids), + labels=np.array(labels), + ) + input_feature = template._invoke_post_pipeline([input_feature]) + input_features.append(input_feature[0]) + + old_logps_list.append(response_logprobs) + completion_lengths.append(len(response_tokens)) + + return trajectories, input_features, old_logps_list, completion_lengths + + +def compute_rewards(trajectories: List[Trajectory]) -> Tuple[List[float], List[float], List[float]]: + total_rewards, format_rewards, accuracy_rewards = [], [], [] + for traj in trajectories: + messages = traj.get('messages', []) + completion = "" + for msg in reversed(messages): + if msg.get('role') == 'assistant': + completion = msg.get('content', '') + break + user_data = traj.get('user_data', [{}]) + data = user_data[0] if isinstance(user_data, list) and user_data else {} + target = data.get('target', 0) + nums = 
data.get('nums', []) + fmt_reward = format_reward(completion) + acc_reward = countdown_accuracy_reward(completion, target, nums) + format_rewards.append(fmt_reward) + accuracy_rewards.append(acc_reward) + total_rewards.append(fmt_reward + acc_reward) + return total_rewards, format_rewards, accuracy_rewards + + +def wait_result(result): + if hasattr(result, '_is_lazy_collect') and result._is_lazy_collect: + return result() + if callable(result) and hasattr(result, '_get_result'): + return result() + return result + + +# ========== Main ========== +def main(): + # ── Device setup ────────────────────────────────────────────────── + device_groups = [ + DeviceGroup(name='model', ranks=list(range(MODEL_GPUS)), + device_type='GPU', gpus_per_worker=1), + DeviceGroup(name='sampler', ranks=list(range(MODEL_GPUS, NUM_GPUS)), + device_type='GPU', gpus_per_worker=1), + ] + # MegatronModel: DP=2, TP=1, PP=1 for 2 GPUs + model_mesh = DeviceMesh.from_sizes( + dp_size=MODEL_GPUS, tp_size=2, pp_size=2, + ) + sampler_mesh = DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS) + + twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups) + logger.info(get_device_placement()) + + lora_config = LoraConfig( + target_modules="all-linear", r=8, lora_alpha=32, lora_dropout=0.05, + ) + + model = MegatronModel( + model_id=MODEL_ID, + device_mesh=model_mesh, + remote_group='model', + mixed_precision='bf16', + recompute_granularity='selective', + recompute_num_layers=None, + ) + model.add_adapter_to_model( + ADAPTER_NAME, lora_config, + gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS, + ) + # MegatronModel uses Megatron's distributed optimizer and scheduler + model.set_optimizer('default', lr=LEARNING_RATE, adapter_name=ADAPTER_NAME) + model.set_lr_scheduler('default', lr_decay_steps=MAX_STEPS, max_lr=LEARNING_RATE, + adapter_name=ADAPTER_NAME) + model.set_loss('GRPOLoss', adapter_name=ADAPTER_NAME, + epsilon=GRPO_EPSILON, beta=GRPO_BETA) + 
model.set_processor(InputProcessor, adapter_name=ADAPTER_NAME) + model.set_template('Template', model_id=MODEL_ID, adapter_name=ADAPTER_NAME) + + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args={ + 'load_format': 'dummy', + 'gpu_memory_utilization': 0.3, + 'max_model_len': 2048, + 'enforce_eager': True, + 'enable_sleep_mode': False, + 'enable_lora': True, + }, + device_mesh=sampler_mesh, + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + + ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler) + dataset = create_countdown_dataset() + dataloader = DataLoader( + dataset=dataset, batch_size=BATCH_SIZE, + device_mesh=model_mesh, remote_group='model', num_workers=0, + ) + model_path = HubOperation.download_model(MODEL_ID) + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + advantage_fn = GRPOAdvantage() + metrics = TrainingMetrics() + + sampling_params = SamplingParams( + max_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, top_p=0.95, + ) + step = 0 + + for batch in dataloader: + if step >= MAX_STEPS: + break + + metrics.reset() + + if callable(batch): + batch = batch() + prompts = batch if isinstance(batch, list) else [batch] + + # ========== 1. Weight Sync ========== + if step % WEIGHT_SYNC_INTERVAL == 0: + sync_start = time.perf_counter() + ckpt_manager.sync_weights(adapter_name=ADAPTER_NAME) + metrics.weight_sync_time = time.perf_counter() - sync_start + + # ========== 2. Generate ========== + gen_start = time.perf_counter() + sample_response = wait_result( + sampler.sample(prompts, sampling_params, num_samples=NUM_GENERATIONS) + ) + metrics.generate_time = time.perf_counter() - gen_start + + # ========== 3. 
Process samples ========== + template = sampler._get_template(adapter_name=ADAPTER_NAME) + trajectories, input_features, old_logps_list, completion_lengths = \ + process_samples(prompts, sample_response, tokenizer, NUM_GENERATIONS, template) + + if not trajectories: + logger.warning(f"Step {step}: No valid samples, skipping") + step += 1 + continue + + metrics.completion_lengths = completion_lengths + + # ========== 4. Compute rewards ========== + total_rewards, format_rewards, accuracy_rewards = compute_rewards(trajectories) + metrics.rewards = total_rewards + metrics.format_rewards = format_rewards + metrics.accuracy_rewards = accuracy_rewards + + # ========== 5. Compute advantages ========== + advantages = advantage_fn(total_rewards, num_generations=NUM_GENERATIONS, scale='group') + advantages = advantages.tolist() + + frac_zero_std = 1.0 if all(abs(a) < 1e-8 for a in advantages) else 0.0 + if frac_zero_std == 1.0: + logger.info(f"Step {step}: All advantages are zero, skipping training") + step += 1 + continue + + # ========== 6. Training step ========== + # MegatronModel.forward_backward returns float loss directly + loss = wait_result(model.forward_backward( + inputs=input_features, + adapter_name=ADAPTER_NAME, + advantages=advantages, + old_logps=old_logps_list, + )) + + # MegatronModel: step/zero_grad/lr_step separately + # step() stores grad_norm internally + wait_result(model.step(adapter_name=ADAPTER_NAME)) + wait_result(model.zero_grad(adapter_name=ADAPTER_NAME)) + wait_result(model.lr_step(adapter_name=ADAPTER_NAME)) + + metrics.loss = float(loss) if loss is not None else 0.0 + # grad_norm is not directly returned; it's stored in optimizer_config + # For now, log loss only; grad_norm can be retrieved if needed + metrics.grad_norm = 0.0 + + import gc + from twinkle.utils.framework import Torch + gc.collect() + Torch.empty_cache() + + # ========== 7. 
Log ========== + logger.info( + f"Step {step}: loss={metrics.loss:.6f}, grad_norm={metrics.grad_norm:.7f}, " + f"reward={sum(metrics.rewards) / max(len(metrics.rewards), 1):.4f}, " + f"format={sum(metrics.format_rewards) / max(len(metrics.format_rewards), 1):.2f}, " + f"accuracy={sum(metrics.accuracy_rewards) / max(len(metrics.accuracy_rewards), 1):.2f}, " + f"completion_len={sum(metrics.completion_lengths) / max(len(metrics.completion_lengths), 1):.1f}" + ) + + step += 1 + + logger.info(f"Training completed. Total steps: {step}") + + +if __name__ == '__main__': + main() diff --git a/cookbook/legacy/lora.py b/cookbook/legacy/lora.py new file mode 100644 index 00000000..2f104f7e --- /dev/null +++ b/cookbook/legacy/lora.py @@ -0,0 +1,189 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +"""Megatron-Core LoRA training example. + +Supports both local (torchrun) and Ray execution modes. + +Usage (Local mode with 8 GPUs: TP=2, PP=2, DP=2): + torchrun --nproc_per_node=8 cookbook/megatron/lora.py --tp_size 2 --pp_size 2 --dp_size 2 + +Usage (Ray mode): + TRUST_REMOTE_CODE=1 python cookbook/megatron/lora.py --mode ray --tp_size 2 --pp_size 2 --dp_size 2 +""" +import argparse +import os + +# CRITICAL: Set CUDA device before any CUDA imports (local mode only) +import torch +from peft import LoraConfig + +import twinkle +from twinkle import (DeviceGroup, DeviceMesh, Platform, get_device_placement, + get_logger) +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.processor import InputProcessor + +# Parse arguments first to determine mode +parser = argparse.ArgumentParser() +parser.add_argument('--mode', + type=str, + default='local', + choices=['local', 'ray']) +parser.add_argument('--max_steps', type=int, default=20) +parser.add_argument('--model', + type=str, + default='ms://Qwen/Qwen2.5-7B-Instruct') +parser.add_argument('--dp_size', type=int, default=None, 
help='Data parallel size (default: auto-calculated)') +parser.add_argument('--tp_size', type=int, default=1, help='Tensor parallel size') +parser.add_argument('--pp_size', type=int, default=1, help='Pipeline parallel size') +parser.add_argument('--vpp_size', type=int, default=None, help='Virtual Pipeline Parallel size (default: None)') +parser.add_argument('--cp_size', type=int, default=1, help='Context parallel size') +parser.add_argument('--sequence_parallel', type=lambda x: x.lower() == 'true', default=True, + help='Enable sequence parallel (default: True)') +parser.add_argument('--micro_batch_size', type=int, default=1, + help='Micro batch size per DP rank (default: 1). For VPP, increase this to >= PP*VPP') +GAS = 16 # gradient accumulation steps +args = parser.parse_args() + +# Set mode in environment before importing twinkle +os.environ['TWINKLE_MODE'] = args.mode + +if args.mode == 'local': + LOCAL_RANK = int(os.environ.get('LOCAL_RANK', '0')) + torch.cuda.set_device(LOCAL_RANK) + +logger = get_logger() + + +def create_dataset(): + dataset = Dataset( + dataset_meta=DatasetMeta('ms://modelscope/competition_math')) + dataset.set_template('Template', + model_id=args.model) + dataset.map('CompetitionMathProcessor') + dataset.encode(batched=True, load_from_cache_file=False) + return dataset + + +def train(): + # Calculate dp_size from world_size and other parallelism dimensions if not specified + if args.mode == 'local': + WORLD_SIZE = int(os.environ.get('WORLD_SIZE', '1')) + else: + WORLD_SIZE = 8 # Default for Ray mode + + dp_size = args.dp_size + if dp_size is None: + # Auto-calculate dp_size from world_size and other parallelism dimensions + dp_size = WORLD_SIZE // (args.tp_size * args.pp_size * args.cp_size) + if dp_size < 1: + raise ValueError(f"Invalid parallelism config: world_size={WORLD_SIZE}, " + f"tp_size={args.tp_size}, pp_size={args.pp_size}, cp_size={args.cp_size}. 
" + f"Total parallelism ({args.tp_size * args.pp_size * args.cp_size}) exceeds world_size.") + + # Validate total parallelism matches world_size + total_parallelism = dp_size * args.tp_size * args.pp_size * args.cp_size + if total_parallelism != WORLD_SIZE: + raise ValueError(f"Total parallelism ({total_parallelism}) != world_size ({WORLD_SIZE}). " + f"dp={dp_size}, tp={args.tp_size}, pp={args.pp_size}, cp={args.cp_size}") + + # Use DeviceMesh.from_sizes for flexible parallelism configuration + device_mesh = DeviceMesh.from_sizes( + dp_size=dp_size, + pp_size=args.pp_size, + tp_size=args.tp_size, + cp_size=args.cp_size, + vpp_size=args.vpp_size, + ) + + # Device group name - used as remote_group in Ray mode + GROUP_NAME = 'model' + + device_group = [ + DeviceGroup( + name=GROUP_NAME, + ranks=device_mesh.world_size, + device_type=Platform.get_platform().device_prefix(), + ) + ] + + twinkle.initialize( + mode=args.mode, + nproc_per_node=device_mesh.world_size, + groups=device_group, + global_device_mesh=device_mesh, + lazy_collect=args.mode == 'ray', + ) + + # For VPP, num_microbatches must be >= PP * VPP + # micro_batch_size is per forward step, batch_size is total per optimizer step + micro_batch_size = args.micro_batch_size + batch_size = micro_batch_size * device_mesh.data_world_size + + # In Ray mode, pass remote_group + _remote_args = {'device_mesh': device_mesh} + if args.mode == 'ray': + _remote_args['remote_group'] = GROUP_NAME + + dataloader = DataLoader(dataset=create_dataset, batch_size=batch_size, **_remote_args) + model = MegatronModel( + model_id=args.model, + sequence_parallel=args.sequence_parallel, + mixed_precision='bf16', + recompute_granularity='selective', + **_remote_args + ) + + lora_config = LoraConfig(target_modules='all-linear') + adapter_name = 'lora' + model.add_adapter_to_model(adapter_name, + lora_config, + gradient_accumulation_steps=GAS) + model.set_template('Template', model_id=args.model, adapter_name=adapter_name) + 
model.set_processor(InputProcessor, + padding_side='right', + adapter_name=adapter_name) + model.set_optimizer('default', lr=1e-4, adapter_name=adapter_name) + model.set_lr_scheduler('default', lr_decay_steps=1000, max_lr=1e-4, adapter_name=adapter_name) + logger.info(get_device_placement()) + logger.info(model.get_train_configs(adapter_name=adapter_name)) + + for step, batch in enumerate(dataloader): + output = model.forward_backward(inputs=batch, + adapter_name=adapter_name) + if step % GAS == 0: + loss_value = output() if callable(output) else output + logger.info(f'Step {step // GAS}, loss: {loss_value}') + model.clip_grad_norm(1.0, adapter_name=adapter_name) + model.step(adapter_name=adapter_name) + model.zero_grad(adapter_name=adapter_name) + model.lr_step(adapter_name=adapter_name) + # Early stop for testing + if args.max_steps and step >= args.max_steps * GAS: + logger.info(f'Reached max_steps ({args.max_steps}), stopping.') + break + model.save('./output/megatron_lora', adapter_name=adapter_name) + logger.info('Training completed!') + + +def cleanup(): + """Clean up distributed resources.""" + import torch.distributed as dist + try: + if dist.is_initialized(): + dist.barrier() + from megatron.core import parallel_state as mpu + if mpu.is_initialized(): + mpu.destroy_model_parallel() + except Exception as e: + logger.warning(f"Error during cleanup: {e}") + if dist.is_initialized(): + dist.destroy_process_group() + + +if __name__ == '__main__': + try: + train() + finally: + cleanup() diff --git a/cookbook/legacy/moe_lora.py b/cookbook/legacy/moe_lora.py new file mode 100644 index 00000000..734c250d --- /dev/null +++ b/cookbook/legacy/moe_lora.py @@ -0,0 +1,180 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +"""Megatron-Core MoE (Mixture of Experts) LoRA training example. + +Supports Expert Parallel (EP) training in both local (torchrun) and Ray modes. 
+ +Usage (Local mode with EP=2): + torchrun --nproc_per_node=4 cookbook/megatron/moe_lora.py --tp_size 2 --pp_size 1 --ep_size 2 + +Usage (Ray mode with EP=2): + TRUST_REMOTE_CODE=1 python cookbook/megatron/moe_lora.py --mode ray --tp_size 2 --pp_size 1 --ep_size 2 --num_gpus 4 +""" +import argparse +import os + +# CRITICAL: Set CUDA device before any CUDA imports (local mode only) +import torch +from peft import LoraConfig + +import twinkle +from twinkle import (DeviceGroup, DeviceMesh, Platform, get_device_placement, + get_logger) +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.processor import InputProcessor + +GAS = 16 # gradient accumulation steps +# Parse arguments first to determine mode + +parser = argparse.ArgumentParser() +parser.add_argument('--mode', + type=str, + default='ray', + choices=['local', 'ray']) +parser.add_argument('--tp_size', type=int, default=2) +parser.add_argument('--pp_size', type=int, default=2) +parser.add_argument('--vpp_size', type=int, default=None) +parser.add_argument('--cp_size', type=int, default=1) +parser.add_argument('--ep_size', + type=int, + default=2, + help='Expert parallel size') +parser.add_argument('--etp_size', + type=int, + default=None, + help='Expert Tensor Parallel size (default: None, derived from TP)') +parser.add_argument('--max_steps', type=int, default=100) +parser.add_argument('--micro_batch_size', type=int, default=1, + help='Micro batch size per DP rank') +parser.add_argument('--num_gpus', type=int, default=4) +parser.add_argument( + '--model', + type=str, + default='ms://Qwen/Qwen3-30B-A3B', + help='MoE model path. 
def train():
    """Train a MoE model with LoRA under TP/PP/EP parallelism.

    Reads the module-level ``args`` namespace and ``GAS``; saves the LoRA
    adapter to ``./output/megatron_moe_lora`` when training finishes.

    Raises:
        ValueError: if the world size is not divisible by tp*pp*ep, which
            would otherwise silently produce a wrong dp_size.
    """
    if args.mode == 'local':
        WORLD_SIZE = int(os.environ.get('WORLD_SIZE', '1'))
    else:
        WORLD_SIZE = args.num_gpus

    # Calculate DP size based on world size and parallelism dimensions.
    # ETP (Expert Tensor Parallel) is orthogonal to TP, so it is not part of
    # the world_size calculation.
    model_parallel_size = args.tp_size * args.pp_size * args.ep_size
    # FIX: validate divisibility explicitly (integer division would otherwise
    # silently drop ranks), mirroring the validation in the dense LoRA example.
    if WORLD_SIZE % model_parallel_size != 0:
        raise ValueError(
            f'world_size ({WORLD_SIZE}) must be divisible by tp*pp*ep '
            f'({model_parallel_size}); got tp={args.tp_size}, '
            f'pp={args.pp_size}, ep={args.ep_size}')
    dp_size = WORLD_SIZE // model_parallel_size
    assert dp_size > 0, f"dp_size must be greater than 0, got {dp_size}"
    device_mesh = DeviceMesh.from_sizes(dp_size=dp_size, pp_size=args.pp_size,
                                        tp_size=args.tp_size, cp_size=args.cp_size,
                                        ep_size=args.ep_size,
                                        etp_size=args.etp_size, vpp_size=args.vpp_size)

    # Device group name - used as remote_group in Ray mode
    GROUP_NAME = 'model'

    device_group = [
        DeviceGroup(
            name=GROUP_NAME,
            ranks=device_mesh.world_size,
            device_type=Platform.get_platform().device_prefix(),
        )
    ]

    twinkle.initialize(
        mode=args.mode,
        nproc_per_node=device_mesh.world_size,
        ncpu_proc_per_node=16,
        groups=device_group,
        global_device_mesh=device_mesh,
        lazy_collect=True,
    )

    # Batch size must be >= data_parallel_size and divisible by it.
    micro_batch_size = args.micro_batch_size
    batch_size = micro_batch_size * device_mesh.data_world_size

    _remote_args = {}
    if args.mode == 'ray':
        _remote_args = {
            'remote_group': GROUP_NAME,
            'device_mesh': device_mesh,
        }

    dataloader = DataLoader(dataset=create_dataset, batch_size=batch_size,
                            device_mesh=device_mesh, **_remote_args)
    model = MegatronModel(
        model_id=args.model,
        sequence_parallel=args.sequence_parallel,
        mixed_precision='bf16',
        recompute_granularity='selective',
        **_remote_args
    )

    # LoRA config - target all linear layers in MoE (including experts)
    lora_config = LoraConfig(
        target_modules='all-linear',
        r=8,
        lora_alpha=8,
        lora_dropout=0.0,
    )
    adapter_name = 'lora'
    model.add_adapter_to_model(adapter_name, lora_config,
                               gradient_accumulation_steps=GAS)
    model.set_template('Template', model_id=args.model, adapter_name=adapter_name)
    model.set_processor(InputProcessor, padding_side='right', adapter_name=adapter_name)
    model.set_optimizer('default', lr=1e-4, adapter_name=adapter_name)
    model.set_lr_scheduler('default', max_lr=1e-4, lr_decay_steps=1000,
                           adapter_name=adapter_name)
    logger.info(get_device_placement())
    logger.info(model.get_train_configs(adapter_name=adapter_name))

    for step, batch in enumerate(dataloader):
        output = model.forward_backward(inputs=batch, adapter_name=adapter_name)
        if step % GAS == 0:
            loss_value = output() if callable(output) else output
            logger.info(f'Step {step // GAS}, loss: {loss_value}')
            model.clip_grad_norm(1.0, adapter_name=adapter_name)
            model.step(adapter_name=adapter_name)
            model.zero_grad(adapter_name=adapter_name)
            model.lr_step(adapter_name=adapter_name)
        # Early stop for testing
        if args.max_steps and step >= args.max_steps * GAS:
            logger.info(f'Reached max_steps ({args.max_steps}), stopping.')
            break
    # FIX: pass adapter_name so the LoRA adapter (not the base model) is saved,
    # consistent with every other model call in this function and with the
    # dense-LoRA cookbook example.
    model.save('./output/megatron_moe_lora', adapter_name=adapter_name)
    logger.info('Training completed!')
if __name__ == '__main__':
    # FIX: cleanup() was defined above but never invoked, leaking the
    # process group / Megatron parallel state on exit. Mirror the dense
    # LoRA example and always run it, even when train() raises.
    try:
        train()
    finally:
        cleanup()
def train():
    """LoRA fine-tune on NPU: shard the DataLoader across the 'model' group,
    accumulate gradients over 16 micro-steps, checkpoint every 50 steps."""
    dataset = create_dataset()
    # Use the model group so the DataLoader is sharded across NPU ranks.
    dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, remote_group='model')

    model = TransformersModel(model_id=MODEL_PATH, remote_group='model')

    lora_config = LoraConfig(
        target_modules='all-linear'
    )

    model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=16)
    model.set_template('Template')
    model.set_processor(InputProcessor, padding_side='right')
    model.set_loss(CrossEntropyLoss)
    model.set_optimizer(AdamW, lr=1e-4)
    model.set_lr_scheduler(LinearLR)
    logger.info(get_device_placement())
    logger.info(model.get_train_configs())
    for step, batch in enumerate(dataloader):
        output = model.forward_backward(inputs=batch)
        if step % 16 == 0:
            logger.info(f'Current is step {step // 16}, loss: {output}')
            model.clip_grad_norm(1.0)
            model.step()
            model.zero_grad()
            model.lr_step()
        # FIX: guard with step > 0 — the original saved at step 0, before any
        # optimizer step had run, matching the guard used in the twinkle
        # remote LoRA example.
        if step > 0 and step % 50 == 0:
            model.save('./output')
def load_tokenizer(
    model_id: str,
    *,
    local_path: str | None = None,
    local_path_env: str = "TWINKLE_MODEL_PATH",
    trust_remote_code: bool = True,
):
    """Load a tokenizer via ModelScope, preferring a local checkout.

    Resolution order: explicit ``local_path`` argument, then the directory
    named by the ``local_path_env`` environment variable, then the remote
    ``model_id``. A candidate path is used only if it is an existing
    directory.
    """
    requires("modelscope")
    from modelscope import AutoTokenizer

    candidate = local_path if local_path is not None else os.environ.get(local_path_env)
    source = candidate if candidate and os.path.isdir(candidate) else model_id
    return AutoTokenizer.from_pretrained(source, trust_remote_code=trust_remote_code)
def process_example(example: dict, tokenizer) -> types.Datum:
    """Turn one {input, output} pair into a next-token-prediction Datum.

    Prompt tokens carry loss weight 0, completion tokens weight 1. Targets
    are the token sequence shifted left by one, so position i predicts
    token i+1.
    """
    # For most real use cases you'd use a renderer / chat template; this
    # simple English -> Pig Latin prompt keeps the demo minimal.
    prompt = f"English: {example['input']}\nPig Latin:"

    prompt_ids = tokenizer.encode(prompt, add_special_tokens=True)
    # Leading space joins the completion to the prompt; double newline ends it.
    completion_ids = tokenizer.encode(f" {example['output']}\n\n", add_special_tokens=False)

    tokens = prompt_ids + completion_ids
    # Loss mask: supervise only the completion portion.
    weights = [0] * len(prompt_ids) + [1] * len(completion_ids)

    # A Datum is a single training example: model_input feeds the LLM,
    # loss_fn_inputs carries the extra tensors the loss function needs.
    return types.Datum(
        model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
        loss_fn_inputs=dict(weights=weights[1:], target_tokens=tokens[1:]),
    )
+ logprobs = np.concatenate([output['logprobs'].tolist() for output in fwdbwd_result.loss_fn_outputs]) + weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples]) + print(f"Epoch {epoch}, Batch {batch}: Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}") + + save_future = training_client.save_state(f"pig-latin-lora-epoch-{epoch}") + save_result = save_future.result() + print(f"Saved checkpoint for epoch {epoch} to {save_result.path}") + +#%% +# First, create a sampling client. We need to transfer weights +sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model') + +# Now, we can sample from the model. +prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:")) +params = types.SamplingParams(max_tokens=20, temperature=0.0, stop=["\n"]) # Greedy sampling +future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8) +result = future.result() +print("Responses:") +for i, seq in enumerate(result.sequences): + print(f"{i}: {repr(tokenizer.decode(seq.tokens))}") +# %% diff --git a/cookbook/legacy/remote/tinker/ascend/server.py b/cookbook/legacy/remote/tinker/ascend/server.py new file mode 100644 index 00000000..b7515dbf --- /dev/null +++ b/cookbook/legacy/remote/tinker/ascend/server.py @@ -0,0 +1,9 @@ +import os +os.environ['RAY_DEBUG'] = '1' + +from twinkle.server import launch_server + +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 'server_config.yaml') + +launch_server(config_path=config_path) diff --git a/cookbook/legacy/remote/tinker/ascend/server_config.yaml b/cookbook/legacy/remote/tinker/ascend/server_config.yaml new file mode 100644 index 00000000..0ced7d62 --- /dev/null +++ b/cookbook/legacy/remote/tinker/ascend/server_config.yaml @@ -0,0 +1,98 @@ +server_type: tinker +proxy_location: EveryNode +http_options: + host: 0.0.0.0 + port: 9009 + +applications: + - name: 
server + route_prefix: /api/v1 + import_path: server + args: + supported_models: + - model_name: "Qwen/Qwen3-0.6B" + + deployments: + - name: TwinkleServer + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 + logging_config: + log_level: DEBUG + + - name: models-Qwen2.5-7B-Instruct + # route_prefix 需要是 HTTP 路径前缀;不要用本地文件路径 + route_prefix: /api/v1/model/Qwen/Qwen3-0.6B + import_path: model + args: + model_id: "/home/zyh/model/Qwen3-0.6B" + nproc_per_node: 2 + device_group: + name: model + # 这里的 ranks 是 Twinkle/Ray 的"逻辑 rank",不是物理 NPU 号。 + # 物理卡选择由 ASCEND_RT_VISIBLE_DEVICES 控制;显式写入可见设备,避免 worker 环境丢失。 + ranks: [0,1] + device_type: npu + visible_devices: "8,9" + device_mesh: + device_type: npu + mesh: [0,1] + mesh_dim_names: ['dp'] + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + - name: sampler-Qwen3-0.6B + route_prefix: /api/v1/sampler/Qwen/Qwen3-0.6B + import_path: sampler + args: + model_id: "/home/zyh/model/Qwen3-0.6B" + nproc_per_node: 1 + sampler_type: torch + device_group: + name: sampler + ranks: 1 + device_type: CPU + device_mesh: + device_type: cpu + mesh: [0] + mesh_dim_names: ['dp'] + deployments: + - name: SamplerManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + # - name: processor + # route_prefix: /processors + # import_path: main:processor_app + # args: + # nproc_per_node: 2 + # ncpu_proc_per_node: 2 + # device_group: + # name: model + # ranks: 2 + # device_type: CPU + # device_mesh: + # device_type: CPU + # mesh: [0,1] + # mesh_dim_names: ['dp'] + # deployments: + # - name: ProcessorManagement + # autoscaling_config: + # min_replicas: 1 + # max_replicas: 1 + # target_ongoing_requests: 128 + # ray_actor_options: + # num_cpus: 0.1 diff --git 
#%%
# FIX: `os` is used below (os.environ['EAS_URL']) but was never imported,
# raising NameError at startup. The ascend variant of this script imports it.
import os

import dotenv
dotenv.load_dotenv(".env")

from twinkle_client import init_tinker_compat_client
service_client = init_tinker_compat_client(base_url=os.environ['EAS_URL'])

print("Available models:")
for item in service_client.get_server_capabilities().supported_models:
    print("- " + item.model_name)
def process_example(example: dict, tokenizer) -> types.Datum:
    """Convert an {input, output} pair into a shifted next-token Datum.

    The prompt is masked out of the loss (weight 0); only the completion
    tokens (weight 1) are supervised. Inputs drop the final token and
    targets/weights drop the first, aligning each position with the token
    it must predict.
    """
    # For most real use cases, you'll want to use a renderer / chat template
    # (see later docs), but here we keep it simple.
    prompt = f"English: {example['input']}\nPig Latin:"
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True)

    # Add a space before the output string, and finish with double newline.
    completion_tokens = tokenizer.encode(f" {example['output']}\n\n",
                                         add_special_tokens=False)

    tokens = prompt_tokens + completion_tokens
    weights = [0] * len(prompt_tokens) + [1] * len(completion_tokens)

    # A datum is a single training example for the loss function:
    # model_input is the sequence fed to the LLM; loss_fn_inputs holds the
    # extra per-token tensors the loss consumes.
    return types.Datum(
        model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
        loss_fn_inputs=dict(weights=weights[1:], target_tokens=tokens[1:]),
    )
+prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:")) +params = types.SamplingParams(max_tokens=20, temperature=0.0, stop=["\n"]) # Greedy sampling +future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8) +result = future.result() +print("Responses:") +for i, seq in enumerate(result.sequences): + print(f"{i}: {repr(tokenizer.decode(seq.tokens))}") +# %% diff --git a/cookbook/legacy/remote/tinker/server.py b/cookbook/legacy/remote/tinker/server.py new file mode 100644 index 00000000..1fd179f1 --- /dev/null +++ b/cookbook/legacy/remote/tinker/server.py @@ -0,0 +1,9 @@ +import os +os.environ['RAY_DEBUG'] = '1' + +from twinkle.server import launch_server + +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 'server_config.yaml') + +launch_server(config_path=config_path) \ No newline at end of file diff --git a/cookbook/legacy/remote/tinker/server_config.yaml b/cookbook/legacy/remote/tinker/server_config.yaml new file mode 100644 index 00000000..e8f4ec77 --- /dev/null +++ b/cookbook/legacy/remote/tinker/server_config.yaml @@ -0,0 +1,74 @@ +server_type: tinker +proxy_location: EveryNode +http_options: + host: 0.0.0.0 + port: 8000 + +applications: + - name: server + route_prefix: /api/v1 + import_path: server + args: + + deployments: + - name: TinkerCompatServer + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 + logging_config: + log_level: DEBUG + + - name: models-Qwen2.5-0.5B-Instruct + route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct + import_path: model + args: + model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct" + nproc_per_node: 2 + device_group: + name: model + ranks: [0, 1] + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0, 1] + mesh_dim_names: ['dp'] + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + 
def train():
    """Remote LoRA fine-tune of Qwen2.5-7B via the twinkle client,
    resuming from the last discovered checkpoint when one exists."""
    ds = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition'))
    ds.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512)
    ds.map('SelfCognitionProcessor', init_args={'model_name': 'twinkle模型', 'model_author': 'twinkle团队'})
    ds.encode(batched=True)
    loader = DataLoader(dataset=ds, batch_size=8)

    model = MultiLoraTransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct')

    model.add_adapter_to_model('default',
                               LoraConfig(target_modules='all-linear'),
                               gradient_accumulation_steps=2)
    model.set_template('Template')
    model.set_processor('InputProcessor', padding_side='right')
    model.set_loss('CrossEntropyLoss')
    model.set_optimizer('AdamW', lr=1e-4)
    model.set_lr_scheduler('LinearLR')
    # Resume training if a checkpoint path was discovered at module level.
    if resume_path:
        model.load(resume_path, load_optimizer=True)
    logger.info(model.get_train_configs())
    for step, batch in enumerate(loader):
        output = model.forward_backward(inputs=batch)
        if step % 2 == 0:
            logger.info(f'Current is step {step // 2}, loss: {output}')
            model.clip_grad_norm(1.0)
            model.step()
            model.zero_grad()
            model.lr_step()
        if step > 0 and step % 8 == 0:
            logger.info(f'Saving checkpoint at step {step}')
            model.save(f'step-{step}', save_optimizer=True)
+server_type: twinkle +proxy_location: EveryNode +http_options: + host: 0.0.0.0 + port: 9000 + +applications: + - name: server + route_prefix: /server + import_path: server + args: + + deployments: + - name: TwinkleServer + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 + logging_config: + log_level: DEBUG + + - name: models-Qwen2.5-7B-Instruct + route_prefix: /models/Qwen/Qwen2.5-7B-Instruct + import_path: model + args: + model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct" + nproc_per_node: 2 + device_group: + name: model + ranks: [0,1] + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0,1] + mesh_dim_names: ['dp'] + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + - name: processor + route_prefix: /processors + import_path: processor + args: + nproc_per_node: 2 + ncpu_proc_per_node: 2 + device_group: + name: model + ranks: 2 + device_type: CPU + device_mesh: + device_type: CPU + mesh: [0,1] + mesh_dim_names: ['dp'] + deployments: + - name: ProcessorManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 \ No newline at end of file diff --git a/cookbook/legacy/sampler/sampler_demo.py b/cookbook/legacy/sampler/sampler_demo.py new file mode 100644 index 00000000..3548a0e5 --- /dev/null +++ b/cookbook/legacy/sampler/sampler_demo.py @@ -0,0 +1,52 @@ +import twinkle +from twinkle import DeviceGroup, DeviceMesh +from transformers import AutoTokenizer +from twinkle.sampler import vLLMSampler +from twinkle.data_format.sampling import SamplingParams +from twinkle.template import Template +from twinkle.data_format import Trajectory + +MODEL_ID = 'Qwen/Qwen2.5-7B-Instruct' +VLLM_TP = 2 # Tensor parallelism for vLLM (GPUs per worker) +VLLM_DP = 2 # Data parallelism (number of workers) +NUM_GPUS = 
VLLM_TP * VLLM_DP # Total GPUs = 8 +URI = "twinkle://tml-EMPTY_TOKEN/20260203_211942-Qwen_Qwen2_5-7B-Instruct-11cdabc7/weights/twinkle-lora-2" + +if __name__ == '__main__': + twinkle.initialize( + mode='ray', + nproc_per_node=NUM_GPUS, + groups=[ + # gpus_per_worker=VLLM_TP creates 4 workers (8 GPUs / 2 per worker) + DeviceGroup(name='sampler', ranks=list(range(NUM_GPUS)), device_type='GPU', gpus_per_worker=VLLM_TP), + ], + ) + + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) + engine_kwargs = { + 'gpu_memory_utilization': 0.4, + 'max_model_len': 1024, + # 'enforce_eager': True, + # 'enable_sleep_mode': True, + } + sampler = vLLMSampler( + model_id=MODEL_ID, + engine_args=engine_kwargs, + device_mesh=DeviceMesh.from_sizes(world_size=VLLM_DP, dp_size=VLLM_DP), + remote_group='sampler', + ) + sampler.set_template(Template, model_id=MODEL_ID) + trajectory = Trajectory(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who are you?'}]) + + num_prompts = 4 + num_samples = 2 # Generate 2 completions per prompt + sampling_params = SamplingParams(max_tokens=128, temperature=1.0) + + # Pass num_samples to sample() method (aligned with tinker's API) + response = sampler.sample([trajectory] * num_prompts, sampling_params, adapter_uri=URI, num_samples=num_samples) + if callable(response): + response = response() + + for i, seq in enumerate(response.sequences): + text = tokenizer.decode(seq.tokens, skip_special_tokens=True) + print(f"{i}:\n {text}") diff --git a/cookbook/legacy/sft/ep_fsdp_qwen3_moe.py b/cookbook/legacy/sft/ep_fsdp_qwen3_moe.py new file mode 100644 index 00000000..68c6d15a --- /dev/null +++ b/cookbook/legacy/sft/ep_fsdp_qwen3_moe.py @@ -0,0 +1,109 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. 
+import os + +import torch.distributed as dist +from transformers import AutoConfig + +import twinkle +from twinkle import DeviceMesh, Platform, get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel + +logger = get_logger() + +MODEL_ID = os.environ.get( + 'QWEN3_MODEL_ID', 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507') +DATASET_ID = os.environ.get( + 'QWEN3_DATASET_ID', '/path/to/alpaca/dataset') +TEMPLATE_ID = os.environ.get('QWEN3_TEMPLATE_ID', 'Template') +PROCESSOR_ID = os.environ.get('QWEN3_PROCESSOR_ID', 'AlpacaProcessor') +NUM_LAYERS = int(os.environ.get('QWEN3_NUM_LAYERS', '1')) + +# 4 GPUs: dp=2, ep=2 +device_mesh = DeviceMesh.from_sizes( + device_type=Platform.get_platform().device_prefix(), + dp_size=2, + ep_size=2, +) + +twinkle.initialize( + mode='local', + nproc_per_node=4, + global_device_mesh=device_mesh, +) + + +def train(): + config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True) + if hasattr(config, "num_hidden_layers"): + original_layers = config.num_hidden_layers + if NUM_LAYERS < original_layers: + os.environ.setdefault('TRANSFORMERS_VERBOSITY', 'error') + os.environ.setdefault('TRANSFORMERS_NO_ADVISORY_WARNINGS', '1') + try: + from transformers.utils import logging as hf_logging + hf_logging.set_verbosity_error() + except Exception: + pass + config.num_hidden_layers = NUM_LAYERS + if hasattr(config, "use_cache"): + config.use_cache = False + + dataset = Dataset(dataset_meta=DatasetMeta(DATASET_ID)) + try: + dataset.set_template(TEMPLATE_ID, model_id=MODEL_ID) + except ValueError: + # Fallback to built-in Template when a plugin id is not available. 
+ dataset.set_template('Template', model_id=MODEL_ID) + + processor = PROCESSOR_ID + if PROCESSOR_ID.lower() == 'alpaca': + processor = 'AlpacaProcessor' + + dataset.map(processor) + dataset.encode(batched=True) + dataloader = DataLoader( + dataset=dataset, + batch_size=4, + device_mesh=device_mesh, + ) + + grad_accum_steps = 4 + model = TransformersModel( + model_id=MODEL_ID, + config=config, + device_mesh=device_mesh, + fsdp_config={ + "expert_parallel": { + "enabled": True, + "router_dtype": "fp32", + "all_to_all": "torch", + "keep_router_logits": False, + } + }, + ) + # Disable foreach to avoid DTensor mixed-type errors in EP runs. + model.set_optimizer('AdamW', foreach=False) + + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + + for step, batch in enumerate(dataloader): + if callable(batch): + batch = batch() + model.forward_backward( + inputs=batch, gradient_accumulation_steps=grad_accum_steps) + model.clip_grad_and_step(gradient_accumulation_steps=grad_accum_steps) + if step % grad_accum_steps == 0: + metric = model.calculate_metric(is_training=True) + if callable(metric): + metric = metric() + logger.info( + f'Current is step {step // grad_accum_steps}, metric: {metric}') + if step > 1 and step % 50 == 0: + model.save('./output') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/full_sft.py b/cookbook/legacy/sft/full_sft.py new file mode 100644 index 00000000..540092b9 --- /dev/null +++ b/cookbook/legacy/sft/full_sft.py @@ -0,0 +1,50 @@ +import numpy as np + +import twinkle +from twinkle import get_device_placement, get_logger, DeviceMesh, DeviceGroup, Platform +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel + +logger = get_logger() + + +device_group = [ + DeviceGroup( + name='model', + ranks=[0,1,2,3], + device_type=Platform.get_platform().device_prefix(), + ) +] + + +device_mesh = DeviceMesh( + 
device_type='cuda', + mesh=np.array([[0,1], [2,3]]), + mesh_dim_names=('dp', 'fsdp') +) + +twinkle.initialize(mode='ray', nproc_per_node=4, groups=device_group, global_device_mesh=device_mesh) + + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://modelscope/competition_math')) + dataset.set_template('Qwen3Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map('CompetitionMathProcessor') + dataset.encode(batched=True) + dataloader = DataLoader(dataset=dataset, batch_size=8, remote_group='model') + + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='model', gradient_accumulation_steps=16) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + for step, batch in enumerate(dataloader): + output = model.forward_backward(inputs=batch) + if step % 16 == 0: + logger.info(f'Current is step {step // 16}, loss: {output}') + model.clip_grad_and_step() + if step % 50 == 0: + model.save('./output') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/local_dataset.py b/cookbook/legacy/sft/local_dataset.py new file mode 100644 index 00000000..c41bcfb8 --- /dev/null +++ b/cookbook/legacy/sft/local_dataset.py @@ -0,0 +1,57 @@ +import numpy as np +from peft import LoraConfig + +import twinkle +from twinkle import get_device_placement, get_logger, DeviceMesh, DeviceGroup, Platform +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel + +logger = get_logger() + + +device_group = [ + DeviceGroup( + name='model', + ranks=[0,1,2,3], + device_type=Platform.get_platform().device_prefix(), + ) +] + + +device_mesh = DeviceMesh( + device_type='cuda', + mesh=np.array([[0,1], [2,3]]), + mesh_dim_names=('dp', 'fsdp') +) + +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://modelscope/competition_math')) + 
dataset.set_template('Qwen3Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map('CompetitionMathProcessor') + dataset.encode(batched=True) + dataloader = DataLoader(dataset=dataset, batch_size=8) + + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='model') + + lora_config = LoraConfig( + target_modules='all-linear' + ) + + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=16) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + for step, batch in enumerate(dataloader): + output = model.forward_backward(inputs=batch) + if step % 16 == 0: + logger.info(f'Current is step {step // 16}, loss: {output}') + model.clip_grad_and_step() + if step % 50 == 0: + model.save('./output') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/multi_lora.py b/cookbook/legacy/sft/multi_lora.py new file mode 100644 index 00000000..469041e4 --- /dev/null +++ b/cookbook/legacy/sft/multi_lora.py @@ -0,0 +1,69 @@ +from peft import LoraConfig +import twinkle +from tqdm import tqdm +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta, LazyDataset, PackingDataset, IterableDataset, IterablePackingDataset +from twinkle.model import MultiLoraTransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +twinkle.initialize(mode='local') + +logger = get_logger() + + +def eval(model: MultiLoraTransformersModel): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + 
model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(5000))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=8, num_workers=4) + + model = MultiLoraTransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=4) + model.set_optimizer('AdamW', lr=1e-4, adapter_name='default') + model.set_lr_scheduler('CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader), adapter_name='default') + logger.info(get_device_placement()) + logger.info(model.get_train_configs(adapter_name='default')) + logger.info(f'Total steps: {len(dataloader)//4}') + loss_metric = 99.0 + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch, adapter_name='default') + if step > 0 and step % 20 == 0: + logger.info(f'Current is step {step // 4} of {len(dataloader)//4}, metric: {model.calculate_metric(is_training=True, adapter_name="default")}') + model.clip_grad_and_step(adapter_name='default') + #if step > 0 and (step / 4) % 30 == 0: + # metrics = eval(model) + # logger.info(f'Eval metric: {metrics}') + # metrics['step'] = step + # if loss_metric > float(metrics['loss']): + # model.save(f'checkpoint-{step}') + # loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint', adapter_name='default') + model.load(f'last-checkpoint', adapter_name='default') + model.remove_adapter('default') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/single_program_megatron.py
b/cookbook/legacy/sft/single_program_megatron.py new file mode 100644 index 00000000..f92c20a5 --- /dev/null +++ b/cookbook/legacy/sft/single_program_megatron.py @@ -0,0 +1,95 @@ +import os +os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1" +os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1' +from peft import LoraConfig +import twinkle +from tqdm import tqdm +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta, LazyDataset, PackingDataset, IterableDataset, IterablePackingDataset +from twinkle.model import MultiLoraMegatronModel, MegatronModel +from twinkle.preprocessor import SelfCognitionProcessor +import torch +torch._dynamo.disable() +if Platform.get_rank() == 0: + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="megatron-swift", + ) + + +device_mesh = DeviceMesh.from_sizes(cp_size=2, pp_size=2, tp_size=2, dp_size=2, sequence_parallel=True) +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500), max_length=256)) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=256) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False) + dataset.encode(batched=True, load_from_cache_file=False) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=256) 
+ dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=16, num_workers=0) + + model = MultiLoraMegatronModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', mixed_precision='bf16', recompute_granularity='full', recompute_method='uniform', recompute_num_layers=1) + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=1) + model.set_optimizer(optimizer_cls='default', lr=1e-4, adapter_name='default') + model.set_lr_scheduler(scheduler_cls='default', lr_warmup_steps=5, lr_decay_steps=len(dataloader), adapter_name='default') + logger.info(get_device_placement()) + logger.info(model.get_train_configs(adapter_name='default')) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch, adapter_name='default') + # outputs = model.forward_only(inputs=batch, adapter_name='default') + model.clip_grad_and_step(adapter_name='default') + if step % 5 == 0: + metric = model.calculate_metric(is_training=True, adapter_name='default') + _metrics = {} + for key, value in metric.items(): + try: + value = float(value) + _metrics[key] = value + except: + pass + if Platform.get_rank() == 0: + swanlab.log(_metrics) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + #if step > 0 and (step / 4) % 30 == 0: + # metrics = eval(model) + # logger.info(f'Eval metric: {metrics}') + # metrics['step'] = step + # if loss_metric > float(metrics['loss']): + # model.save(f'checkpoint-{step}') + # loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint', adapter_name='default') + model.load(f'last-checkpoint', adapter_name='default') + # model.remove_adapter(adapter_name='default') + + +if __name__ == '__main__': + train() \ No newline at end of 
file diff --git a/cookbook/legacy/sft/single_program_megatron_full.py b/cookbook/legacy/sft/single_program_megatron_full.py new file mode 100644 index 00000000..1ef23e79 --- /dev/null +++ b/cookbook/legacy/sft/single_program_megatron_full.py @@ -0,0 +1,86 @@ +import os +os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1" +import torch +torch._dynamo.disable() +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0: + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="megatron-swift", + ) + +device_mesh = DeviceMesh.from_sizes(cp_size=2, pp_size=2, tp_size=2, dp_size=2) +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=4) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=16, 
num_workers=0) + + model = MegatronModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', mixed_precision='bf16', recompute_granularity='full', recompute_method='uniform', recompute_num_layers=1) + + model.set_optimizer(optimizer_cls='default', lr=1e-5) + model.set_lr_scheduler(scheduler_cls='default', lr_warmup_steps=10, lr_decay_steps=len(dataloader)) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch, micro_batch_size=1) + # outputs = model.forward_only(inputs=batch, adapter_name='default') + model.clip_grad_and_step() + if step % 5 == 0: + metric = model.calculate_metric(is_training=True) + _metrics = {} + for key, value in metric.items(): + try: + value = float(value) + _metrics[key] = value + except: + pass + if Platform.get_rank() == 0: + swanlab.log(_metrics) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + #if step > 0 and (step / 4) % 30 == 0: + # metrics = eval(model) + # logger.info(f'Eval metric: {metrics}') + # metrics['step'] = step + # if loss_metric > float(metrics['loss']): + # model.save(f'checkpoint-{step}') + # loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + # model.load(f'last-checkpoint', adapter_name='default') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/single_program_moe.py b/cookbook/legacy/sft/single_program_moe.py new file mode 100644 index 00000000..b6d8e191 --- /dev/null +++ b/cookbook/legacy/sft/single_program_moe.py @@ -0,0 +1,97 @@ +import os +os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1" +os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1' +from peft import LoraConfig +import twinkle +import time +from tqdm import tqdm +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from 
twinkle.dataset import Dataset, DatasetMeta, LazyDataset, PackingDataset, IterableDataset, IterablePackingDataset +from twinkle.model import MultiLoraMegatronModel, MegatronModel, TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor +import torch +torch._dynamo.disable() +if Platform.get_rank() == 0: + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="megatron-swift", + ) + + +device_mesh = DeviceMesh.from_sizes(dp_size=2, fsdp_size=4) +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500), max_length=256)) + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507', max_length=256) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False) + dataset.encode(batched=True, load_from_cache_file=False) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507', max_length=256) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=16, num_workers=0) + + model = TransformersModel(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507', fsdp_config={'transformer_cls_names_to_wrap':["Qwen3MoeSparseMoeBlock"]}) + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + model.apply_patch('ms://twinkle-kit/qwen3_moe_transformers4_patch') + # 
model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=1) + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader)) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch) + # outputs = model.forward_only(inputs=batch, adapter_name='default') + model.clip_grad_and_step() + if step % 1 == 0: + metric = model.calculate_metric(is_training=True) + _metrics = {} + for key, value in metric.items(): + try: + value = float(value) + _metrics[key] = value + except: + pass + if Platform.get_rank() == 0: + swanlab.log(_metrics) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + #if step > 0 and (step / 4) % 30 == 0: + # metrics = eval(model) + # logger.info(f'Eval metric: {metrics}') + # metrics['step'] = step + # if loss_metric > float(metrics['loss']): + # model.save(f'checkpoint-{step}') + # loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + time.sleep(3) + model.load(f'last-checkpoint') + # model.remove_adapter(adapter_name='default') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git a/cookbook/legacy/sft/streaming_dataset.py b/cookbook/legacy/sft/streaming_dataset.py new file mode 100644 index 00000000..bbfdcee5 --- /dev/null +++ b/cookbook/legacy/sft/streaming_dataset.py @@ -0,0 +1,67 @@ +import numpy as np +from peft import LoraConfig + +import twinkle +from twinkle import get_device_placement, get_logger, DeviceMesh, DeviceGroup, Platform +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel + +logger = get_logger() + + +device_group = [ + DeviceGroup( + name='model', + ranks=[0,1,2,3], + 
device_type=Platform.get_platform().device_prefix(), + ) +] + + +# device_mesh = DeviceMesh( +# device_type='cuda', +# mesh=np.array([[0,1], [2,3]]), +# mesh_dim_names=('dp', 'fsdp') +# ) + +device_mesh = DeviceMesh( + device_type='cuda', + mesh=np.array([0,1,2,3]), + mesh_dim_names=('dp',) +) + +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + + +def create_dataset(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://modelscope/competition_math'), streaming=True) + dataset.set_template('Qwen3Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map('CompetitionMathProcessor') + dataset.encode(batched=True) + return dataset + + +def train(): + dataloader = DataLoader(dataset=create_dataset, batch_size=8, remote_group='model') + + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='model') + + lora_config = LoraConfig( + target_modules='all-linear' + ) + + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=16) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + for step, batch in enumerate(dataloader): + output = model.forward_backward(inputs=batch) + if step % 16 == 0: + logger.info(f'Current is step {step // 16}, loss: {output}') + model.clip_grad_and_step() + if step % 50 == 0: + model.save('./output') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/sft/vlm_lora.py b/cookbook/legacy/sft/vlm_lora.py new file mode 100644 index 00000000..922c74d5 --- /dev/null +++ b/cookbook/legacy/sft/vlm_lora.py @@ -0,0 +1,228 @@ +# Copyright (c) twinkle authors. All rights reserved. +"""VLM (Vision-Language Model) LoRA training example with Transformers backend. + +This example demonstrates training Qwen3-VL with LoRA using the +HuggingFace Transformers backend (not Megatron). 
+ +Usage: + torchrun --nproc_per_node=2 cookbook/sft/vlm_lora.py +""" +import argparse +import io +import os +import time +from typing import Any, Dict + +import numpy as np +import torch +from PIL import Image +from peft import LoraConfig + +import twinkle +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.processor import InputProcessor +from twinkle.loss import CrossEntropyLoss +from torch.optim import AdamW +from torch.optim.lr_scheduler import LinearLR + +# Parse arguments +parser = argparse.ArgumentParser() +parser.add_argument('--model', type=str, default='Qwen/Qwen3-VL-8B-Instruct') +parser.add_argument('--dataset', type=str, default='ms://AI-ModelScope/LaTeX_OCR') +parser.add_argument('--subset', type=str, default='human_handwrite') +parser.add_argument('--samples', type=int, default=500) +parser.add_argument('--batch_size', type=int, default=None) +parser.add_argument('--max_steps', type=int, default=50) +parser.add_argument('--lr', type=float, default=1e-4) +parser.add_argument('--lora_rank', type=int, default=8) +parser.add_argument('--lora_alpha', type=int, default=32) +parser.add_argument('--lora_dropout', type=float, default=0.05) # swift uses 0.05 by default +parser.add_argument('--max_grad_norm', type=float, default=1.0) +args = parser.parse_args() + +# Initialize twinkle +twinkle.initialize(mode='local') + +logger = get_logger() + +GAS = 4 # gradient accumulation steps + + +def preprocess_latex_ocr(sample: Dict[str, Any]) -> Dict[str, Any]: + """Convert LaTeX_OCR sample to VLM format.""" + image_data = sample.get('image') + formula = sample.get('text', sample.get('formula', '')) + + if image_data is None or not formula: + return {'messages': [], 'images': []} + + # Convert image data to PIL Image + if isinstance(image_data, dict) and 'bytes' in image_data: + img = 
Image.open(io.BytesIO(image_data['bytes'])) + elif isinstance(image_data, Image.Image): + img = image_data + elif isinstance(image_data, bytes): + img = Image.open(io.BytesIO(image_data)) + else: + return {'messages': [], 'images': []} + + # Create conversation with image placeholder + messages = [ + {'role': 'user', 'content': '\nUsing LaTeX to perform OCR on the image.'}, + {'role': 'assistant', 'content': formula} + ] + + return {'messages': messages, 'images': [img]} + + +def create_dataset(): + """Create VLM dataset with preprocessing.""" + dataset = Dataset( + dataset_meta=DatasetMeta( + dataset_id=args.dataset, + subset_name=args.subset, + split='train', + data_slice=range(args.samples) + ) + ) + + # Apply preprocessing + dataset.dataset = dataset.dataset.map( + preprocess_latex_ocr, + batched=False, + load_from_cache_file=False + ) + + # Filter out invalid samples + dataset.dataset = dataset.dataset.filter( + lambda x: len(x.get('messages', [])) > 0 + ) + + # Set up Qwen3-VL template and encoding + dataset.set_template('Qwen3VLTemplate', model_id=args.model) + dataset.encode(batched=False, load_from_cache_file=False) + + return dataset + + +def train(): + """Main training function.""" + logger.info(f"VLM LoRA Training (Transformers backend)") + logger.info(f"Model: {args.model}") + logger.info(f"Dataset: {args.dataset}/{args.subset} ({args.samples} samples)") + logger.info(f"Batch size: {args.batch_size}") + logger.info(f"Gradient accumulation steps: {GAS}") + logger.info(f"Learning rate: {args.lr}") + logger.info(f"LoRA rank: {args.lora_rank}, alpha: {args.lora_alpha}, dropout: {args.lora_dropout}") + logger.info(f"Max grad norm: {args.max_grad_norm}") + logger.info("=" * 60) + + # Create dataloader + if args.batch_size is None: + args.batch_size = int(os.environ.get('WORLD_SIZE', 4)) + dataloader = DataLoader( + dataset=create_dataset, + batch_size=args.batch_size, + ) + from modelscope import Qwen3VLForConditionalGeneration + + model = TransformersModel( 
+ model_cls=Qwen3VLForConditionalGeneration, + model_id=args.model, + mixed_precision='bf16', + trust_remote_code=True, + ) + + # Configure LoRA + lora_config = LoraConfig( + target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], + r=args.lora_rank, + lora_alpha=args.lora_alpha, + lora_dropout=args.lora_dropout, + ) + + model.add_adapter_to_model( + 'vlm_lora', + lora_config, + gradient_accumulation_steps=GAS + ) + + # Set up template for VLM + model.set_template('Qwen3VLTemplate', model_id=args.model) + + # Set up processor for input collation + model.set_processor(InputProcessor, padding_side='right') + + # Set up loss + model.set_loss(CrossEntropyLoss) + + # Set up optimizer + model.set_optimizer(AdamW, lr=args.lr) + model.set_lr_scheduler(LinearLR) + + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + + # Training loop with detailed logging + losses = [] + grad_norms = [] + step_times = [] + total_start_time = time.time() + + for step, batch in enumerate(dataloader): + step_start_time = time.time() + + loss = model.forward_backward(inputs=batch) + grad_norm = model.clip_grad_and_step(max_grad_norm=args.max_grad_norm) + + step_end_time = time.time() + step_time = step_end_time - step_start_time + step_times.append(step_time) + + # Log every GAS steps (when optimizer actually steps) + if (step + 1) % GAS == 0: + optimizer_step = (step + 1) // GAS + loss_value = loss() if callable(loss) else loss + avg_loss = float(loss_value) if loss_value is not None else 0.0 + losses.append(avg_loss) + + # Get grad_norm (might be None if not synced) + grad_norm_value = float(grad_norm) if grad_norm is not None else 0.0 + grad_norms.append(grad_norm_value) + + avg_step_time = np.mean(step_times[-GAS:]) + + logger.info(f'Step {optimizer_step:3d} | loss: {avg_loss:.4f} | grad_norm: {grad_norm_value:.4f} | time: {avg_step_time:.3f}s') + + # Early stop based on optimizer steps + if args.max_steps and (step + 1) >= 
args.max_steps * GAS: + break + + total_time = time.time() - total_start_time + + # Summary + logger.info("=" * 60) + logger.info("TRAINING SUMMARY") + logger.info("=" * 60) + logger.info(f"Total optimizer steps: {len(losses)}") + logger.info(f"Total time: {total_time:.2f}s") + logger.info(f"Avg time per optimizer step: {total_time / len(losses) if losses else 0:.3f}s") + logger.info(f"Initial loss: {losses[0]:.4f}" if losses else "Initial loss: N/A") + logger.info(f"Final loss: {losses[-1]:.4f}" if losses else "Final loss: N/A") + logger.info(f"Avg grad_norm: {np.mean(grad_norms):.4f}" if grad_norms else "Avg grad_norm: N/A") + logger.info(f"Min grad_norm: {np.min(grad_norms):.4f}" if grad_norms else "Min grad_norm: N/A") + logger.info(f"Max grad_norm: {np.max(grad_norms):.4f}" if grad_norms else "Max grad_norm: N/A") + logger.info("=" * 60) + + # Save model + output_dir = './output/transformers_vlm_lora' + model.save(output_dir) + logger.info(f'Model saved to {output_dir}') + logger.info('VLM LoRA training completed!') + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/single_program_full.py b/cookbook/legacy/single_program_full.py new file mode 100644 index 00000000..76c4d4b0 --- /dev/null +++ b/cookbook/legacy/single_program_full.py @@ -0,0 +1,87 @@ +from peft import LoraConfig +import twinkle +import os +from tqdm import tqdm +from twinkle import Platform, DeviceMesh +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta, LazyDataset, PackingDataset, IterableDataset, IterablePackingDataset +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor +if Platform.get_rank() == 0: + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="TransformersModel", + ) + +device_mesh = DeviceMesh.from_sizes(dp_size=2, fsdp_size=2) + +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def
eval(model): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + +def train(): + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + dataset.map(SelfCognitionProcessor('swift-robot', 'swift')) + dataset.encode(batched=True) + # dataset.pack_dataset() + dataloader = DataLoader(dataset=dataset, batch_size=4, num_workers=4) + + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + model.set_optimizer('AdamW', lr=1e-4) + model.set_lr_scheduler('CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader)//8) + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)//8}') + loss_metric = 99.0 + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch) + model.clip_grad_and_step() + if step % 40 == 0: + metric = model.calculate_metric(is_training=True) + _metrics = {} + for key, value in metric.items(): + try: + value = float(value) + _metrics[key] = value + except: + pass + if Platform.get_rank() == 0: + swanlab.log(_metrics) + logger.info(f'Current is step {step // 8} of {len(dataloader)//8}, metric: {metric}') + #if step > 0 and (step / 4) % 30 == 0: + # metrics = eval(model) + # logger.info(f'Eval metric: {metrics}') + # 
metrics['step'] = step + # if loss_metric > float(metrics['loss']): + # model.save(f'checkpoint-{step}') + # loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint', interval=1) + + +if __name__ == '__main__': + train() diff --git a/cookbook/legacy/vlm_lora.py b/cookbook/legacy/vlm_lora.py new file mode 100644 index 00000000..8a1647c0 --- /dev/null +++ b/cookbook/legacy/vlm_lora.py @@ -0,0 +1,269 @@ +# Copyright (c) twinkle authors. All rights reserved. +# TODO: test + +"""Megatron-Core VLM (Vision-Language Model) LoRA training example. + +This example demonstrates training Qwen3-VL-8B-Instruct with LoRA using +Megatron-Core backend for efficient multi-GPU training. + +Usage (Local mode): + torchrun --nproc_per_node=2 cookbook/megatron/vlm_lora.py --tp_size 2 + +Usage (with custom model): + torchrun --nproc_per_node=4 cookbook/megatron/vlm_lora.py \ + --tp_size 2 --pp_size 2 \ + --model /path/to/Qwen3-VL-8B-Instruct +""" +import argparse +import io +import os +from typing import Any, Dict + +import numpy as np +import torch +from PIL import Image +from peft import LoraConfig + +import twinkle +from twinkle import DeviceGroup, DeviceMesh, Platform, get_logger, get_device_placement +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.processor import InputProcessor +from twinkle.utils.platform import is_last_rank + +# Parse arguments +parser = argparse.ArgumentParser() +parser.add_argument('--mode', type=str, default='local', choices=['local', 'ray']) +parser.add_argument('--tp_size', type=int, default=1) +parser.add_argument('--pp_size', type=int, default=1) +parser.add_argument('--cp_size', type=int, default=1) +parser.add_argument('--num_gpus', type=int, default=2, help='Number of GPUs (Ray mode)') +parser.add_argument('--max_steps', type=int, default=10) +parser.add_argument('--model', type=str, default='Qwen/Qwen3-VL-8B-Instruct') 
+parser.add_argument('--dataset', type=str, default='ms://AI-ModelScope/LaTeX_OCR') +parser.add_argument('--subset', type=str, default='human_handwrite') +parser.add_argument('--samples', type=int, default=1000) +parser.add_argument('--batch_size', type=int, default=4) +GAS = 4 # gradient accumulation steps +args = parser.parse_args() + +# Set mode before importing twinkle +os.environ['TWINKLE_MODE'] = args.mode + +if args.mode == 'local': + LOCAL_RANK = int(os.environ.get('LOCAL_RANK', '0')) + torch.cuda.set_device(LOCAL_RANK) + +logger = get_logger() + + +def preprocess_latex_ocr(sample: Dict[str, Any]) -> Dict[str, Any]: + """Convert LaTeX_OCR sample to VLM format.""" + image_data = sample.get('image') + formula = sample.get('text', sample.get('formula', '')) + + if image_data is None or not formula: + return {'messages': [], 'images': []} + + # Convert image data to PIL Image + if isinstance(image_data, dict) and 'bytes' in image_data: + img = Image.open(io.BytesIO(image_data['bytes'])) + elif isinstance(image_data, Image.Image): + img = image_data + elif isinstance(image_data, bytes): + img = Image.open(io.BytesIO(image_data)) + else: + return {'messages': [], 'images': []} + + # Create conversation with image placeholder + messages = [ + {'role': 'user', 'content': '\nUsing LaTeX to perform OCR on the image.'}, + {'role': 'assistant', 'content': formula} + ] + + return {'messages': messages, 'images': [img]} + + +def create_dataset(): + """Create VLM dataset with preprocessing.""" + dataset = Dataset( + dataset_meta=DatasetMeta( + dataset_id=args.dataset, + subset_name=args.subset, + split='train', + data_slice=range(args.samples) + ) + ) + + # Apply preprocessing + dataset.dataset = dataset.dataset.map( + preprocess_latex_ocr, + batched=False, + load_from_cache_file=False + ) + + # Filter out invalid samples + dataset.dataset = dataset.dataset.filter( + lambda x: len(x.get('messages', [])) > 0 + ) + + # Set up Qwen3-VL template and encoding + # Note: For VLM 
models, the template handles image token insertion + dataset.set_template('Qwen3VLTemplate', model_id=args.model) + dataset.encode(batched=False, load_from_cache_file=False) + + return dataset + + +def train(): + """Main training function.""" + TP_SIZE = args.tp_size + PP_SIZE = args.pp_size + CP_SIZE = args.cp_size + + if args.mode == 'local': + WORLD_SIZE = int(os.environ.get('WORLD_SIZE', '1')) + else: + WORLD_SIZE = args.num_gpus + + DP_SIZE = WORLD_SIZE // (TP_SIZE * PP_SIZE * CP_SIZE) + + logger.info(f"VLM LoRA Training: TP={TP_SIZE}, PP={PP_SIZE}, CP={CP_SIZE}, DP={DP_SIZE}") + logger.info(f"Model: {args.model}") + logger.info(f"Dataset: {args.dataset}/{args.subset} ({args.samples} samples)") + + # Device mesh: Use DeviceMesh.from_sizes for proper configuration + device_mesh = DeviceMesh.from_sizes( + dp_size=DP_SIZE, + pp_size=PP_SIZE, + tp_size=TP_SIZE, + cp_size=CP_SIZE, + ) + + GROUP_NAME = 'model' + device_group = [ + DeviceGroup( + name=GROUP_NAME, + ranks=list(range(WORLD_SIZE)), + device_type=Platform.get_platform().device_prefix(), + ) + ] + + twinkle.initialize( + mode=args.mode, + nproc_per_node=WORLD_SIZE, + groups=device_group, + global_device_mesh=device_mesh, + lazy_collect=False, + ) + + # Create dataloader + if args.mode == 'ray': + dataloader = DataLoader( + dataset=create_dataset, + batch_size=args.batch_size, + remote_group=GROUP_NAME, + device_mesh=device_mesh, + ) + else: + dataloader = DataLoader( + dataset=create_dataset, + batch_size=args.batch_size, + device_mesh=device_mesh, + ) + + # Create Megatron model + _remote_args = {} + if args.mode == 'ray': + _remote_args = { + 'remote_group': GROUP_NAME, + 'device_mesh': device_mesh, + } + + model = MegatronModel( + model_id=args.model, + device_mesh=device_mesh, + mixed_precision='bf16', + recompute_granularity='selective', + sequence_parallel=False, # VLM may have variable seq lengths + **_remote_args + ) + + # Configure LoRA + lora_config = LoraConfig( + target_modules='all-linear', + 
r=8, + lora_alpha=32, + lora_dropout=0.0, + ) + adapter_name = 'vlm_lora' + + model.add_adapter_to_model( + adapter_name, + lora_config, + gradient_accumulation_steps=GAS + ) + + # Set up template for VLM + # The template handles image token insertion for Qwen3-VL + model.set_template('Qwen3VLTemplate', model_id=args.model, adapter_name=adapter_name) + + # Set up processor for input collation + model.set_processor(InputProcessor, padding_side='right', adapter_name=adapter_name) + + # Set up optimizer (use Megatron's default optimizer) + model.set_optimizer('default', lr=1e-4, adapter_name=adapter_name) + model.set_lr_scheduler('default', lr_decay_steps=1000, max_lr=1e-4, adapter_name=adapter_name) + + logger.info(get_device_placement()) + logger.info(model.get_train_configs(adapter_name=adapter_name)) + + # Training loop + losses = [] + for step, batch in enumerate(dataloader): + output = model.forward_backward(inputs=batch, adapter_name=adapter_name) + + if step % GAS == 0: + loss_value = output() if callable(output) else output + avg_loss = float(loss_value) if loss_value is not None else 0.0 + logger.info(f'Step {step // GAS}, loss: {avg_loss:.4f}') + + model.clip_grad_norm(1.0, adapter_name=adapter_name) + model.step(adapter_name=adapter_name) + model.zero_grad(adapter_name=adapter_name) + model.lr_step(adapter_name=adapter_name) + + # Early stop + if args.max_steps and step >= args.max_steps * GAS: + logger.info(f'Reached max_steps ({args.max_steps}), stopping.') + break + + + # Save model + output_dir = './output/megatron_vlm_lora' + model.save(output_dir, adapter_name=adapter_name) + logger.info(f'Model saved to {output_dir}') + logger.info('VLM LoRA training completed!') + + +def cleanup(): + """Clean up distributed resources.""" + import torch.distributed as dist + try: + if dist.is_initialized(): + dist.barrier() + from megatron.core import parallel_state as mpu + if mpu.is_initialized(): + mpu.destroy_model_parallel() + except Exception as e: + 
logger.warning(f"Error during cleanup: {e}") + if dist.is_initialized(): + dist.destroy_process_group() + + +if __name__ == '__main__': + try: + train() + finally: + cleanup() diff --git a/cookbook/megatron/tp.py b/cookbook/megatron/tp.py new file mode 100644 index 00000000..d018e2a8 --- /dev/null +++ b/cookbook/megatron/tp.py @@ -0,0 +1,101 @@ +import os + +from peft import LoraConfig +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + # rank0 recording + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="twinkle", + ) + + +# Construct a device_mesh, tp=pp=cp=2, dp=1 +device_mesh = DeviceMesh.from_sizes(dp_size=1, tp_size=2, pp_size=2, cp_size=2) +# use torchrun mode +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + # 100 Samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode() + dataloader = DataLoader(dataset=dataset, batch_size=1) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + metrics = model.calculate_metric(is_training=False) + return metrics + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + 
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # Global batch size = 1, dp_size = 1 + dataloader = DataLoader(dataset=dataset, batch_size=16) + # Use a MegatronModel + model = MegatronModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='default', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='default', lr_warmup_steps=5, lr_decay_steps=len(dataloader)) + logger.info(get_device_placement()) + # Print the training config + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + # lora: 10G * 8 + # full: 40G * 8 + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 5 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + swanlab.log(metric) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + if step > 0 and step % 20 == 0: + metrics = eval(model) + logger.info(f'Eval metric: {metrics}') + metrics['step'] = step + if loss_metric > float(metrics['loss']): + model.save(f'checkpoint-{step}') + loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git a/cookbook/megatron/tp.sh b/cookbook/megatron/tp.sh new file mode 100644 index 00000000..ec2492c9 --- /dev/null +++ b/cookbook/megatron/tp.sh @@ -0,0 +1 @@ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 tp.py \ No newline at end of file
diff --git a/cookbook/megatron/tp_moe.py b/cookbook/megatron/tp_moe.py new file mode 100644 index 00000000..c56ddbc6 --- /dev/null +++ b/cookbook/megatron/tp_moe.py @@ -0,0 +1,100 @@ +import os + +from peft import LoraConfig +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import MegatronModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + # rank0 recording + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="twinkle", + ) + + +# Construct a device_mesh, tp=pp=cp=ep=2, dp=1 +device_mesh = DeviceMesh.from_sizes(dp_size=1, tp_size=2, pp_size=2, cp_size=2, ep_size=2) +# use torchrun mode +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + # 100 Samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507') + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode() + dataloader = DataLoader(dataset=dataset, batch_size=1) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + metrics = model.calculate_metric(is_training=False) + return metrics + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # Global batch size = 1, dp_size = 1 
+ dataloader = DataLoader(dataset=dataset, batch_size=16) + # Use a MegatronModel + model = MegatronModel(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='default', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='default', lr_warmup_steps=5, lr_decay_steps=len(dataloader)) + logger.info(get_device_placement()) + # Print the training config + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + # lora: 23G * 8 + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 5 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + swanlab.log(metric) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + if step > 0 and step % 20 == 0: + metrics = eval(model) + logger.info(f'Eval metric: {metrics}') + metrics['step'] = step + if loss_metric > float(metrics['loss']): + model.save(f'checkpoint-{step}') + loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git a/cookbook/megatron/tp_moe.sh b/cookbook/megatron/tp_moe.sh new file mode 100644 index 00000000..bcbde769 --- /dev/null +++ b/cookbook/megatron/tp_moe.sh @@ -0,0 +1 @@ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 tp_moe.py \ No newline at end of file diff --git a/cookbook/ray/run.sh b/cookbook/ray/run.sh new file mode 100644 index 00000000..ddb52ec8 --- /dev/null +++ b/cookbook/ray/run.sh
@@ -0,0 +1 @@ +python3 single_controller.py \ No newline at end of file diff --git a/cookbook/ray/single_controller.py b/cookbook/ray/single_controller.py new file mode 100644 index 00000000..078bcbf0 --- /dev/null +++ b/cookbook/ray/single_controller.py @@ -0,0 +1,110 @@ +import os + +from peft import LoraConfig +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform, DeviceGroup +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + # rank0 recording + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="twinkle", + ) + + +device_group = [ + DeviceGroup( + name='default', + ranks=8, + device_type='cuda', + ) + ] + +# Construct a device_mesh, fsdp=4, dp=2 +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# use ray mode +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + # 100 Samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode() + dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', 
model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # Global batch size = 8, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8) + # Use a TransformersModel + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader)) + logger.info(get_device_placement()) + # Print the training config + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + # lora: 18G * 4 + # full: 50G * 4 + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + swanlab.log(metric) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + if step > 0 and step % 40 == 0: + metrics = eval(model) + logger.info(f'Eval metric: {metrics}') + metrics['step'] = step + if loss_metric > float(metrics['loss']): + model.save(f'checkpoint-{step}') + loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git a/cookbook/transformers/ep_fsdp_qwen3_moe.py 
b/cookbook/transformers/ep_fsdp_qwen3_moe.py new file mode 100644 index 00000000..ac56159f --- /dev/null +++ b/cookbook/transformers/ep_fsdp_qwen3_moe.py @@ -0,0 +1,99 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import os + +import numpy as np +from transformers import AutoConfig + +import twinkle +from twinkle import DeviceMesh, Platform, get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +logger = get_logger() + +MODEL_ID = os.environ.get( + 'QWEN3_MODEL_ID', 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507') +DATASET_ID = os.environ.get( + 'DATASET_ID', 'ms://swift/self-cognition') +TEMPLATE_ID = os.environ.get('TEMPLATE_ID', 'Template') +_num_layers_env = os.environ.get('NUM_LAYERS') +NUM_LAYERS = int(_num_layers_env) if _num_layers_env is not None else None + +# 4 gpus, dp=2, ep=2 +dp_size = 2 +ep_size = 2 + +device_mesh = DeviceMesh( + device_type=Platform.get_platform().device_prefix(), + mesh=np.arange(dp_size * ep_size).reshape(dp_size, ep_size), + mesh_dim_names=("dp", "ep"), +) + +twinkle.initialize( + mode='local', + global_device_mesh=device_mesh, +) + + +def train(): + config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True) + if NUM_LAYERS is not None and hasattr(config, "num_hidden_layers"): + config.num_hidden_layers = NUM_LAYERS + if hasattr(config, "use_cache"): + config.use_cache = False + + dataset = Dataset(dataset_meta=DatasetMeta( + 'ms://swift/self-cognition', data_slice=range(1000))) + try: + dataset.set_template(TEMPLATE_ID, model_id=MODEL_ID) + except ValueError: + dataset.set_template('Template', model_id=MODEL_ID) + + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode(batched=True) + dataloader = DataLoader( + dataset=dataset, + batch_size=4, + device_mesh=device_mesh, + ) + + grad_accum_steps = 4 + 
model = TransformersModel( + model_id=MODEL_ID, + config=config, + device_mesh=device_mesh, + fsdp_config={ + "expert_parallel": { + "enabled": True, + "router_dtype": "fp32", + "all_to_all": "torch", + "keep_router_logits": False, + } + }, + ) + # Disable foreach to avoid DTensor mixed-type errors in EP runs. + model.set_optimizer('AdamW', foreach=False) + + logger.info(get_device_placement()) + logger.info(model.get_train_configs()) + + for step, batch in enumerate(dataloader): + if callable(batch): + batch = batch() + model.forward_backward( + inputs=batch, gradient_accumulation_steps=grad_accum_steps) + model.clip_grad_and_step(gradient_accumulation_steps=grad_accum_steps) + if step % grad_accum_steps == 0: + metric = model.calculate_metric(is_training=True) + if callable(metric): + metric = metric() + logger.info( + f'Current is step {step // grad_accum_steps}, metric: {metric}') + if step > 0 and step % 50 == 0: + model.save('./output') + + +if __name__ == '__main__': + train() diff --git a/cookbook/transformers/ep_fsdp_qwen3_moe.sh b/cookbook/transformers/ep_fsdp_qwen3_moe.sh new file mode 100644 index 00000000..cfc8a7cf --- /dev/null +++ b/cookbook/transformers/ep_fsdp_qwen3_moe.sh @@ -0,0 +1,7 @@ +# EP + FSDP2 (Transformers MoE) example. +# With expert_parallel enabled, expert parameters are sharded across the EP dimension. +# Non-expert parameters are sharded by FSDP (across world_size). +# Officially validated scope: qwen3_moe_like models (for example, Qwen3-30B-A3B). +# Other MoE models may work if their MoE blocks expose: `experts` + `gate/router` + `top_k` (or `num_experts_per_tok`). +# EP runtime constraints: `num_experts % ep_world_size == 0`. 
+CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 ep_fsdp_qwen3_moe.py diff --git a/cookbook/transformers/fsdp2.py b/cookbook/transformers/fsdp2.py new file mode 100644 index 00000000..af4a00bd --- /dev/null +++ b/cookbook/transformers/fsdp2.py @@ -0,0 +1,102 @@ +import os + +from peft import LoraConfig +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + # rank0 recording + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="twinkle", + ) + + +# Construct a device_mesh, fsdp=4, dp=2 +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# use torchrun mode +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + # 100 Samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode() + dataloader = DataLoader(dataset=dataset, batch_size=8) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode 
dataset + dataset.encode() + # Global batch size = 8, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8) + # Use a TransformersModel + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader)) + logger.info(get_device_placement()) + # Print the training config + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + # lora: 18G * 4 + # full: 50G * 4 + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + swanlab.log(metric) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + if step > 0 and step % 40 == 0: + metrics = eval(model) + logger.info(f'Eval metric: {metrics}') + metrics['step'] = step + if loss_metric > float(metrics['loss']): + model.save(f'checkpoint-{step}') + loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git a/cookbook/transformers/fsdp2.sh b/cookbook/transformers/fsdp2.sh new file mode 100644 index 00000000..57950493 --- /dev/null +++ b/cookbook/transformers/fsdp2.sh @@ -0,0 +1 @@ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 fsdp2.py \ No 
newline at end of file diff --git a/cookbook/transformers/fsdp2_moe.py b/cookbook/transformers/fsdp2_moe.py new file mode 100644 index 00000000..3de5cdd5 --- /dev/null +++ b/cookbook/transformers/fsdp2_moe.py @@ -0,0 +1,102 @@ +import os + +from peft import LoraConfig +from tqdm import tqdm + +import twinkle +from twinkle import DeviceMesh, Platform +from twinkle import get_device_placement, get_logger +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + # rank0 recording + import swanlab + swanlab.login(api_key=os.environ['SWANLAB_API_KEY'], save=True) + + run = swanlab.init( + project="twinkle", + ) + + +# Construct a device_mesh, fsdp=4, dp=2 +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# use torchrun mode +twinkle.initialize(mode='local', global_device_mesh=device_mesh) + +logger = get_logger() + + +def eval(model): + # 100 Samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100))) + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507') + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + dataset.encode() + dataloader = DataLoader(dataset=dataset, batch_size=4) + for step, batch in tqdm(enumerate(dataloader)): + model.forward_only(inputs=batch) + model.calculate_loss() + metrics = model.calculate_metric(is_training=False) + return metrics + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # 
Global batch size = 4, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8) + # Use a TransformersModel, transformer_cls_names_to_wrap=Qwen3MoeSparseMoeBlock to avoid hang of fsdp2 + model = TransformersModel(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507', fsdp_config={'transformer_cls_names_to_wrap':["Qwen3MoeSparseMoeBlock"]}) + # Patch MoE model to fix the hang bug, support transformers==4.* + model.apply_patch('ms://twinkle-kit/qwen3_moe_transformers4_patch') + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader)) + logger.info(get_device_placement()) + # Print the training config + logger.info(model.get_train_configs()) + logger.info(f'Total steps: {len(dataloader)}') + loss_metric = 99.0 + # lora: 34G * 8 + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + if Platform.get_rank() == 0 and os.environ.get('SWANLAB_API_KEY'): + swanlab.log(metric) + logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + if step > 0 and step % 40 == 0: + metrics = eval(model) + logger.info(f'Eval metric: {metrics}') + metrics['step'] = step + if loss_metric > float(metrics['loss']): + model.save(f'checkpoint-{step}') + loss_metric = float(metrics['loss']) + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() \ No newline at end of file diff --git 
a/cookbook/transformers/fsdp2_moe.sh b/cookbook/transformers/fsdp2_moe.sh new file mode 100644 index 00000000..b8c56aa0 --- /dev/null +++ b/cookbook/transformers/fsdp2_moe.sh @@ -0,0 +1 @@ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 fsdp2_moe.py \ No newline at end of file diff --git a/cookbook/transformers/sp_fsdp_dense.py b/cookbook/transformers/sp_fsdp_dense.py new file mode 100644 index 00000000..99c2e4ec --- /dev/null +++ b/cookbook/transformers/sp_fsdp_dense.py @@ -0,0 +1,99 @@ +from functools import partial +import numpy as np +from peft import LoraConfig + +import twinkle +from twinkle import get_logger, DeviceGroup, Platform, DeviceMesh +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +logger = get_logger() +MODEL_ID = 'ms://Qwen/Qwen2.5-7B-Instruct' +DATASETS='ms://swift/self-cognition' + +device_group = [ + DeviceGroup( + name="default", + ranks=[0, 1, 2, 3], + device_type=Platform.get_platform().device_prefix(), + ) +] + +# FSDP + SP validation over 4 GPUs: dp=2, fsdp=2 (SP only affects input slicing) +device_mesh = DeviceMesh( + device_type="cuda", + mesh=np.arange(4).reshape(2, 2), + mesh_dim_names=("dp", "fsdp"), + ulysses_size=2, +) + +twinkle.initialize( + mode="local", + nproc_per_node=4, + global_device_mesh=device_mesh, + lazy_collect=False, +) + +def eval(model): + dataloader = DataLoader( + dataset=partial(create_dataset, data_slice=range(100)), + batch_size=4, + device_mesh=device_mesh, + ) + for _, batch in enumerate(dataloader): + model.forward_only(inputs=batch, adapter_name="default") + model.calculate_loss(adapter_name="default") + return model.calculate_metric(is_training=False, adapter_name="default") + + +def create_dataset(data_slice=None): + dataset = Dataset( + dataset_meta=DatasetMeta(DATASETS, data_slice=range(500)) + ) + dataset.set_template( + "Template", + 
model_id=MODEL_ID + ) + dataset.map(SelfCognitionProcessor("twinkle模型", "twinkle团队")) + dataset.encode(batched=True) + return dataset +def train(): + dataloader = DataLoader( + dataset=partial(create_dataset, data_slice=None), + batch_size=8, + device_mesh=device_mesh, + ) + + model = TransformersModel( + model_id=MODEL_ID, + device_mesh=device_mesh, + strategy="native_fsdp", + ) + + lora_config = LoraConfig(target_modules="all-linear") + model.add_adapter_to_model("default", lora_config, gradient_accumulation_steps=1) + model.set_optimizer("AdamW", lr=1e-4, adapter_name="default") + model.set_lr_scheduler( + scheduler_cls="CosineWarmupScheduler", + num_warmup_steps=5, + num_training_steps=len(dataloader), + adapter_name="default", + ) + + logger.info(model.get_train_configs(adapter_name="default")) + logger.info(f"Total steps: {len(dataloader)}") + + + for step, batch in enumerate(dataloader): + model.forward_backward(inputs=batch, adapter_name="default") + model.clip_grad_and_step(adapter_name="default") + if step % 20 == 0: + metric = model.calculate_metric(is_training=True, adapter_name="default") + logger.info(f"Current is step {step} of {len(dataloader)}, metric: {metric}") + model.save("last-checkpoint", interval=1) + + +if __name__ == "__main__": + train() \ No newline at end of file diff --git a/cookbook/transformers/sp_fsdp_dense.sh b/cookbook/transformers/sp_fsdp_dense.sh new file mode 100644 index 00000000..9603780e --- /dev/null +++ b/cookbook/transformers/sp_fsdp_dense.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# To enable sequence parallelism, please set ulysses_size > 1 +# device_mesh = DeviceMesh( +# device_type="cuda", +# mesh=np.arange(4).reshape(2, 2), +# mesh_dim_names=("dp", "fsdp"), +# ulysses_size=2, +# ) +# +CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 sp_fsdp_dense.py \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# 
Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..8ccd292e --- /dev/null +++ b/docs/README.md @@ -0,0 +1,37 @@ +## maintain docs +1. build docs + ```shell + # in root directory: + make docs + ``` + +2. doc string format + + We adopt the google style docstring format as the standard, please refer to the following documents. + 1. Google Python style guide docstring [link](http://google.github.io/styleguide/pyguide.html#381-docstrings) + 2. Google docstring example [link](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) + 3. sample:torch.nn.modules.conv [link](https://pytorch.org/docs/stable/_modules/torch/nn/modules/conv.html#Conv1d) + 4. load function as an example: + + ```python + def load(file, file_format=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml". 
+ + Examples: + >>> load('/path/of/your/file') # file is stored in disk + >>> load('https://path/of/your/file') # file is stored on internet + >>> load('oss://path/of/your/file') # file is stored in petrel + + Returns: + The content from the file. + """ + ``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..9534b018 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/.readthedocs.yaml b/docs/source/.readthedocs.yaml new file mode 100644 index 00000000..245cf63d --- /dev/null +++ b/docs/source/.readthedocs.yaml @@ -0,0 +1,15 @@ +# .readthedocs.yaml +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + jobs: + pre_install: + - pip install poetry + - poetry config virtualenvs.create false + - poetry install --only docs --no-interaction --no-ansi + +sphinx: + configuration: docs/source/conf.py \ No newline at end of file diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst new file mode 100644 index 00000000..b9aade44 --- /dev/null +++ b/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,10 @@ +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. 
autoclass:: {{ name }} + :inherited-members: + :members: + +.. autogenerated from source/_templates/autosummary/class.rst diff --git a/docs/source/_templates/classtemplate.rst b/docs/source/_templates/classtemplate.rst new file mode 100644 index 00000000..d3ea0e59 --- /dev/null +++ b/docs/source/_templates/classtemplate.rst @@ -0,0 +1,12 @@ +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. autoclass:: {{ name }} + :members: + :special-members: __init__, __call__ + +.. + autogenerated from source/_templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/source/_templates/sobolengine.rst b/docs/source/_templates/sobolengine.rst new file mode 100644 index 00000000..e732eecc --- /dev/null +++ b/docs/source/_templates/sobolengine.rst @@ -0,0 +1,14 @@ +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. autoclass:: {{ name }} + :members: + :exclude-members: MAXBIT, MAXDIM + :undoc-members: + + +.. + autogenerated from source/_templates/sobolengine.rst + note it has specific options diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..875bcee4 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,123 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys + +# import sphinx_book_theme + +sys.path.insert(0, os.path.abspath('../../src')) +# -- Project information ----------------------------------------------------- + +project = 'twinkle' +copyright = '2022-2025, Alibaba ModelScope' +author = 'ModelScope Authors' +version_file = '../../src/twinkle/version.py' +html_theme = 'sphinx_rtd_theme' +language = 'zh_CN' + + +def get_version(): + with open(version_file, 'r', encoding='utf-8') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +# The full version, including alpha/beta/rc tags +version = get_version() +release = version + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.napoleon', + 'sphinx.ext.autosummary', + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', + 'sphinx_copybutton', + 'myst_parser', +] + +# build the templated autosummary files +autosummary_generate = True +numpydoc_show_class_members = False + +# Enable overriding of function signatures in the first line of the docstring. +autodoc_docstring_signature = True + +# Disable docstring inheritance +autodoc_inherit_docstrings = False + +# Show type hints in the description +autodoc_typehints = 'description' + +# Add parameter types if the parameter is documented in the docstring +autodoc_typehints_description_target = 'documented_params' + +autodoc_default_options = { + 'member-order': 'bysource', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = ['.rst', '.md'] + +# The master toctree document. 
+root_doc = 'index' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['build', 'source/.ipynb_checkpoints', 'source/api/generated', 'Thumbs.db', '.DS_Store'] +# A list of glob-style patterns [1] that are used to find source files. +# They are matched against the source file names relative to the source directory, +# using slashes as directory separators on all platforms. +# The default is **, meaning that all files are recursively included from the source directory. +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'sphinx_book_theme' +# html_theme_path = [sphinx_book_theme.get_html_theme_path()] +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] +# html_css_files = ['css/readthedocs.css'] + +# -- Options for HTMLHelp output --------------------------------------------- +# Output file base name for HTML help builder. + +# -- Extension configuration ------------------------------------------------- +# Ignore >>> when copying code +copybutton_prompt_text = r'>>> |\.\.\. ' +copybutton_prompt_is_regexp = True + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} + +myst_enable_extensions = [ + 'amsmath', + 'dollarmath', + 'colon_fence', +] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..e4a93fc4 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,44 @@ +.. 
twinkle documentation file, + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Twinkle DOCUMENTATION +======================== + +.. toctree:: + :maxdepth: 2 + :caption: 使用指引 + + 使用指引/快速开始.md + 使用指引/安装.md + 使用指引/服务端和客户端/index.rst + 使用指引/NPU的支持.md + +.. toctree:: + :maxdepth: 2 + :caption: 组件 + + 组件/数据集/index.rst + 组件/数据格式/index.rst + 组件/模板/index.rst + 组件/预处理器和过滤器/index.rst + 组件/数据加载/index.rst + 组件/任务处理器/index.rst + 组件/模型/index.rst + 组件/采样器/index.rst + 组件/奖励/index.rst + 组件/优势/index.rst + 组件/检查点引擎/index.rst + 组件/指标/index.rst + 组件/损失/index.rst + 组件/LRScheduler/index.rst + 组件/补丁/index.rst + 组件/组件化/index.rst + 组件/Kernel/index.rst + 组件/训练中间件/index.rst + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/NPU\347\232\204\346\224\257\346\214\201.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/NPU\347\232\204\346\224\257\346\214\201.md" new file mode 100644 index 00000000..3241dbf5 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/NPU\347\232\204\346\224\257\346\214\201.md" @@ -0,0 +1,310 @@ +# NPU(昇腾)开箱指南 + +本文档介绍如何在华为昇腾 NPU 环境下安装和使用 Twinkle 框架。 + +## 环境要求 + +在开始之前,请确保您的系统满足以下要求: + +| 组件 | 版本要求 | 说明 | +|------|---------|------| +| Python | >= 3.11, < 3.13 | Twinkle 框架要求 | +| 昇腾固件驱动(HDK) | 推荐最新版本 | 硬件驱动和固件 | +| CANN 工具包 | 8.3.RC1 或更高 | 异构计算架构 | +| PyTorch | 2.7.1 | 深度学习框架 | +| torch_npu | 2.7.1 | 昇腾 PyTorch 适配插件 | + +**重要说明**: +- torch 和 torch_npu 版本**必须完全一致**(例如都为 2.7.1) +- 推荐使用 Python 3.11 以获得最佳兼容性 +- CANN 工具包需要约 10GB+ 磁盘空间 + +## 支持的硬件 + +Twinkle 当前支持以下昇腾 NPU 设备: + +- 昇腾 910 系列 +- 其他兼容的昇腾加速卡 + +## 安装步骤 + +### 1. 
安装 NPU 环境(驱动、CANN、torch_npu) + +NPU 环境的安装包括昇腾驱动、CANN 工具包、PyTorch 和 torch_npu。 + +**📖 完整安装教程**:[torch_npu 官方安装指南](https://gitcode.com/Ascend/pytorch/overview) + +该文档包含: +- 昇腾驱动(HDK)安装步骤 +- CANN 工具包安装步骤 +- PyTorch 和 torch_npu 安装步骤 +- 版本配套说明 + +**推荐版本配置**: +- Python: 3.11 +- PyTorch: 2.7.1 +- torch_npu: 2.7.1 +- CANN: 8.3.RC1 或更高 + +### 2. 安装 Twinkle + +NPU 环境配置完成后,从源码安装 Twinkle 框架: + +```bash +git clone https://github.com/modelscope/twinkle.git +cd twinkle +pip install -e ".[transformers,ray]" +``` + +### 3. 安装 vLLM 和 vLLM-Ascend(可选) + +如果需要使用 vLLMSampler 进行高效推理,可以安装 vLLM 和 vLLM-Ascend。 + +**安装步骤**: + +```bash +# 第一步:安装 vLLM +pip install vllm==0.11.0 + +# 第二步:安装 vLLM-Ascend +pip install vllm-ascend==0.11.0rc3 +``` + +**注意事项**: +- 按照上述顺序安装,忽略可能的依赖冲突提示 +- 安装前确保已激活 CANN 环境:`source /usr/local/Ascend/ascend-toolkit/set_env.sh` +- 推荐使用的版本为 vLLM 0.11.0 和 vLLM-Ascend 0.11.0rc3 + +### 4. 验证安装 + +创建测试脚本 `verify_npu.py`: + +```python +import torch +import torch_npu + +print(f"PyTorch version: {torch.__version__}") +print(f"torch_npu version: {torch_npu.__version__}") +print(f"NPU available: {torch.npu.is_available()}") +print(f"NPU device count: {torch.npu.device_count()}") + +if torch.npu.is_available(): + print(f"Current NPU device: {torch.npu.current_device()}") + print(f"NPU device name: {torch.npu.get_device_name(0)}") + + # 简单测试 + x = torch.randn(3, 3).npu() + y = torch.randn(3, 3).npu() + z = x + y + print(f"NPU computation test passed: {z.shape}") +``` + +运行验证: + +```bash +python verify_npu.py +``` + +如果输出显示 `NPU available: True` 且没有报错,说明安装成功! 
+ +**注意**:目前 Twinkle 暂未提供 NPU 的 Docker 镜像,建议使用手动安装方式。如需容器化部署,请参考昇腾社区的官方镜像。 + +## 快速开始 + +**重要提示**:以下示例均来自 `cookbook/` 目录,已在实际 NPU 环境中验证通过。建议直接运行 cookbook 中的脚本,而不是复制粘贴代码片段。 + +### SFT LoRA 微调 + +已验证的 4 卡 DP+FSDP 训练示例: + +**示例路径**:[cookbook/sft/lora_npu.py](https://github.com/modelscope/twinkle/blob/main/cookbook/sft/lora_npu.py) + +**运行方式**: +```bash +# 指定使用 4 张 NPU 卡 +export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 + +# 运行训练 +python cookbook/sft/lora_npu.py +``` + +**示例特性**: +- ✅ Ray 分布式模式 +- ✅ DP + FSDP 混合并行(2x2) +- ✅ LoRA 微调 +- ✅ 完整的数据加载和训练循环 + +### GRPO 强化学习训练 + +已验证的多卡 GRPO 训练示例: + +**示例路径**:[cookbook/grpo/lora_npu.py](https://github.com/modelscope/twinkle/blob/main/cookbook/grpo/lora_npu.py) + +**运行方式**: +```bash +# 指定使用 8 张 NPU 卡 +export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 + +# 运行训练 +python cookbook/grpo/lora_npu.py +``` + +**示例特性**: +- ✅ Actor-Critic 架构 +- ✅ 支持 Reference Model +- ✅ 可选 TorchSampler 或 vLLMSampler +- ✅ 完整的 RL 训练流程 + +### 更多示例 + +查看 `cookbook/remote/tinker/ascend/` 目录了解远程训练服务端配置。 + +## 并行策略 + +Twinkle 在 NPU 上目前支持以下**经过验证**的并行策略: + +| 并行类型 | 说明 | NPU 支持 | 验证状态 | +|---------|------|---------|---------| +| DP (Data Parallel) | 数据并行 | ✅ | 已验证(见 cookbook/sft/lora_npu.py) | +| FSDP (Fully Sharded Data Parallel) | 完全分片数据并行 | ✅ | 已验证(见 cookbook/sft/lora_npu.py) | +| TP (Tensor Parallel) | 张量并行(Megatron) | 🚧 | 待验证 | +| PP (Pipeline Parallel) | 流水线并行(Megatron) | 🚧 | 待验证 | +| CP (Context Parallel) | 上下文并行 | 🚧 | 待验证 | +| EP (Expert Parallel) | 专家并行(MoE) | 🚧 | 待验证 | + +**图例说明**: +- ✅ 已验证:有实际运行示例代码 +- 🚧 待验证:理论上支持但暂无 NPU 验证示例 +- ❌ 不支持:当前版本不可用 + +### DP + FSDP 示例 + +以下示例来自 `cookbook/sft/lora_npu.py`,在实际 NPU 环境中验证通过: + +```python +import numpy as np +from twinkle import DeviceMesh + +# 4 卡:DP=2, FSDP=2 +device_mesh = DeviceMesh( + device_type='npu', + mesh=np.array([[0, 1], [2, 3]]), + mesh_dim_names=('dp', 'fsdp') +) +``` + +**注意**:Megatron 后端(TP/PP/EP)在 NPU 上的支持正在开发中,暂无可用示例。如需使用这些高级并行策略,请先在 GPU 环境下验证,或关注项目更新。 + +## 常见问题 + +### 1. 
torch_npu 版本不匹配 + +**问题**:安装 torch_npu 后出现版本不兼容警告或错误。 + +**解决方案**: +- 确保 torch 和 torch_npu 版本完全一致 +- 检查 CANN 版本是否与 torch_npu 兼容 + +```bash +# 查看当前版本 +python -c "import torch; import torch_npu; print(torch.__version__, torch_npu.__version__)" + +# 重新安装匹配版本 +pip uninstall torch torch_npu -y +pip install torch==2.7.1 +pip install torch_npu-2.7.1-cp311-cp311-linux_aarch64.whl +``` + +### 2. CANN 工具包版本问题 + +**问题**:CANN 版本与 torch_npu 不兼容。 + +**解决方案**: +- 参考[昇腾社区版本配套表](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/softwareinstall/instg/atlasdeploy_03_0015.html) +- 安装对应版本的 CANN 工具包 + +## 功能支持情况 + +基于实际代码验证的功能支持矩阵: + +| 功能 | GPU | NPU | 验证示例 | 说明 | +|------|-----|-----|---------|------| +| SFT + LoRA | ✅ | ✅ | cookbook/sft/lora_npu.py | 已验证可用 | +| GRPO | ✅ | ✅ | cookbook/grpo/lora_npu.py | 已验证可用 | +| DP 并行 | ✅ | ✅ | cookbook/sft/lora_npu.py | 已验证可用 | +| FSDP 并行 | ✅ | ✅ | cookbook/sft/lora_npu.py | 已验证可用 | +| Ray 分布式 | ✅ | ✅ | cookbook/sft/lora_npu.py | 已验证可用 | +| TorchSampler | ✅ | ✅ | cookbook/grpo/lora_npu.py | 已验证可用 | +| vLLMSampler | ✅ | ✅ | cookbook/grpo/lora_npu.py | 已验证可用 | +| 全量微调 | ✅ | 🚧 | - | 理论支持,待验证 | +| QLoRA | ✅ | ❌ | - | 量化算子暂不支持 | +| DPO | ✅ | 🚧 | - | 理论支持,待验证 | +| Megatron TP/PP | ✅ | 🚧 | - | 待适配和验证 | +| Flash Attention | ✅ | ⚠️ | - | 部分算子不支持 | + +**图例说明**: +- ✅ **已验证**:有实际运行示例,确认可用 +- 🚧 **待验证**:理论上支持但暂无 NPU 环境验证 +- ⚠️ **部分支持**:可用但有限制或性能差异 +- ❌ **不支持**:当前版本不可用 + +**使用建议**: +1. 优先使用标记为“已验证”的功能,稳定性有保障 +2. “待验证”功能可以尝试,但可能遇到兼容性问题 +3. 
遇到问题时,参考对应的示例代码进行配置 + +## 示例代码 + +Twinkle 提供了以下经过验证的 NPU 训练示例: + +### SFT 训练 +- **4 卡 DP+FSDP LoRA 微调**:[cookbook/sft/lora_npu.py](https://github.com/modelscope/twinkle/blob/main/cookbook/sft/lora_npu.py) + - 使用 Ray 模式进行分布式训练 + - 演示 DP + FSDP 混合并行 + - 包含完整的数据加载和训练循环 + +### GRPO 训练 +- **多卡 GRPO RL 训练**:[cookbook/grpo/lora_npu.py](https://github.com/modelscope/twinkle/blob/main/cookbook/grpo/lora_npu.py) + - Actor-Critic 架构 + - 支持参考模型(Reference Model) + - 可选 TorchSampler 或 vLLMSampler + +### 远程训练(Tinker 协议) +- **服务端配置**:[cookbook/remote/tinker/ascend/](https://github.com/modelscope/twinkle/tree/main/cookbook/remote/tinker/ascend) + - 提供 HTTP API 接口 + - 支持远程训练和推理 + - 适用于生产环境部署 + +**运行示例**: +```bash +# SFT 训练 +export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 +python cookbook/sft/lora_npu.py + +# GRPO 训练 +export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +python cookbook/grpo/lora_npu.py +``` + +## 参考资源 + +- [昇腾社区官网](https://www.hiascend.com/) +- [CANN 软件安装指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/softwareinstall/instg/atlasdeploy_03_0001.html) +- [torch_npu GitHub](https://github.com/Ascend/pytorch) +- [Twinkle GitHub](https://github.com/modelscope/twinkle) +- [Twinkle 文档](https://twinkle.readthedocs.io/) + +## 获取帮助 + +如果您在使用过程中遇到问题: + +1. **查看日志**:设置环境变量 `ASCEND_GLOBAL_LOG_LEVEL=1` 获取详细日志 +2. **提交 Issue**:[Twinkle GitHub Issues](https://github.com/modelscope/twinkle/issues) +3. 
**社区讨论**:[昇腾社区论坛](https://www.hiascend.com/forum) + +## 下一步 + +- 📖 阅读 [快速开始](Quick-start.md) 了解更多训练示例 +- 📖 阅读 [安装指南](Installation.md) 了解其他平台的安装 +- 🚀 浏览 `cookbook/` 目录查看完整示例代码 +- 💡 查看 [Twinkle 文档](https://twinkle.readthedocs.io/) 了解高级功能 diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\256\211\350\243\205.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\256\211\350\243\205.md" new file mode 100644 index 00000000..351b7b60 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\256\211\350\243\205.md" @@ -0,0 +1,28 @@ +# Twinkle安装 + +## Wheel包安装 + +可以使用pip进行安装: + +```shell +pip install 'twinkle-kit' +``` + +## 源代码安装 + +```shell +git clone https://github.com/modelscope/twinkle.git +cd twinkle +pip install -e . --no-build-isolation +``` + +## 支持的硬件 + +| 硬件环境 | 备注 | +|--------------------------|-----------------------------| +| GPU A10/A100/H100/RTX系列等 | | +| GPU T4/V100等 | 不支持bfloat16、Flash-Attention | +| Ascend NPU | 部分算子不支持 | +| PPU | 支持 | +| CPU | 支持dataset、dataloader等部分组件 | + diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md" new file mode 100644 index 00000000..5a1006f1 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md" @@ -0,0 +1,212 @@ +
+ +## ✨ Twinkle 是什么? + +大模型训练组件库。基于 PyTorch,更简洁、更灵活、生产就绪。 + +

+🧩 松耦合架构 · 标准化接口
+🚀 多运行模式 · torchrun / Ray / HTTP
+🔌 多框架兼容 · Transformers / Megatron
+👥 多租户支持 · 单基座模型部署 +

+ +
+ +## Twinkle 适配性 + +Twinkle 和 [ms-swift](https://github.com/modelscope/ms-swift) 都是模型训练框架,但二者的特性有很大不同,开发者可以根据自己的需求选择。 + +### 何时选择 Twinkle + +- 如果你是大模型的初学者,希望更好地了解模型机制和模型训练方法 +- 如果你是大模型研究者,希望定制模型或训练方法 +- 如果你善于编写 training loop,希望定制训练过程 +- 如果你希望提供企业级或商业化训练平台 +- 如果你缺少训练硬件,希望使用社区资源 + +### 何时选择ms-swift + +- 如果你不关心训练过程,希望仅提供数据集便可完成训练 +- 如果你需要更多的模型支持和数据集种类 +- 如果你需要Embedding、Reranker、Classification等多种类型的训练 +- 如果你需要推理、部署、量化等其他能力 +- 如果你对新模型的训练支持敏感,Swift 会保证 day-0 的更新能力 + +## Twinkle 的可定制组件 + +在 Twinkle 的设计中,torchrun、Ray、HTTP 的训练使用同样的 API,并共享相同的组件和输入输出结构。因此,其很多组件可以由开发者自定义来实现新的算法开发。 + +下面我们列出推荐定制的组件列表: + +| 组件名称 | 基类 | 说明 | +| --------------------- | ------------------------------------------ | ------------------------------------------------------- | +| 损失 | twinkle.loss.Loss | 用于定义模型训练的损失函数 | +| 指标 | twinkle.metric.Metric | 用于定义模型训练的评价体系 | +| Optimizer/LRScheduler | 基于PyTorch | 用于定义模型训练的优化器和LR衰减器 | +| 补丁 | twinkle.patch.Patch | 用于修复模型训练过程中的问题 | +| 预处理器 | twinkle.preprocessor.Preprocessor | 用于对数据进行预处理(ETL),并返回 Template 可用的标准格式 | +| 过滤器 | twinkle.preprocessor.Filter | 用于对原始数据进行合理性过滤 | +| 任务数据处理器 | twinkle.processor.InputProcessor | 用于将模型输入转换为各任务需要的数据,并添加额外字段 | +| 模型 | twinkle.model.TwinkleModel | 大模型本身 | +| 采样器 | twinkle.sampler.Sampler | 采样器,例如 vLLM | +| 奖励 | twinkle.reward.Reward | 用于实现不同 RL 训练的奖励 | +| 优势 | twinkle.advantage.Advantage | 用于实现不同 RL 训练的优势估计 | +| 模板 | twinkle.template.Template | 用于处理标准输入,并转换成模型需要的 token | +| 权重同步 | twinkle.checkpoint_engine.CheckpointEngine | 用于 RL 训练中的权重同步 | + +> 未在上表中列出的组件,如Dataset、DataLoader等也可以实现定制,只需要跟随基类API设计即可。 + +## DeviceGroup 和 DeviceMesh + +DeviceGroup 和 DeviceMesh 是 Twinkle 架构的核心。所有的代码构建均基于这两个设计。 + +```python +import twinkle +from twinkle import DeviceMesh, DeviceGroup +device_group = [ + DeviceGroup( + name='default', + ranks=8, + device_type='cuda', + ) + ] + +device_mesh = DeviceMesh.from_sizes(pp_size=2, tp_size=2, dp_size=2) +twinkle.initialize(mode='ray', nproc_per_node=8, groups=device_group) +``` + +当 device_group 定义完成后,需要使用 
`twinkle.initialize` 来初始化资源。 + +DeviceGroup:定义本次训练需要多少个资源组。定义后,组件可以通过选择资源组的方式将自己运行在远端: + +```python +from twinkle.model import TransformersModel +model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default', device_mesh=device_mesh) +# 或者 +from twinkle.model import MegatronModel +model = MegatronModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default', device_mesh=device_mesh) +``` + +DeviceMesh 指定了模型等组件在资源组中的拓扑结构。可以理解为如何进行并行。这会影响一系列的框架决策,例如数据获取、数据消费、数据返回等。 + +## 使用样例 + +```python +from peft import LoraConfig +import twinkle +from twinkle import DeviceMesh, DeviceGroup +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, DatasetMeta +from twinkle.model import TransformersModel +from twinkle.preprocessor import SelfCognitionProcessor + +device_group = [DeviceGroup(name='default',ranks=8,device_type='cuda')] +device_mesh = DeviceMesh.from_sizes(fsdp_size=4, dp_size=2) +# local for torchrun +twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_mesh) + + +def train(): + # 1000 samples + dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000))) + # Set template to prepare encoding + dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct') + # Preprocess the dataset to standard format + dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区')) + # Encode dataset + dataset.encode() + # Global batch size = 8, for GPUs, so 1 sample per GPU + dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8) + # Use a TransformersModel + model = TransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct', remote_group='default') + + lora_config = LoraConfig( + r=8, + lora_alpha=32, + target_modules='all-linear' + ) + + # Add a lora to model, with name `default` + # Comment this to use full-parameter training + model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + # Add Optimizer 
for lora `default` + model.set_optimizer(optimizer_cls='AdamW', lr=1e-4) + # Add LRScheduler for lora `default` + model.set_lr_scheduler(scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, + num_training_steps=len(dataloader)) + for step, batch in enumerate(dataloader): + # Do forward and backward + model.forward_backward(inputs=batch) + # Step + model.clip_grad_and_step() + if step % 20 == 0: + # Print metric + metric = model.calculate_metric(is_training=True) + print(f'Current is step {step} of {len(dataloader)}, metric: {metric}') + model.save(f'last-checkpoint') + + +if __name__ == '__main__': + train() +``` + +这样启动训练: + +```shell +python3 train.py +``` + +## 支持的大语言模型列表 + +| Model Type | Model ID 举例 | Requires | Support Megatron | HF Model ID | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------- | -------------------- | ---------------- | ---------------------------------------------------------------------------------------------------------- | +| qwen2 全系列 | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) | transformers>=4.37 | ✔ | [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) | +| | [Qwen/Qwen2-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-72B-Instruct) | transformers>=4.37 | ✔ | [Qwen/Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct) | +| | [Qwen/Qwen2-1.5B](https://modelscope.cn/models/Qwen/Qwen2-1.5B) | transformers>=4.37 | ✔ | [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) | +| | [Qwen/Qwen2-7B](https://modelscope.cn/models/Qwen/Qwen2-7B) | transformers>=4.37 | ✔ | [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) | +| | [Qwen/Qwen2-72B](https://modelscope.cn/models/Qwen/Qwen2-72B) | transformers>=4.37 | ✔ | [Qwen/Qwen2-72B](https://huggingface.co/Qwen/Qwen2-72B) | +| | 
[Qwen/Qwen2.5-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B-Instruct) | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | +| | [Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct) | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | +| | [Qwen/Qwen2.5-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-72B-Instruct) | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct) | +| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B) | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) | +| | [Qwen/Qwen2.5-32B](https://modelscope.cn/models/Qwen/Qwen2.5-32B) | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) | +| qwen2_moe 全系列 | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) | +| | [Qwen/Qwen1.5-MoE-A2.7B](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B) | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) | +| qwen3 全系列 | [Qwen/Qwen3-0.6B-Base](https://modelscope.cn/models/Qwen/Qwen3-0.6B-Base) | transformers>=4.51 | ✔ | [Qwen/Qwen3-0.6B-Base](https://huggingface.co/Qwen/Qwen3-0.6B-Base) | +| | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base) | transformers>=4.51 | ✔ | [Qwen/Qwen3-14B-Base](https://huggingface.co/Qwen/Qwen3-14B-Base) | +| | [Qwen/Qwen3-0.6B](https://modelscope.cn/models/Qwen/Qwen3-0.6B) | transformers>=4.51 | ✔ | [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B) | +| | [Qwen/Qwen3-1.7B](https://modelscope.cn/models/Qwen/Qwen3-1.7B) | transformers>=4.51 | ✔ | [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B) | +| | 
[Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen2.5-32B) | transformers>=4.51 | ✔ | [Qwen/Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) | +| qwen3_moe 全系列 | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) | +| | [Qwen/Qwen3-30B-A3B](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B) | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B) | +| | [Qwen/Qwen3-235B-A22B](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B) | transformers>=4.51 | ✔ | [Qwen/Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B) | +| chatglm2 全系列 | [ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b](https://huggingface.co/zai-org/chatglm2-6b) | +| | [ZhipuAI/chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k) | transformers<4.42 | ✘ | [zai-org/chatglm2-6b-32k](https://huggingface.co/zai-org/chatglm2-6b-32k) | +| chatglm3 全系列 | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) | +| | [ZhipuAI/chatglm3-6b-base](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-base) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-base](https://huggingface.co/zai-org/chatglm3-6b-base) | +| | [ZhipuAI/chatglm3-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-32k) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-32k](https://huggingface.co/zai-org/chatglm3-6b-32k) | +| | [ZhipuAI/chatglm3-6b-128k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-128k) | transformers<4.42 | ✘ | [zai-org/chatglm3-6b-128k](https://huggingface.co/zai-org/chatglm3-6b-128k) | +| chatglm4 全系列 | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | transformers>=4.42 | ✘ | 
[zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) | +| | [ZhipuAI/glm-4-9b](https://modelscope.cn/models/ZhipuAI/glm-4-9b) | transformers>=4.42 | ✘ | [zai-org/glm-4-9b](https://huggingface.co/zai-org/glm-4-9b) | +| | [ZhipuAI/glm-4-9b-chat-1m](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat-1m](https://huggingface.co/zai-org/glm-4-9b-chat-1m) | +| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) | +| glm_edge 全系列 | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) | +| | [ZhipuAI/glm-edge-4b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-4b-chat) | transformers>=4.46 | ✘ | [zai-org/glm-edge-4b-chat](https://huggingface.co/zai-org/glm-edge-4b-chat) | +| internlm2 全系列 | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) | +| | [Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft) | transformers>=4.38 | ✘ | [internlm/internlm2-chat-1_8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) | +| | [Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b) | transformers>=4.38 | ✘ | [internlm/internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | +| | [Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b) | transformers>=4.38 | ✘ | [internlm/internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | +| | 
[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b) | transformers>=4.38 | ✘ | [internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | +| deepseek_v1 | [deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat) | transformers>=4.39.4 | ✔ | | +| | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite) | +| | [deepseek-ai/DeepSeek-V2-Lite-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite-Chat) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat) | +| | [deepseek-ai/DeepSeek-V2](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2](https://huggingface.co/deepseek-ai/DeepSeek-V2) | +| | [deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat) | +| | [deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5) | +| | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) | +| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) | +| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | transformers>=4.37 | ✔ | 
[deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | +| | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | +| | [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | +| | [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" new file mode 100644 index 00000000..bdf3eebd --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" @@ -0,0 +1,254 @@ +# Tinker 兼容客户端 + +Tinker 兼容 Client 适用于已有 Tinker 训练代码的场景。通过 `init_tinker_compat_client` 初始化后,会对 Tinker SDK 进行 patch,使其指向 Twinkle Server,**其余代码可直接复用已有的 Tinker 训练代码**。 + +## 初始化 + +```python +from twinkle_client import init_tinker_compat_client + +# 初始化 Tinker 兼容客户端 +# init_tinker_compat_client 会自动 patch Tinker SDK, +# 使其可以连接到 Twinkle Server 而非 Tinker Server 
+service_client = init_tinker_compat_client( + base_url='http://localhost:8000', # Server 地址 + api_key='your-api-key' # 认证令牌 +) + +# 验证连接:列出 Server 上可用的模型 +for item in service_client.get_server_capabilities().supported_models: + print("- " + item.model_name) +``` + +### init_tinker_compat_client 做了什么? + +调用 `init_tinker_compat_client` 时,会自动执行以下操作: + +1. **Patch Tinker SDK**:绕过 Tinker 的 `tinker://` 前缀校验,使其可以连接到标准 HTTP 地址 +2. **设置请求头**:注入 `X-Ray-Serve-Request-Id` 和 `Authorization` 等必要的认证头 +3. **返回 `ServiceClient`**:返回一个标准的 Tinker `ServiceClient` 对象,后续操作与原生 Tinker 完全一致 + +这意味着在初始化之后,**所有已有的 Tinker 训练代码都可以直接使用**,无需任何修改。 + +## 完整训练示例 + +```python +import os +import numpy as np +import dotenv +dotenv.load_dotenv('.env') + +from tinker import types +from modelscope import AutoTokenizer +from twinkle_client import init_tinker_compat_client + +# Step 1: 初始化客户端(会自动 patch Tinker SDK) +service_client = init_tinker_compat_client( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_SDK_TOKEN') +) + +# Step 2: 查询已有训练运行(可选) +rest_client = service_client.create_rest_client() +response = rest_client.list_training_runs(limit=50).result() +print(f"Found {len(response.training_runs)} training runs") + +# Step 3: 创建训练客户端 +base_model = "Qwen/Qwen2.5-0.5B-Instruct" + +# 新建训练会话 +training_client = service_client.create_lora_training_client( + base_model=base_model +) + +# 或从检查点恢复 +# resume_path = "twinkle://run_id/weights/checkpoint_name" +# training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path) + +# Step 4: 准备训练数据 +examples = [ + {"input": "banana split", "output": "anana-bay plit-say"}, + {"input": "quantum physics", "output": "uantum-qay ysics-phay"}, + {"input": "donut shop", "output": "onut-day op-shay"}, +] + +tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) + +def process_example(example: dict, tokenizer) -> types.Datum: + """将原始样本转为 Tinker API 所需的 Datum 格式。 + + Datum 包含: + - model_input: 
输入 token IDs + - loss_fn_inputs: 目标 token 和逐 token 权重(0=忽略, 1=计算损失) + """ + prompt = f"English: {example['input']}\nPig Latin:" + + # 提示部分:weight=0,不参与损失计算 + prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True) + prompt_weights = [0] * len(prompt_tokens) + + # 补全部分:weight=1,参与损失计算 + completion_tokens = tokenizer.encode(f" {example['output']}\n\n", add_special_tokens=False) + completion_weights = [1] * len(completion_tokens) + + # 拼接并构建 next-token prediction 格式 + tokens = prompt_tokens + completion_tokens + weights = prompt_weights + completion_weights + + input_tokens = tokens[:-1] + target_tokens = tokens[1:] + weights = weights[1:] + + return types.Datum( + model_input=types.ModelInput.from_ints(tokens=input_tokens), + loss_fn_inputs=dict(weights=weights, target_tokens=target_tokens) + ) + +processed_examples = [process_example(ex, tokenizer) for ex in examples] + +# Step 5: 训练循环 +for epoch in range(2): + for batch in range(5): + # 发送训练数据到 Server:前向 + 反向传播 + fwdbwd_future = training_client.forward_backward(processed_examples, "cross_entropy") + # 优化器更新 + optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4)) + + # 等待结果 + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + # 计算加权平均 log-loss + logprobs = np.concatenate([o['logprobs'].tolist() for o in fwdbwd_result.loss_fn_outputs]) + weights = np.concatenate([e.loss_fn_inputs['weights'].tolist() for e in processed_examples]) + print(f"Epoch {epoch}, Batch {batch}: Loss = {-np.dot(logprobs, weights) / weights.sum():.4f}") + + # 每个 epoch 保存检查点 + save_result = training_client.save_state(f"lora-epoch-{epoch}").result() + print(f"Saved checkpoint to {save_result.path}") +``` + +## 使用 Twinkle 数据集组件 + +Tinker 兼容模式也可以利用 Twinkle 的数据集组件来简化数据准备,而不是手动构建 `Datum`: + +```python +from tqdm import tqdm +from tinker import types +from twinkle_client import init_tinker_compat_client +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset, 
DatasetMeta +from twinkle.preprocessor import SelfCognitionProcessor +from twinkle.server.tinker.common import input_feature_to_datum + +base_model = "Qwen/Qwen2.5-0.5B-Instruct" + +# 使用 Twinkle 的 Dataset 组件加载和预处理数据 +dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500))) +dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=256) +dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False) +dataset.encode(batched=True, load_from_cache_file=False) +dataloader = DataLoader(dataset=dataset, batch_size=8) + +# 初始化 Tinker 兼容客户端 +service_client = init_tinker_compat_client(base_url='http://localhost:8000') +training_client = service_client.create_lora_training_client(base_model=base_model, rank=16) + +# 训练循环:使用 input_feature_to_datum 转换数据格式 +for epoch in range(3): + for step, batch in tqdm(enumerate(dataloader)): + # 将 Twinkle 的 InputFeature 转换为 Tinker 的 Datum + input_datum = [input_feature_to_datum(input_feature) for input_feature in batch] + + fwdbwd_future = training_client.forward_backward(input_datum, "cross_entropy") + optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4)) + + fwdbwd_result = fwdbwd_future.result() + optim_result = optim_future.result() + + training_client.save_state(f"twinkle-lora-{epoch}").result() +``` + +## 推理采样 + +Tinker 兼容模式支持推理采样功能(需要 Server 配置了 Sampler 服务)。 + +### 从训练中采样 + +在训练完成后,可以直接从训练客户端创建采样客户端: + +```python +# 保存当前权重并创建采样客户端 +sampling_client = training_client.save_weights_and_get_sampling_client(name='my-model') + +# 准备推理输入 +prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:")) +params = types.SamplingParams( + max_tokens=20, # 最大生成 token 数 + temperature=0.0, # 贪心采样(确定性输出) + stop=["\n"] # 遇到换行停止 +) + +# 生成多条补全 +result = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8).result() + +for i, seq in enumerate(result.sequences): + print(f"{i}: 
{tokenizer.decode(seq.tokens)}") +``` + +### 从检查点采样 + +也可以加载已保存的检查点进行推理: + +```python +from tinker import types +from modelscope import AutoTokenizer +from twinkle_client import init_tinker_compat_client + +base_model = "Qwen/Qwen2.5-0.5B-Instruct" + +# 初始化客户端 +service_client = init_tinker_compat_client(base_url='http://localhost:8000') + +# 从已保存的检查点创建采样客户端 +sampling_client = service_client.create_sampling_client( + model_path="twinkle://run_id/weights/checkpoint_name", # 检查点的 twinkle:// 路径 + base_model=base_model +) + +# 准备推理输入 +tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) + +# 构建多轮对话输入 +inputs = [ + {'role': 'system', 'content': 'You are a helpful assistant.'}, + {'role': 'user', 'content': 'what is your name?'} +] +input_ids = tokenizer.apply_chat_template(inputs, tokenize=True, add_generation_prompt=True) + +prompt = types.ModelInput.from_ints(input_ids) +params = types.SamplingParams( + max_tokens=50, # 最大生成 token 数 + temperature=0.2, # 低温度,更聚焦的回答 + stop=["\n"] # 遇到换行停止 +) + +# 生成多条补全 +result = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8).result() + +for i, seq in enumerate(result.sequences): + print(f"{i}: {tokenizer.decode(seq.tokens)}") +``` + +### 发布检查点到 ModelScope Hub + +训练完成后,可以通过 REST client 将检查点发布到 ModelScope Hub: + +```python +rest_client = service_client.create_rest_client() + +# 从 tinker 路径发布检查点 +# 需要在初始化客户端时设置有效的 ModelScope token 作为 api_key +rest_client.publish_checkpoint_from_tinker_path(save_result.path).result() +print("Published checkpoint to ModelScope Hub") +``` diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Twinkle\345\256\242\346\210\267\347\253\257.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Twinkle\345\256\242\346\210\267\347\253\257.md" new file mode 
100644 index 00000000..4d5734b3 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Twinkle\345\256\242\346\210\267\347\253\257.md" @@ -0,0 +1,174 @@ +# Twinkle 客户端 + +Twinkle Client 是原生客户端,设计理念是:**将 `from twinkle import` 改为 `from twinkle_client import`,即可将本地训练代码迁移为远端调用,原有训练逻辑无需改动**。 + +## 初始化 + +```python +from twinkle_client import init_twinkle_client + +# 初始化客户端,连接到 Twinkle Server +client = init_twinkle_client( + base_url='http://127.0.0.1:8000', # Server 地址 + api_key='your-api-key' # 认证令牌(可通过环境变量 TWINKLE_SERVER_TOKEN 设置) +) +``` + +初始化完成后,`client` 对象(`TwinkleClient`)提供以下管理功能: + +```python +# 健康检查 +client.health_check() + +# 列出当前用户的训练运行 +runs = client.list_training_runs(limit=20) + +# 获取特定训练运行详情 +run = client.get_training_run(run_id='xxx') + +# 列出检查点 +checkpoints = client.list_checkpoints(run_id='xxx') + +# 获取检查点路径(用于恢复训练) +path = client.get_checkpoint_path(run_id='xxx', checkpoint_id='yyy') + +# 获取最新检查点路径 +latest_path = client.get_latest_checkpoint_path(run_id='xxx') +``` + +## 从本地代码迁移到远端 + +迁移非常简单,只需将 import 路径从 `twinkle` 替换为 `twinkle_client`: + +```python +# 本地训练代码(原始) +from twinkle.dataloader import DataLoader +from twinkle.dataset import Dataset +from twinkle.model import MultiLoraTransformersModel + +# 远端训练代码(迁移后) +from twinkle_client.dataloader import DataLoader +from twinkle_client.dataset import Dataset +from twinkle_client.model import MultiLoraTransformersModel +``` + +训练循环、数据处理等逻辑完全不需要修改。 + +## 完整训练示例(Transformers 后端) + +```python +import os +import dotenv +dotenv.load_dotenv('.env') + +from peft import LoraConfig +from twinkle import get_logger +from twinkle.dataset import DatasetMeta + +# 从 twinkle_client import 替代 twinkle,实现远端调用 +from twinkle_client.dataloader import DataLoader +from twinkle_client.dataset import Dataset +from twinkle_client.model import MultiLoraTransformersModel +from twinkle_client import init_twinkle_client + 
+logger = get_logger() + +# Step 1: 初始化客户端 +client = init_twinkle_client( + base_url='http://127.0.0.1:8000', + api_key=os.environ.get('MODELSCOPE_SDK_TOKEN') +) + +# Step 2: 查询已有训练运行(可选,用于恢复训练) +runs = client.list_training_runs() +resume_path = None +for run in runs: + checkpoints = client.list_checkpoints(run.training_run_id) + for checkpoint in checkpoints: + logger.info(checkpoint.model_dump_json(indent=2)) + # 取消注释以从检查点恢复: + # resume_path = checkpoint.twinkle_path + +# Step 3: 准备数据集 +dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition')) + +# 设置 chat 模板,使数据匹配模型的输入格式 +dataset.set_template('Template', model_id='ms://Qwen/Qwen2.5-7B-Instruct', max_length=512) + +# 数据预处理:替换占位符为自定义名称 +dataset.map('SelfCognitionProcessor', + init_args={'model_name': 'twinkle模型', 'model_author': 'twinkle团队'}) + +# 编码数据集为模型可用的 token +dataset.encode(batched=True) + +# 创建 DataLoader +dataloader = DataLoader(dataset=dataset, batch_size=8) + +# Step 4: 配置模型 +model = MultiLoraTransformersModel(model_id='ms://Qwen/Qwen2.5-7B-Instruct') + +# 配置 LoRA +lora_config = LoraConfig(target_modules='all-linear') +model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2) + +# 设置模板、处理器、损失函数 +model.set_template('Template') +model.set_processor('InputProcessor', padding_side='right') +model.set_loss('CrossEntropyLoss') + +# 设置优化器和学习率调度器 +model.set_optimizer('AdamW', lr=1e-4) +model.set_lr_scheduler('LinearLR') + +# Step 5: 恢复训练(可选) +if resume_path: + logger.info(f'Resuming training from {resume_path}') + model.load(resume_path, load_optimizer=True) + +# Step 6: 训练循环 +for step, batch in enumerate(dataloader): + # 前向传播 + 反向传播 + output = model.forward_backward(inputs=batch) + + if step % 2 == 0: + logger.info(f'Step {step // 2}, loss: {output}') + + # 梯度裁剪 + model.clip_grad_norm(1.0) + + # 优化器更新 + model.step() + + # 梯度清零 + model.zero_grad() + + # 学习率调度 + model.lr_step() + +# Step 7: 保存检查点 +twinkle_path = model.save(name=f'step-{step}', save_optimizer=True) 
+logger.info(f"Saved checkpoint: {twinkle_path}") + +# Step 8: 上传到 ModelScope Hub(可选) +model.upload_to_hub( + checkpoint_dir=twinkle_path, + hub_model_id='your-username/your-model-name', + async_upload=False +) +``` + +## Megatron 后端的差异 + +使用 Megatron 后端时,客户端代码的主要差异: + +```python +# Megatron 后端不需要显式设置 loss(由 Megatron 内部计算) +# model.set_loss('CrossEntropyLoss') # 不需要 + +# 优化器和 LR 调度器使用 Megatron 内置默认值 +model.set_optimizer('default', lr=1e-4) +model.set_lr_scheduler('default', lr_decay_steps=1000, max_lr=1e-4) +``` + +其余数据处理、训练循环、检查点保存等代码完全相同。 diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/index.rst" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/index.rst" new file mode 100644 index 00000000..6effe8f9 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/index.rst" @@ -0,0 +1,9 @@ +服务端和客户端 +=============== +.. 
toctree:: + :maxdepth: 1 + + 概述.md + 服务端.md + Twinkle客户端.md + Tinker兼容客户端.md diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" new file mode 100644 index 00000000..d194159d --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" @@ -0,0 +1,385 @@ +# 服务端(Server) + +## Ray 集群配置 + +在启动 Server 之前,**必须先启动并配置 Ray 节点**。只有正确配置了 Ray 节点后,Server 才能正确分配和占用资源(GPU、CPU 等)。 + +### 启动 Ray 节点 + +Ray 集群由多个节点(Node)组成,每个节点可以配置不同的资源。启动步骤如下: + +#### 1. 启动 Head 节点(第一个 GPU 节点) + +```bash +# 停止已有的 Ray 集群(如果有) +ray stop + +# 启动 Head 节点,使用 GPU 0-3,共 4 个 GPU +CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --num-gpus=4 --port=6379 +``` + +#### 2. 启动 Worker 节点 + +```bash +# 第二个 GPU 节点,使用 GPU 4-7,共 4 个 GPU +CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=10.28.252.9:6379 --num-gpus=4 + +# CPU 节点(用于运行 Processor 等 CPU 任务) +ray start --address=10.28.252.9:6379 --num-gpus=0 +``` + +**说明:** +- `--head`:标记此节点为 Head 节点(集群的主节点) +- `--port=6379`:Head 节点监听端口 +- `--address=:`:Worker 节点连接到 Head 节点的地址 +- `--num-gpus=N`:该节点可用的 GPU 数量 +- `CUDA_VISIBLE_DEVICES`:限制该节点可见的 GPU 设备 + +#### 3. 
完整示例:3 节点集群 + +```bash +# 停止旧集群并启动新集群 +ray stop && \ +CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --num-gpus=4 --port=6379 && \ +CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=10.28.252.9:6379 --num-gpus=4 && \ +ray start --address=10.28.252.9:6379 --num-gpus=0 +``` + +此配置启动了 3 个节点: +- **Node 0**(Head):4 个 GPU(卡 0-3) +- **Node 1**(Worker):4 个 GPU(卡 4-7) +- **Node 2**(Worker):纯 CPU 节点 + +### YAML 配置中的 Node Rank + +在 YAML 配置文件中,**每个组件需要占用一个独立的 Node**,`ranks` 配置在各自的 Node 内都是从 0 开始编号的。 + +**示例配置:** + +```yaml +applications: + # 模型服务占用 Node 0(Head 节点,GPU 0-3) + - name: models-Qwen2.5-7B-Instruct + route_prefix: /models/Qwen/Qwen2.5-7B-Instruct + import_path: model + args: + nproc_per_node: 4 + device_group: + name: model + ranks: [0, 1, 2, 3] # Node 0 内的 GPU 编号 + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0, 1, 2, 3] + mesh_dim_names: ['dp'] + + # Sampler 服务占用 Node 1(Worker 节点,GPU 4-7) + - name: sampler-Qwen2.5-7B-Instruct + route_prefix: /sampler/Qwen/Qwen2.5-7B-Instruct + import_path: sampler + args: + nproc_per_node: 2 + device_group: + name: sampler + ranks: [0, 1] # Node 1 内的 GPU 编号(对应物理 GPU 4-5) + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0, 1] + mesh_dim_names: ['dp'] + + # Processor 服务占用 Node 2(CPU 节点) + - name: processor + route_prefix: /processors + import_path: processor + args: + ncpu_proc_per_node: 4 + device_group: + name: processor + ranks: 0 # Node 2 内的 CPU 编号 + device_type: CPU + device_mesh: + device_type: CPU + mesh: [0, 1, 2, 3] + mesh_dim_names: ['dp'] +``` + +**重要提示:** +- 每个组件的 `ranks` 配置都是相对于其所占用的 Ray Node 而言 +- 不同组件会自动分配到不同的 Node 上 +- Ray 会根据资源需求(`ray_actor_options` 中的 `num_gpus`、`num_cpus`)自动调度到合适的 Node + +## 启动方式 + +Server 统一通过 `launch_server` 函数或 CLI 命令启动,配合 YAML 配置文件。 + +### 方式一:Python 脚本启动 + +```python +# server.py +import os +from twinkle.server import launch_server + +# 获取配置文件路径(与脚本同目录的 server_config.yaml) +file_dir = os.path.abspath(os.path.dirname(__file__)) +config_path = os.path.join(file_dir, 
'server_config.yaml') + +# 启动服务,此调用将阻塞直到服务关闭 +launch_server(config_path=config_path) +``` + +### 方式二:命令行启动 + +```bash +# 启动 Twinkle 原生 Server +python -m twinkle.server --config server_config.yaml + +# 启动 Tinker 兼容 Server +python -m twinkle.server --config server_config.yaml --server-type tinker +``` + +CLI 支持的参数: + +| 参数 | 说明 | 默认值 | +|------|------|-------| +| `-c, --config` | YAML 配置文件路径(必须) | — | +| `-t, --server-type` | Server 模式:`twinkle` 或 `tinker` | `twinkle` | +| `--namespace` | Ray 命名空间 | tinker 模式默认 `twinkle_cluster` | +| `--no-wait` | 不阻塞等待(守护模式) | `False` | +| `--log-level` | 日志级别 | `INFO` | + +## YAML 配置详解 + +配置文件定义了 Server 的完整部署方案,包括 HTTP 监听、应用组件和资源分配。 + +### Twinkle Server + Transformers 后端 + +```yaml +# server_config.yaml — Twinkle 原生协议 + Transformers 后端 + +# 协议类型:twinkle 原生协议 +server_type: twinkle + +# HTTP 代理位置:EveryNode 表示每个 Ray 节点运行一个代理(多节点场景推荐) +proxy_location: EveryNode + +# HTTP 监听配置 +http_options: + host: 0.0.0.0 # 监听所有网络接口 + port: 8000 # 服务端口号 + +# 应用列表:每个条目定义一个部署在 Server 上的服务组件 +applications: + + # 1. TwinkleServer:中央管理服务 + # 负责处理客户端连接、训练运行跟踪、检查点管理等 + - name: server + route_prefix: /server # API 路径前缀 + import_path: server # 内置组件标识 + args: # 无额外参数 + deployments: + - name: TwinkleServer + autoscaling_config: + min_replicas: 1 # 最小副本数 + max_replicas: 1 # 最大副本数 + target_ongoing_requests: 128 # 每副本目标并发请求数 + ray_actor_options: + num_cpus: 0.1 # 此 Actor 分配的 CPU 资源 + + # 2. 
Model 服务:承载基座模型 + # 执行前向传播、反向传播等训练计算 + - name: models-Qwen2.5-7B-Instruct + route_prefix: /models/Qwen/Qwen2.5-7B-Instruct # 模型的 REST 路径 + import_path: model + args: + use_megatron: false # 使用 Transformers 后端 + model_id: "ms://Qwen/Qwen2.5-7B-Instruct" # ModelScope 模型标识 + adapter_config: # LoRA 适配器配置 + per_token_adapter_limit: 30 # 同时可激活的最大 LoRA 数量 + adapter_timeout: 1800 # 空闲适配器超时卸载时间(秒) + nproc_per_node: 2 # 每节点 GPU 进程数 + device_group: # 逻辑设备组 + name: model + ranks: [0, 1] # 使用的 GPU 卡号 + device_type: cuda + device_mesh: # 分布式训练网格 + device_type: cuda + mesh: [0, 1] # 网格中的设备索引 + mesh_dim_names: ['dp'] # 网格维度:dp=数据并行 + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + # 3. Processor 服务:数据预处理 + # 在 CPU 上执行 tokenization、模板转换等预处理任务 + - name: processor + route_prefix: /processors + import_path: processor + args: + nproc_per_node: 2 # 每节点处理器 worker 数 + ncpu_proc_per_node: 2 # 每节点 CPU 进程数 + device_group: + name: model + ranks: 2 + device_type: CPU + device_mesh: + device_type: CPU + mesh: [0, 1] + mesh_dim_names: ['dp'] + deployments: + - name: ProcessorManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 +``` + +### Twinkle Server + Megatron 后端 + +与 Transformers 后端的区别仅在 Model 服务的 `use_megatron` 参数: + +```yaml + # Model 服务 — Megatron 后端 + - name: models-Qwen2.5-7B-Instruct + route_prefix: /models/Qwen/Qwen2.5-7B-Instruct + import_path: model + args: + use_megatron: true # 使用 Megatron-LM 后端 + model_id: "ms://Qwen/Qwen2.5-7B-Instruct" + nproc_per_node: 2 + device_group: + name: model + ranks: [0, 1] + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0, 1] + mesh_dim_names: ['dp'] +``` + +> **注意**:Megatron 后端不需要 `adapter_config`(LoRA 适配器管理由 Megatron 内部处理)。 + +### Tinker 兼容 Server 配置 + +Tinker 兼容模式的主要区别: +- `server_type` 设为 `tinker` +- `route_prefix` 使用 `/api/v1` 
前缀(Tinker 协议规范) +- 可额外配置 Sampler 服务(用于推理采样) + +```yaml +# server_config.yaml — Tinker 兼容协议 + +server_type: tinker + +proxy_location: EveryNode + +http_options: + host: 0.0.0.0 + port: 8000 + +applications: + + # 1. TinkerCompatServer:中央 API 服务 + - name: server + route_prefix: /api/v1 # Tinker 协议 API 前缀 + import_path: server + args: + deployments: + - name: TinkerCompatServer + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 128 + ray_actor_options: + num_cpus: 0.1 + + # 2. Model 服务(Megatron 后端示例) + - name: models-Qwen2.5-0.5B-Instruct + route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct + import_path: model + args: + use_megatron: true + model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct" + nproc_per_node: 2 + device_group: + name: model + ranks: [0, 1] + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0, 1] + mesh_dim_names: ['dp'] + deployments: + - name: ModelManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + + # 3. 
Sampler 服务(可选,用于推理采样) + - name: sampler-Qwen2.5-0.5B-Instruct + route_prefix: /api/v1/sampler/Qwen/Qwen2.5-0.5B-Instruct + import_path: sampler + args: + model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct" + nproc_per_node: 1 + sampler_type: vllm # 推理引擎:vllm(高性能)或 torch + engine_args: # vLLM 引擎参数 + max_model_len: 4096 # 最大序列长度 + gpu_memory_utilization: 0.5 # GPU 显存使用比例 + enable_lora: true # 支持推理时加载 LoRA + device_group: + name: sampler + ranks: [0] + device_type: cuda + device_mesh: + device_type: cuda + mesh: [0] + mesh_dim_names: ['dp'] + deployments: + - name: SamplerManagement + autoscaling_config: + min_replicas: 1 + max_replicas: 1 + target_ongoing_requests: 16 + ray_actor_options: + num_cpus: 0.1 + num_gpus: 1 # Sampler 需要独立 GPU +``` + +## 配置项说明 + +### 应用组件(import_path) + +| import_path | Twinkle 模式 | Tinker 模式 | 说明 | +|-------------|-------------|------------|------| +| `server` | ✅ | ✅ | 中央管理服务,处理训练运行和检查点 | +| `model` | ✅ | ✅ | 模型服务,承载基座模型进行训练 | +| `processor` | ✅ | ❌ | 数据预处理服务(仅 Twinkle 模式,Tinker 模式需在本地处理) | +| `sampler` | ✅ | ✅ | 推理采样服务 | + +### device_group 与 device_mesh + +- **device_group**:定义逻辑设备组,指定使用哪些 GPU 卡 +- **device_mesh**:定义分布式训练网格,控制并行策略 + +```yaml +device_group: + name: model # 设备组名称 + ranks: [0, 1] # GPU 卡号列表 + device_type: cuda # 设备类型:cuda / CPU + +device_mesh: + device_type: cuda + mesh: [0, 1] # 网格中的设备索引 + mesh_dim_names: ['dp'] # 维度名称,常用:dp(数据并行), tp(张量并行), pp(流水线并行) +``` diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" new file mode 100644 index 00000000..e4617854 --- /dev/null +++ 
"b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" @@ -0,0 +1,97 @@ +# 服务端和客户端 + +Twinkle 提供了完整的 HTTP Server/Client 架构,支持将模型部署为服务,并通过客户端远程调用完成训练、推理等任务。这种架构将**模型承载(Server 端)**和**训练逻辑(Client 端)**解耦,使得多个用户可以共享同一个基座模型进行训练。 + +## 核心概念 + +- **Server 端**:基于 Ray Serve 部署,承载模型权重和推理/训练计算。Server 负责管理模型加载、前向/反向传播、权重保存、采样推理等。 +- **Client 端**:在本地运行,负责数据准备、训练循环编排、超参配置等。Client 通过 HTTP 与 Server 通信,发送数据和指令。 + +### 两种 Server 模式 + +Twinkle Server 支持两种协议模式: + +| 模式 | server_type | 说明 | +|------|------------|------| +| **Twinkle Server** | `twinkle` | 原生 Twinkle 协议,搭配 `twinkle_client` 使用,API 更简洁 | +| **Tinker 兼容 Server** | `tinker` | 兼容 Tinker 协议,搭配 `init_tinker_compat_client` 使用,可复用已有 Tinker 训练代码 | + +### 两种模型后端 + +无论哪种 Server 模式,模型加载均支持两种后端: + +| 后端 | use_megatron | 说明 | +|------|-------------|------| +| **Transformers** | `false` | 基于 HuggingFace Transformers,适用于大多数场景 | +| **Megatron** | `true` | 基于 Megatron-LM,适用于超大规模模型训练,支持更高效的并行策略 | + +### 两种 Client 模式 + +| Client | 初始化方式 | 说明 | +|--------|---------|------| +| **Twinkle Client** | `init_twinkle_client` | 原生客户端,将 `from twinkle import` 改为 `from twinkle_client import` 即可将本地训练代码迁移为远端调用 | +| **Tinker 兼容 Client** | `init_tinker_compat_client` | 对 Tinker SDK 进行 patch,使已有 Tinker 训练代码可直接复用 | + +## 如何选择 + +### Server 模式选择 + +| 场景 | 推荐 | +|------|------| +| 全新项目,使用 Twinkle 体系 | Twinkle Server (`server_type: twinkle`) | +| 已有 Tinker 训练代码,希望迁移到 Twinkle | Tinker 兼容 Server (`server_type: tinker`) | +| 需要推理采样功能 | Tinker 兼容 Server(内置 Sampler 支持) | + +### Client 模式选择 + +| 场景 | 推荐 | +|------|------| +| 已有 Twinkle 本地训练代码,希望改为远端 | Twinkle Client — 仅需改 import 路径 | +| 已有 Tinker 训练代码,希望复用 | Tinker 兼容 Client — 仅需初始化 patch | +| 全新项目 | Twinkle Client — API 更简洁 | + +### 模型后端选择 + +| 场景 | 推荐 | +|------|------| +| 7B/14B 等中小规模模型 | Transformers 后端 | +| 超大规模模型,需要高级并行策略 | Megatron 后端 | +| 快速实验和原型验证 | Transformers 后端 | + +## Cookbook 参考 + 
+完整的可运行示例位于 `cookbook/client/` 目录: + +``` +cookbook/client/ +├── twinkle/ # Twinkle 原生协议示例 +│ ├── transformer/ # Transformers 后端 +│ │ ├── server.py # 启动脚本 +│ │ ├── server_config.yaml # 配置文件 +│ │ └── lora.py # LoRA 训练客户端 +│ └── megatron/ # Megatron 后端 +│ ├── server.py +│ ├── server_config.yaml +│ └── lora.py +└── tinker/ # Tinker 兼容协议示例 + ├── transformer/ # Transformers 后端 + │ ├── server.py + │ ├── server_config.yaml + │ ├── lora.py # LoRA 训练 + │ ├── sample.py # 推理采样 + │ └── self_congnition.py # 自我认知训练+评估 + └── megatron/ # Megatron 后端 + ├── server.py + ├── server_config.yaml + └── lora.py +``` + +运行步骤: + +```bash +# 1. 先启动 Server +python cookbook/client/twinkle/transformer/server.py + +# 2. 在另一个终端运行 Client +python cookbook/client/twinkle/transformer/lora.py +``` diff --git "a/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\351\255\224\346\220\255\345\205\215\350\264\271\350\265\204\346\272\220.md" "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\351\255\224\346\220\255\345\205\215\350\264\271\350\265\204\346\272\220.md" new file mode 100644 index 00000000..f1a32ba0 --- /dev/null +++ "b/docs/source/\344\275\277\347\224\250\346\214\207\345\274\225/\351\255\224\346\220\255\345\205\215\350\264\271\350\265\204\346\272\220.md" @@ -0,0 +1,25 @@ +# 魔搭免费资源 + +在 Twinkle 框架开源的同时,我们在[魔搭社区官网](https://www.modelscope.cn)上提供了免费可用的 RL 训练资源。开发者仅需传入 ModelScope SDK token 即可**免费**训练。 + +目前在集群中运行的模型是。下面介绍具体的使用方法: + +## Step 1. 注册魔搭用户 + +开发者首先需要注册成为魔搭用户,使用魔搭社区的 token 进行调用。 + +注册地址:https://www.modelscope.cn/ + +token 在这里获取:https://www.modelscope.cn/my/access/token 拷贝访问令牌在 SDK 中使用即可。 + +## Step 2. 加入 twinkle-explorers 组织 + +目前 twinkle-kit 的远程训练能力在灰度测试中,开发者需要加入 [twinkle-explorers](https://www.modelscope.cn/models/twinkle-explorers) 组织,组织内的用户可以进行前期使用和测试。 +该组织的申请和加入没有门槛,当前仅用于项目上线前期的流量控制和缺陷反馈。在项目稳定后,我们会移除加入组织的限制。 + +## Step 3. 
查看 Cookbook 并二次定制开发 + +我们强烈推荐开发者查看我们的 [cookbook](https://github.com/modelscope/twinkle/tree/main/cookbook/client),并根据其中的训练代码进行二次开发。 + +开发者可以定制数据集/优势函数/奖励/模板等,其中 Loss 部分由于需要在服务端执行,因此当前暂不支持(安全性原因)。 +如果需要支持您的额外 Loss,可以将该 Loss 实现上传到 ModelHub 中,并在答疑群中或者 issue 中联系我们,将对应组件开放白名单即可使用。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/LRScheduler/CosineWarmupScheduler.md" "b/docs/source/\347\273\204\344\273\266/LRScheduler/CosineWarmupScheduler.md" new file mode 100644 index 00000000..27021318 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/LRScheduler/CosineWarmupScheduler.md" @@ -0,0 +1,28 @@ +# CosineWarmupScheduler + +这个 LRScheduler 用于在训练初始对学习率进行 warmup,在到达指定学习率后对学习率进行衰减。 + +```python +class CosineWarmupScheduler: + + def __init__(self, optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5): + ... + + ... +``` + +构造参数: +- optimizer: optimizer 优化器实例 +- num_warmup_steps: warmup 的步数 +- num_training_steps: 总训练的步数 +- num_cycles: cosine 曲线周期,默认 0.5 半个余弦周期,即从最大学习率衰减到最小。调节为 1 为从最大学习率衰减到最小再回到最大。 + +这些参数可以通过模型的 `set_lr_scheduler` 来设置: + +```python +model.set_lr_scheduler(CosineWarmupScheduler, num_warmup_steps=10, num_training_steps=100, num_cycles=0.5) +``` + +optimizer 参数不需要传入,模型模块内部会自动添加。 + +> Megatron 模型不支持该 Scheduler。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/LRScheduler/LinearWarmupScheduler.md" "b/docs/source/\347\273\204\344\273\266/LRScheduler/LinearWarmupScheduler.md" new file mode 100644 index 00000000..d14a07d4 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/LRScheduler/LinearWarmupScheduler.md" @@ -0,0 +1,27 @@ +# LinearWarmupScheduler + +这个 LRScheduler 用于在训练初始对学习率进行 warmup,在到达指定学习率后对学习率进行衰减。 + +```python +class LinearWarmupScheduler: + + def __init__(self, optimizer, num_warmup_steps: int, num_training_steps: int): + ... + + ... 
+``` + +构造参数: +- optimizer: optimizer 优化器实例 +- num_warmup_steps: warmup 的步数 +- num_training_steps: 总训练的步数 + +这些参数可以通过模型的 `set_lr_scheduler` 来设置: + +```python +model.set_lr_scheduler(LinearWarmupScheduler, num_warmup_steps=10, num_training_steps=100) +``` + +optimizer 参数不需要传入,模型模块内部会自动添加。 + +> Megatron 模型不支持该 Scheduler。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/LRScheduler/index.rst" "b/docs/source/\347\273\204\344\273\266/LRScheduler/index.rst" new file mode 100644 index 00000000..9a767b90 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/LRScheduler/index.rst" @@ -0,0 +1,7 @@ +LRScheduler +=============== +.. toctree:: + :maxdepth: 1 + + CosineWarmupScheduler.md + LinearWarmupScheduler.md diff --git "a/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/InputProcessor.md" "b/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/InputProcessor.md" new file mode 100644 index 00000000..a9a22ee0 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/InputProcessor.md" @@ -0,0 +1,53 @@ +# InputProcessor + +InputProcessor 承载了不同任务的数据准备过程。 + +```python +class InputProcessor: + + def __init__(self, device_mesh: Optional[DeviceMesh] = None, + padding_free: bool = False, + framework: Literal['transformers', 'megatron'] = 'transformers', + **kwargs): + ... + + def __call__(self, inputs: Union[InputFeature, List[InputFeature]], **kwargs) -> Union[InputFeature, List[InputFeature]]: + # 整体处理的入口 + ... + + def prepare_inputs(self, inputs: Union[List[InputFeature], InputFeature], **kwargs) -> List[InputFeature]: + # 移动到 cuda 设备上 + ... + + def pad_cp(self, inputs: List[InputFeature], **kwargs) ->List[InputFeature]: + # 处理 cp + ... + + def split_cp(self, inputs: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + # 处理 cp + ... 
+ + def collate_fn(self, inputs: List[InputFeature], micro_batch_size: Optional[int] = None, + variable_seq_lengths=False, **kwargs) -> List[InputFeature]: + # data_collator + ... +``` + +- device_mesh: 用于切分 cp。如果没有 cp,device_mesh 参数可以不传。 +- padding_free: 是否将多个样本拼接为一个,这个功能和 PackingDataset 比较相似,但 PackingDataset 会让每个 batch 长度基本一致,而 padding_free 仅考虑本 batch 内部的拼接。 + - 使用 PackingDataset 会自动在 InputProcessor 内触发 padding_free +- framework: 支持 transformers 和 megatron。不同的模型架构返回的模型输入略有不同 + +> Twinkle 将 collate_fn 放入 InputProcessor 中,因为不同的任务(sft/grpo 等)对输入需求是不同的。目前 InputProcessor 默认执行在模型端,因为这样可以将 DataLoader 和模型进行解耦。 +> 因为 collate_fn 和运行任务、Megatron 的 micro_batch_size 等信息有关,如果在 DataLoader 中运行,会导致 DataLoader 无法独立成为组件,其逻辑也会变得复杂。 + +InputProcessor 实现了 __call__ 方法,因此你可以使用自己的 function 来完成自己的任务数据准备流程: + +```python +def my_processor(inputs: Union[InputFeature, List[InputFeature]]) -> Union[InputFeature, List[InputFeature]]: + return ... + +model.set_processor(my_processor) +# 或者 +dataloader.set_processor(my_processor) +``` diff --git "a/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/index.rst" "b/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/index.rst" new file mode 100644 index 00000000..a2c88eaf --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\273\273\345\212\241\345\244\204\347\220\206\345\231\250/index.rst" @@ -0,0 +1,6 @@ +任务处理器 +=============== +.. 
toctree:: + :maxdepth: 1 + + InputProcessor.md diff --git "a/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/Advantage.md" "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/Advantage.md" new file mode 100644 index 00000000..01914019 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/Advantage.md" @@ -0,0 +1,61 @@ +# Advantage + +Advantage (优势函数) 是强化学习中用于计算动作相对于平均水平的优势值的组件。在 RLHF 训练中,优势函数用于指导策略优化。 + +## 基本接口 + +```python +class Advantage: + + def __call__(self, + rewards: Union['torch.Tensor', List[float]], + num_generations: int = 1, + scale: Literal['group', 'batch', 'none'] = 'group', + **kwargs) -> 'torch.Tensor': + """ + 计算优势值 + + Args: + rewards: 奖励值列表或张量 + num_generations: 每个 prompt 生成的样本数量 + scale: 归一化方式 + - 'group': 对每组样本进行归一化 (GRPO) + - 'batch': 对整个 batch 进行归一化 + - 'none': 不进行归一化 + + Returns: + 优势值张量 + """ + ... +``` + +## 可用的优势函数 + +Twinkle 提供了两种优势函数实现: + +### GRPOAdvantage + +GRPO (Group Relative Policy Optimization) 优势函数通过减去组内均值来计算优势。 + +- 简单高效,适合大多数场景 +- 减少方差,提高训练稳定性 +- 在组内进行相对比较 + +详见: [GRPOAdvantage](GRPOAdvantage.md) + +### RLOOAdvantage + +RLOO (Reinforcement Learning with Leave-One-Out) 优势函数使用留一法计算基线。 + +- 理论上更优,减少偏差 +- 需要更多样本(建议 8 个以上) +- 更准确的反事实基线估计 + +详见: [RLOOAdvantage](RLOOAdvantage.md) + +## 如何选择 + +- **GRPO**: 适合样本数量较少(4 个左右)的场景,计算效率高 +- **RLOO**: 适合样本数量较多(8 个以上)的场景,理论效果更好 + +> 优势函数的选择对 RLHF 训练效果有重要影响。建议根据计算资源和样本数量选择合适的方法。 diff --git "a/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/GRPOAdvantage.md" "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/GRPOAdvantage.md" new file mode 100644 index 00000000..6e264c9c --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/GRPOAdvantage.md" @@ -0,0 +1,68 @@ +# GRPOAdvantage + +GRPO (Group Relative Policy Optimization) 优势函数通过减去组内均值来计算优势。 + +## 使用示例 + +```python +from twinkle.advantage import GRPOAdvantage + +advantage_fn = GRPOAdvantage() + +# 假设有 2 个 prompt,每个生成 4 个样本 +rewards = 
[0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0] # 8 个奖励值 +advantages = advantage_fn(rewards, num_generations=4, scale='group') + +# advantages 会是每组减去组内均值: +# 第一组: [0.0-0.5, 1.0-0.5, 0.0-0.5, 1.0-0.5] = [-0.5, 0.5, -0.5, 0.5] +# 第二组: [1.0-0.25, 0.0-0.25, 0.0-0.25, 0.0-0.25] = [0.75, -0.25, -0.25, -0.25] +``` + +## 工作原理 + +GRPO 将样本分组(每组对应一个 prompt 的多个生成),然后在组内: +1. 计算组内奖励均值 +2. 每个样本的优势 = 该样本的奖励 - 组内均值 +3. 可选地对优势值进行归一化 + +这种方法能够: +- 减少方差,提高训练稳定性 +- 在组内进行相对比较,更符合人类偏好的相对性 +- 避免奖励尺度的影响 + +## 完整训练示例 + +在 GRPO 训练中使用优势函数: + +```python +from twinkle.advantage import GRPOAdvantage +from twinkle.model import TransformersModel +from twinkle.sampler import vLLMSampler +from twinkle.reward import MathReward + +# 创建组件 +actor = TransformersModel(model_id='Qwen/Qwen2.5-7B-Instruct') +sampler = vLLMSampler(model_id='Qwen/Qwen2.5-7B-Instruct') +reward_fn = MathReward() +advantage_fn = GRPOAdvantage() + +# 训练循环 +for batch in dataloader: + # 1. 采样生成 + response = sampler.sample(batch, num_samples=4) + + # 2. 计算奖励 + rewards = reward_fn(response.trajectories, batch.ground_truths) + + # 3. 计算优势 + advantages = advantage_fn(rewards, num_generations=4) + + # 4. 
策略优化 + loss = actor.forward_backward( + inputs=response.inputs, + advantages=advantages + ) + actor.clip_grad_and_step() +``` + +> GRPO 方法简单高效,适合大多数 RLHF 训练场景。 diff --git "a/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/RLOOAdvantage.md" "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/RLOOAdvantage.md" new file mode 100644 index 00000000..d08f37b1 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/RLOOAdvantage.md" @@ -0,0 +1,65 @@ +# RLOOAdvantage + +RLOO (Reinforcement Learning with Leave-One-Out) 优势函数使用留一法计算基线。 + +## 使用示例 + +```python +from twinkle.advantage import RLOOAdvantage + +advantage_fn = RLOOAdvantage() + +rewards = [0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0] +advantages = advantage_fn(rewards, num_generations=4) + +# 对于每个样本,基线是除了它以外的其他样本的均值 +# 第一组第一个样本: 0.0 - mean([1.0, 0.0, 1.0]) = 0.0 - 0.667 = -0.667 +# ... +``` + +## 工作原理 + +RLOO 对每个样本: +1. 计算除该样本外组内其他样本的奖励均值 (留一基线) +2. 优势 = 该样本奖励 - 留一基线 +3. 可选地进行归一化 + +RLOO 的优势: +- 避免使用样本自身信息作为基线,减少偏差 +- 更准确地估计反事实基线 +- 在样本数量较多时效果更好 + +## 完整训练示例 + +```python +from twinkle.advantage import RLOOAdvantage +from twinkle.model import TransformersModel +from twinkle.sampler import vLLMSampler +from twinkle.reward import MathReward + +# 创建组件 +actor = TransformersModel(model_id='Qwen/Qwen2.5-7B-Instruct') +sampler = vLLMSampler(model_id='Qwen/Qwen2.5-7B-Instruct') +reward_fn = MathReward() +advantage_fn = RLOOAdvantage() + +# 训练循环 +for batch in dataloader: + # 1. 采样生成(每个 prompt 生成更多样本以提高 RLOO 效果) + response = sampler.sample(batch, num_samples=8) + + # 2. 计算奖励 + rewards = reward_fn(response.trajectories, batch.ground_truths) + + # 3. 计算优势 + advantages = advantage_fn(rewards, num_generations=8) + + # 4. 
策略优化 + loss = actor.forward_backward( + inputs=response.inputs, + advantages=advantages + ) + actor.clip_grad_and_step() +``` + +> RLOO 在理论上更优,但需要更多样本(建议每个 prompt 生成 8 个以上样本)。 diff --git "a/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/index.rst" "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/index.rst" new file mode 100644 index 00000000..5938286c --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\344\274\230\345\212\277/index.rst" @@ -0,0 +1,8 @@ +优势 +=============== +.. toctree:: + :maxdepth: 1 + + Advantage.md + GRPOAdvantage.md + RLOOAdvantage.md diff --git "a/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/Kernel.md" "b/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/Kernel.md" new file mode 100644 index 00000000..89ae37ca --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/Kernel.md" @@ -0,0 +1,307 @@ +# Twinkle Kernel 模块 + +Twinkle Kernel 模块提供了两条内核替换路径,用于加速训练和推理: + +* **层级 Kernelize(Layer-level kernelize)** + 使用优化内核替换完整的 `nn.Module` 实现。 +* **函数级 Kernelize(Function-level kernelize)** + 对 Python 模块中的特定函数进行 monkey-patch。 + +这两种方式可以独立使用,也可以通过统一入口组合使用。 + +--- + +## 概览:两条 Kernelize 路径 + +| 路径 | 粒度 | 典型场景 | +| --- | --- | --- | +| 层级替换 | 整个 `nn.Module` | Linear / Conv / MLP / Attention | +| 函数级替换 | 单个函数 | 热点路径、数学算子、激活函数 | + +--- + +## 层级内核替换(Layer-Level) + +### 适用场景 + +* 你已经有完整的层内核实现 +* 希望在模型中批量替换某类 `nn.Module` +* 同时适用于训练与推理 + +--- + +### 示例 1:本地 Kernel 仓库 + +适用于: + +* 内核实现位于本地仓库 +* 希望替换 HuggingFace 或自定义模型中的层 + +```python +from twinkle.kernel import ( + kernelize_model, + register_layer_kernel, + register_external_layer, +) +from transformers import Qwen2Config, Qwen2ForCausalLM +from transformers.models.qwen2.modeling_qwen2 import Qwen2MLP + +# 1) 从本地仓库注册层内核 +register_layer_kernel( + kernel_name="MyAwesomeMLP", + repo_path="/path/to/local/repo", + package_name="my_kernels", + layer_name="Qwen2MLPTrainingKernel", + device="cuda", + mode="train", +) + +# 2) 
绑定外部层与内核名 +register_external_layer(Qwen2MLP, "MyAwesomeMLP") + +# 3) 构建模型并应用内核替换 +config = Qwen2Config( + hidden_size=128, + num_hidden_layers=1, + num_attention_heads=4, + num_key_value_heads=4, + intermediate_size=256, + use_cache=False, +) +model = Qwen2ForCausalLM(config) +model = kernelize_model(model, mode="train", device="cuda", use_fallback=True) +``` + +--- + +### 示例 2:Hub Kernel 仓库 + +适用于: + +* 内核托管在 Hub 上 + +```python +import torch +import torch.nn as nn +from twinkle.kernel import ( + kernelize_model, + register_layer_kernel, + register_external_layer, +) + +# 1) 定义自定义层 +class SiluAndMul(nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + x1, x2 = x.chunk(2, dim=-1) + return nn.functional.silu(x1) * x2 + +# 2) 注册 Hub 内核并绑定层 +register_layer_kernel( + kernel_name="SiluAndMulKernel", + repo_id="kernels-community/activation", + layer_name="SiluAndMul", + device="cuda", + mode="train", +) +register_external_layer(SiluAndMul, "SiluAndMulKernel") + +# 3) 应用到模型 +class SimpleModel(nn.Module): + def __init__(self): + super().__init__() + self.activation = SiluAndMul() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.activation(x) + +model = SimpleModel() +model = kernelize_model(model, mode="train", device="cuda", use_fallback=True) +``` + +--- + +## 本地 Kernel 仓库(最小结构) + +本地 kernel 仓库本质上是一个普通 Python 包。 +最少只需要一个 `layers.py` 来放层级内核实现。 + +```text +# 仓库结构: +my_kernels/ # 本地 kernel 仓库(Python 包) +├── __init__.py # 包入口 +└── layers.py # 层级 kernel 实现 +``` + +```python +# my_kernels/__init__.py +from . 
import layers +__all__ = ["layers"] + +# my_kernels/layers.py +import torch +import torch.nn as nn + +class Qwen2MLPTrainingKernel(nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + gate = self.gate_proj(x) + up = self.up_proj(x) + return self.down_proj(self.act_fn(gate) * up) +``` + +--- + +## 函数级内核替换(Function-Level) + +### 适用场景 + +* 只需要加速少量热点函数 +* 不适合或不需要替换整个层 +* 常用于数学算子、激活函数、工具函数 + +--- + +### 示例 1:批量注册(简单场景) + +```python +from twinkle.kernel import register_kernels, kernelize_model + +# 1) 注册函数内核 +config = { + "functions": { + "add": { + "target_module": "my_pkg.math_ops", + "func_impl": lambda x, y: x + y + 1, + "device": "cuda", + "mode": "inference", + }, + }, +} +register_kernels(config) + +# 2) 应用(仅函数替换时 model 可为 None) +kernelize_model(model=None, mode="inference", device="cuda", use_fallback=True) +``` + +--- + +### 示例 2:高级函数来源(完整控制) + +适用于: + +* 不同函数来自不同来源(impl / repo / hub),或需要 compile/backward 等标志。 + +```python +from twinkle.kernel.function import ( + register_function_kernel, + apply_function_kernel, +) +import torch.nn as nn +from twinkle.kernel import kernelize_model + +TARGET_MODULE = "my_pkg.math_ops" + +# 1) 直接传入实现 +def fast_add(x, y): + return x + y + 1 + +register_function_kernel( + func_name="add", + target_module=TARGET_MODULE, + func_impl=fast_add, + device="cuda", + mode="inference", +) + +# 2) Repo 对象(FuncRepositoryProtocol) +class MyFuncRepo: + def load(self): + return MyKernelFunc + +class MyKernelFunc(nn.Module): + def forward(self, x, y): + return x * y + +register_function_kernel( + func_name="mul", + target_module=TARGET_MODULE, + repo=MyFuncRepo(), + device="cuda", + mode="compile", +) + +# 3) Hub 仓库 +register_function_kernel( + func_name="silu_and_mul", + target_module="my_pkg.activations", + repo_id="kernels-community/activation", + revision="main", # 或 version="0.1.0" + device="cuda", + mode="inference", +) + +# 4) 应用函数内核 +applied = apply_function_kernel( + target_module=TARGET_MODULE, + device="cuda", + 
mode="inference", + strict=False, +) +print("patched:", applied) + +# 5) 可选:通过 kernelize_model 统一应用 +model = nn.Sequential(nn.Linear(8, 8), nn.ReLU()) +kernelize_model(model=model, mode="inference", device="cuda", use_fallback=True) +``` + +--- + +## 层级 + 函数级统一批量注册 + +### 适用场景 + +* 需要框架级统一集成 +* 希望通过单一配置入口管理 +* 同时管理层和函数两类内核 + +```python +from twinkle.kernel import register_kernels, kernelize_model +import torch.nn as nn + +# 1) 注册层级 + 函数级内核 +config = { + "layers": { + "linear": { + "repo_id": "kernels-community/linear", + "layer_name": "Linear", + "version": "0.1.0", + "device": "cuda", + "mode": "train", + }, + "conv2d": { + "repo_path": "/path/to/local/repo", + "package_name": "my_kernels", + "layer_name": "Conv2d", + "device": "cuda", + }, + }, + "functions": { + "add": { + "target_module": "my_pkg.math_ops", + "func_impl": lambda x, y: x + y + 1, + "device": "cuda", + "mode": "inference", + }, + "relu": { + "target_module": "my_pkg.activations", + "repo_id": "kernels-community/activation", + "revision": "main", + "device": "cuda", + }, + }, +} +register_kernels(config) + +# 2) 通过 kernelize_model 应用 +model = nn.Sequential(nn.Linear(8, 8), nn.ReLU()) +kernelize_model(model=model, mode="train", device="cuda", use_fallback=True) +``` diff --git "a/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/index.rst" "b/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/index.rst" new file mode 100644 index 00000000..0c65152f --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\345\206\205\346\240\270/index.rst" @@ -0,0 +1,6 @@ +Kernel +=============== +.. 
toctree:: + :maxdepth: 1 + + Kernel.md diff --git "a/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/Reward.md" "b/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/Reward.md" new file mode 100644 index 00000000..68779145 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/Reward.md" @@ -0,0 +1,119 @@ +# Reward + +Reward (奖励函数) 是 RLHF 训练中用于评估模型输出质量的组件。奖励函数根据模型生成的轨迹计算奖励分数,用于指导策略学习。 + +## 基本接口 + +```python +class Reward: + + def __call__(self, trajectories: List[Trajectory], ground_truths: List[Trajectory]): + """ + 计算奖励值 + + Args: + trajectories: 模型生成的轨迹列表 + ground_truths: 真实答案轨迹列表 + + Returns: + 奖励值列表 + """ + ... +``` + +## MathReward + +数学奖励函数用于评估数学问题的答案正确性。 + +```python +from twinkle.reward import MathReward + +reward_fn = MathReward() +rewards = reward_fn(generated_trajectories, ground_truth_trajectories) +# rewards: List[float],1.0 表示正确,0.0 表示错误 +``` + +## FormatReward + +格式奖励函数用于检查输出是否符合指定格式。 + +```python +from twinkle.reward import FormatReward + +reward_fn = FormatReward() +rewards = reward_fn(trajectories, ground_truths) +``` + +## CountDownAccuracyReward + +倒计时准确率奖励函数,在答案接近正确时给予部分奖励。 + +```python +from twinkle.reward import CountDownAccuracyReward + +reward_fn = CountDownAccuracyReward() +rewards = reward_fn(trajectories, ground_truths) +``` + +## 自定义奖励函数 + +你可以通过继承 Reward 基类或使用函数来创建自定义奖励: + +```python +from twinkle.reward import Reward +from twinkle.data_format import Trajectory +from typing import List + +class CustomReward(Reward): + + def __call__(self, trajectories: List[Trajectory], ground_truths: List[Trajectory]): + rewards = [] + for traj, gt in zip(trajectories, ground_truths): + # 自定义评估逻辑 + score = self._evaluate(traj, gt) + rewards.append(score) + return rewards + + def _evaluate(self, traj, gt): + # 实现具体评估逻辑 + ... 
+``` + +或使用函数: + +```python +def my_reward(trajectories, ground_truths): + return [1.0 if t == gt else 0.0 for t, gt in zip(trajectories, ground_truths)] + +# 在训练中使用 +rewards = my_reward(generated, ground_truths) +``` + +## 使用场景 + +奖励函数在 RLHF 训练的典型使用流程: + +```python +from twinkle.sampler import vLLMSampler +from twinkle.reward import MathReward +from twinkle.advantage import GRPOAdvantage + +sampler = vLLMSampler(model_id='Qwen/Qwen2.5-7B-Instruct') +reward_fn = MathReward() +advantage_fn = GRPOAdvantage() + +for batch in dataloader: + # 1. 采样生成多个候选答案 + response = sampler.sample(batch, num_samples=4) + + # 2. 使用奖励函数评估质量 + rewards = reward_fn(response.trajectories, batch.ground_truths) + + # 3. 计算优势值 + advantages = advantage_fn(rewards, num_generations=4) + + # 4. 用优势值进行策略梯度更新 + ... +``` + +> 奖励函数的设计对 RLHF 效果至关重要。好的奖励函数应该准确反映任务目标,并提供明确的学习信号。 diff --git "a/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/index.rst" "b/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/index.rst" new file mode 100644 index 00000000..084262b2 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\345\245\226\345\212\261/index.rst" @@ -0,0 +1,6 @@ +奖励 +=============== +.. toctree:: + :maxdepth: 1 + + Reward.md diff --git "a/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/Accuracy.md" "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/Accuracy.md" new file mode 100644 index 00000000..ccd6cd1d --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/Accuracy.md" @@ -0,0 +1,14 @@ +# Accuracy + +准确率指标用于衡量训练时的token级别准确率信息。 + +```python +from twinkle.metric import Accuracy +from twinkle.data_format import InputFeature, ModelOutput +metric = Accuracy(device_mesh=..., process_group=...) +metric.accumulate(InputFeature(labels=...), ModelOutput(logits=...)) +... 
+_metric = metric.calculate() +``` + +> Accuracy目前尚未支持List\[InputFeature\]作为输入,也就是对Megatron的支持待适配。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/LossMetric.md" "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/LossMetric.md" new file mode 100644 index 00000000..efb4b9f0 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/LossMetric.md" @@ -0,0 +1,12 @@ +# LossMetric + +LossMetric用于打印和评估损失和grad_norm信息 + +```python +from twinkle.metric import LossMetric +from twinkle.data_format import InputFeature, ModelOutput +metric = LossMetric(device_mesh=..., process_group=...) +metric.accumulate(InputFeature(labels=...), ModelOutput(loss=...), grad_norm=...) +... +_metric = metric.calculate() +``` diff --git "a/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/TrainMetric.md" "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/TrainMetric.md" new file mode 100644 index 00000000..7adbd89f --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/TrainMetric.md" @@ -0,0 +1,13 @@ +# TrainMetric + +训练指标用于衡量训练过程中的状态。训练指标包含了当前学习率、当前step、总训练时长、训练速度等训练指标。 + +```python +from twinkle.metric import TrainMetric +metric = TrainMetric() +metric.accumulate(None, None, lr=0.0001, step=10, gradient_accumulation_steps=16) +... +_metric = metric.calculate() +``` + +> TrainMetric 不需要 device_mesh 和 process_group 信息,也不需要 inputs、outputs 信息 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/index.rst" "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/index.rst" new file mode 100644 index 00000000..a8d9d6c5 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/index.rst" @@ -0,0 +1,9 @@ +指标 +=============== +.. 
toctree:: + :maxdepth: 1 + + TrainMetric.md + LossMetric.md + Accuracy.md + 构建指标.md diff --git "a/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/\346\236\204\345\273\272\346\214\207\346\240\207.md" "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/\346\236\204\345\273\272\346\214\207\346\240\207.md" new file mode 100644 index 00000000..17c53e9d --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\214\207\346\240\207/\346\236\204\345\273\272\346\214\207\346\240\207.md" @@ -0,0 +1,24 @@ +# 构建指标 + +指标用于衡量训练过程和训练结果。指标组件属于可定制组件的一部分。 + +```python +class Metric: + + def __init__(self, device_mesh, process_group, **kwargs): + self.process_group = process_group + self.device_mesh = device_mesh + + # 由于 microbatch 的存在,输入到 Metric 的 inputs 可能是个 List + def accumulate(self, inputs: 'Union[InputFeature, List[InputFeature]]', outputs: 'ModelOutput'): + ... + + def calculate(self): + ... + + def reset(self): + ... +``` + +指标无法通过 Callable 传入。因为它包含了 `accumulate` 和 `calculate` 两个部分,并需要支持 `reset` 来归零。指标的构造中会自动传入 device_mesh 和隶属于当前 dp 组的 process_group,用以跨进程通信。 +并且,在实际的实现中,基类提供了 `gather_results` 方法来辅助收集各个进程的输入结果。 diff --git "a/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/CrossEntropy.md" "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/CrossEntropy.md" new file mode 100644 index 00000000..74658322 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/CrossEntropy.md" @@ -0,0 +1,20 @@ +# 交叉熵 + +交叉熵是模型SFT和PT训练中最常用的一类损失。用于对labels的精确概率拟合。 + +```python +class CrossEntropyLoss(Loss): + + def __init__(self, **kwargs): + self.reduction = kwargs.get('reduction', 'mean') + + def __call__(self, inputs, outputs, **kwargs): + import torch + logits = outputs['logits'].view(-1, outputs['logits'].shape[-1]) + labels = inputs['labels'].view(-1) + return torch.nn.CrossEntropyLoss(reduction=self.reduction)(logits, labels) +``` + +构造中可以传入reduction参数,支持`sum`, `mean`, 
`none`等(和`torch.nn.CrossEntropyLoss`输入相同)。 + +> 在Transformers模型中目前使用`sum`。目的是在optimizer.step之前统计有效token数量并在grad层面取单token平均。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/index.rst" "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/index.rst" new file mode 100644 index 00000000..2696d072 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/index.rst" @@ -0,0 +1,7 @@ +损失 +=============== +.. toctree:: + :maxdepth: 1 + + CrossEntropy.md + 构建损失.md diff --git "a/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/\346\236\204\345\273\272\346\215\237\345\244\261.md" "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/\346\236\204\345\273\272\346\215\237\345\244\261.md" new file mode 100644 index 00000000..09f0399b --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\215\237\345\244\261/\346\236\204\345\273\272\346\215\237\345\244\261.md" @@ -0,0 +1,34 @@ +# 构建新的 Loss + +Twinkle 中的 loss 基类定义为: + +```python +class Loss: + + def __call__(self, inputs: InputFeature, outputs: ModelOutput, **kwargs): + ... +``` + +损失的输入为模型的 `InputFeature`,输出为模型标准 `ModelOutput`,kwargs 可以在模型的 calculate_loss 中传入。由于它是一个带有 `__call__` 方法的类,因此开发者也可以使用 Callable: + + +```python +def my_loss(inputs: InputFeature, outputs: ModelOutput, extra_data1: int, extra_data2: dict): + ... 
+ return loss +``` + +在模型中这样使用: + +```python +model.set_loss(my_loss) +model.calculate_loss(extra_data1=10, extra_data2={}) +``` + +你也可以将 Loss 上传到 ModelScope/Hugging Face 的 Hub 中,在使用时动态拉取: + +```python +model.set_loss('ms://my_group/my_loss') +``` + +具体可以参考插件文档的介绍。 diff --git "a/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/DataLoader.md" "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/DataLoader.md" new file mode 100644 index 00000000..86c76c0d --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/DataLoader.md" @@ -0,0 +1,47 @@ +# DataLoader + +DataLoader 是 PyTorch 中用于加载处理后的数据集,并提供数据给模型的组件。该组件的工作流程为: + +传入数据集 -> 构建 sampler 和 batch_sampler -> 索引数据 -> 调用 sampler 拿到索引 -> 从 dataset 中取出一个 batch -> 进行 collate_fn 操作 -> 吐出数据 + +DataLoader 的整体工作方式类似于: + +```python +for data in dataloader: + ... +``` + +可以看出 dataloader 包含 `__iter__` 方法,返回一个迭代器出来。在 DDP、TP、Ulysses 等不同训练条件下,由于每个 rank 取出的数据不同,因此一般 sampler 有多种实现,较为复杂。 + +在 Twinkle 中,我们采取了一个非常简单直接的方案,将 `DeviceMesh` 传递给 DataLoader,由于 DeviceMesh 中包含了集群结构,因此 DeviceMesh 可以给出所有 rank 需要的数据分片。 +因此我们额外开发了 `DeviceMeshSampler` 和 `DeviceMeshFetcher`,分别用于普通数据集和流式数据集两类的取样工作。 +另外,由于 LazyDataset 的存在,导致数据集实际取出数据时可能包含了无效数据或者抛出异常,因此提供了 `RetrySampler` 来进行跳过和重试。 + +DataLoader 的使用非常简单: + +```python +dataloader = DataLoader(dataset) +for data in dataloader: + ... +``` +在 torchrun 条件下,由于整体同构,因此全局只需要一个 device_mesh,这个参数无需通过 DataLoader 的构造传入,infra 模块会自动分析并传入。 + +DataLoader 也支持在 Ray 模式下工作: +```python + +def create_dataset(): + dataset = Dataset(...) + dataset.map(...) + dataset.encode(...) + return dataset + +dataloader = DataLoader(create_dataset, device_mesh=actor_device_mesh, remote_group='actor') +for data in dataloader: + ... 
+``` + +DataLoader 的 dataset 参数可以传入一个 Callable 来返回一个 Dataset,这样可以做到数据集的构建代码放在 driver 中,但实际的构建在 Dataloader 的 worker 中,防止了跨进程的 pickle,提高速度。 +dataloader 的 `@remote_class` 装饰器的执行范围也是 `first`,这意味着它只会有一个 worker 用来取出数据。 + +> 开发者无需担心 dataloader 返回的 data 占用 driver 内存,data 通常是一个引用句柄,到了需要使用的 worker 才会实际传递并解包。 +> Dataloader 默认不设置任何的 collate_fn,而是将这个过程交由模型处理。 \ No newline at end of file diff --git "a/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/index.rst" "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/index.rst" new file mode 100644 index 00000000..55fb78da --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\345\212\240\350\275\275/index.rst" @@ -0,0 +1,6 @@ +数据加载 +=============== +.. toctree:: + :maxdepth: 1 + + DataLoader.md diff --git "a/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/InputFeature.md" "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/InputFeature.md" new file mode 100644 index 00000000..cef254cb --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/InputFeature.md" @@ -0,0 +1,26 @@ +# 模型输入 + +twinkle用于表示模型输入的类是`InputFeature`,该类适配于transformers/megatron等模型结构。 + +```python +InputType = Union[List[List[int]], List[int], np.ndarray, Any] + +class InputFeature(TypedDict, total=False): + # Text-related fields + input_ids: InputType + attention_mask: InputType + position_ids: InputType + labels: InputType +``` + +InputFeature本质上是一个Dict。其输入来自于`Template`组件的输出。 + +- input_ids: List[Messages]以模板进行嵌套之后的token list +- attention_mask: 注意力掩膜 +- position_ids: 用于样本区分的位置编码 +- labels: 训练的label,已经进行了一个token的左位移 + +在packing或padding_free的情况下,input_ids等字段由多个样本的列表拼接而来。 +在多模态场景下,InputFeature包含多模态其他字段。 + +InputFeature是twinkle中所有模板输出、模型输入的标准接口。 diff --git 
"a/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/Message.md" "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/Message.md" new file mode 100644 index 00000000..af157284 --- /dev/null +++ "b/docs/source/\347\273\204\344\273\266/\346\225\260\346\215\256\346\240\274\345\274\217/Message.md" @@ -0,0 +1,43 @@ +# 消息 + +消息代表了模型对话的单轮信息。消息的定义为: + +```python + +class ToolCall(TypedDict, total=False): + tool_name: str + arguments: str + +class Message(TypedDict, total=False): + role: Literal['system', 'user', 'assistant', 'tool'] + type: str + content: Union[str, List[Dict[str, str]]] + tool_calls: List[ToolCall] + reasoning_content: str + images: Optional[List[Union[str, Any]]] + videos: Optional[List[Union[str, Any]]] + audios: Optional[List[Union[str, Any]]] +``` + +本质上,`Message`是一个Dict。里面包含了若干字段,和开发者强相关的有: + +- role: 消息类型,包含了'system', 'user', 'assistant', 'tool'四类。 + - system: 系统指令消息,仅在第0个消息中出现 + - user: 用户输入消息 + - assistant: 模型回复的消息 + - tool: 工具调用结果,类似user消息输入给模型 +- content: 消息正文,如果包含多模态信息,则需要有占位符: + - : 图片占位符 + -