Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,36 @@ RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --
echo "Checking out: $LATEST_RELEASE" && \
git checkout --track "$LATEST_RELEASE"

RUN sh INSTALL_MEGATRON.sh
ENV SETUPTOOLS_USE_DISTUTILS=local

# Install base packages
RUN pip install --upgrade peft accelerate transformers "modelscope[framework]" --no-cache-dir

# Install vllm
RUN pip install --upgrade vllm --no-cache-dir

# Install transformer_engine and megatron_core
RUN SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \
CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir

RUN pip install megatron_core mcore_bridge --no-cache-dir

# Install flash-attention (default arch 8.0;9.0, override via build-arg if needed)
ARG TORCH_CUDA_ARCH_LIST="8.0;9.0"
RUN TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" \
MAX_JOBS=8 \
FLASH_ATTENTION_FORCE_BUILD=TRUE \
pip install flash-attn --no-build-isolation --no-cache-dir

RUN pip install flash-linear-attention -U --no-cache-dir

# Install numpy
RUN pip install numpy==2.2 --no-cache-dir

# Install tinker, ray, and other deps
RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U

# Install twinkle itself
RUN pip install -e . --no-build-isolation
8 changes: 4 additions & 4 deletions INSTALL_MEGATRON.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST"
# Install latest base packages
echo ""
echo "Installing peft, accelerate, transformers, modelscope..."
pip install --upgrade peft accelerate transformers "modelscope[framework]"
pip install --upgrade peft accelerate transformers "modelscope[framework]" --no-cache-dir

# Install latest vllm
echo ""
echo "Installing latest vllm..."
pip install --upgrade vllm
pip install --upgrade vllm --no-cache-dir

# Get site-packages path and install transformer_engine and megatron_core
echo ""
Expand All @@ -83,12 +83,12 @@ MAX_JOBS=8 \
FLASH_ATTENTION_FORCE_BUILD=TRUE \
pip install flash-attn --no-build-isolation --no-cache-dir

pip install flash-linear-attention -U
pip install flash-linear-attention -U --no-cache-dir

# Install numpy
echo ""
echo "Installing numpy==2.2 and deep_gemm..."
pip install numpy==2.2
pip install numpy==2.2 --no-cache-dir

# Verify installation
echo ""
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def train():
# 1000 samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
# Set template to prepare encoding
dataset.set_template('Template', model_id=base_model)
dataset.set_template('Qwen3_5Template', model_id=base_model)
# Preprocess the dataset to standard format
dataset.map(SelfCognitionProcessor('twinkle LLM', 'ModelScope Community'))
# Encode dataset
Expand Down Expand Up @@ -242,7 +242,7 @@ api_key='your-api-key'

# Use twinkle dataset to load the data
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))
dataset.set_template('Template', model_id=base_model, max_length=256)
dataset.set_template('Qwen3_5Template', model_id=base_model, max_length=256)
dataset.map(SelfCognitionProcessor('twinkle Model', 'ModelScope Team'), load_from_cache_file=False)
dataset.encode(batched=True, load_from_cache_file=False)
dataloader = DataLoader(dataset=dataset, batch_size=8)
Expand Down
4 changes: 2 additions & 2 deletions README_ZH.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def train():
# 1000 samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
# Set template to prepare encoding
dataset.set_template('Template', model_id=base_model)
dataset.set_template('Qwen3_5Template', model_id=base_model)
# Preprocess the dataset to standard format
dataset.map(SelfCognitionProcessor('twinkle LLM', 'ModelScope Community'))
# Encode dataset
Expand Down Expand Up @@ -224,7 +224,7 @@ api_key='your-api-key'

# Use twinkle dataset to load the data
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))
dataset.set_template('Template', model_id=base_model, max_length=256)
dataset.set_template('Qwen3_5Template', model_id=base_model, max_length=256)
dataset.map(SelfCognitionProcessor('twinkle Model', 'ModelScope Team'), load_from_cache_file=False)
dataset.encode(batched=True, load_from_cache_file=False)
dataloader = DataLoader(dataset=dataset, batch_size=8)
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/tinker/modelscope/self_cognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def train():
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))

# Apply the chat template matching the base model (max 256 tokens per sample)
dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=256)
dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=256)

# Replace placeholder names with custom model/author identity
dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False)
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/tinker/modelscope/short_math_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def create_math_dataset():
data_slice=range(DATA_NUM),
)
dataset = Dataset(meta)
dataset.set_template('Template', model_id=BASE_MODEL, max_length=4096, truncation_strategy='delete')
dataset.set_template('Qwen3_5Template', model_id=BASE_MODEL, max_length=4096, truncation_strategy='delete')
dataset.map(MathPreprocessor())
dataset.filter(lambda row: bool(row['messages']))
dataset.encode(add_generation_prompt=True)
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/tinker/self_host/self_cognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def train():
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))

# Apply the chat template matching the base model (max 256 tokens per sample)
dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=256)
dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=256)

# Replace placeholder names with custom model/author identity
dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False)
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/tinker/self_host/short_math_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def create_math_dataset():
data_slice=range(DATA_NUM),
)
dataset = Dataset(meta)
dataset.set_template('Template', model_id=BASE_MODEL, max_length=4096, truncation_strategy='delete')
dataset.set_template('Qwen3_5Template', model_id=BASE_MODEL, max_length=4096, truncation_strategy='delete')
dataset.map(MathPreprocessor())
dataset.filter(lambda row: bool(row['messages']))
dataset.encode(add_generation_prompt=True)
Expand Down
4 changes: 2 additions & 2 deletions cookbook/client/twinkle/modelscope/self_congnition.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def train():
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))

# Apply a chat template so the data matches the model's expected input format
dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=512)
dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=512)

# Replace placeholder names in the dataset with custom model/author names
dataset.map('SelfCognitionProcessor', init_args={'model_name': 'twinkle模型', 'model_author': 'ModelScope社区'})
Expand All @@ -77,7 +77,7 @@ def train():
model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2)

# Set the same chat template used during data preprocessing
model.set_template('Template')
model.set_template('Qwen3_5Template')

# Set the input processor (pads sequences on the right side)
model.set_processor('InputProcessor', padding_side='right')
Expand Down
6 changes: 3 additions & 3 deletions cookbook/client/twinkle/self_host/grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

def create_gsm8k_dataset():
dataset = Dataset(DatasetMeta('ms://modelscope/gsm8k', subset_name='main', split='train'))
dataset.set_template('Template', model_id=MODEL_ID, max_length=2048)
dataset.set_template('Qwen3_5Template', model_id=MODEL_ID, max_length=2048)
dataset.map('GSM8KProcessor')
dataset.encode(add_generation_prompt=True)
return dataset
Expand Down Expand Up @@ -112,11 +112,11 @@ def train():

# Set processor and template for encoding inputs
model.set_processor('InputProcessor')
model.set_template('Template', model_id=MODEL_ID)
model.set_template('Qwen3_5Template', model_id=MODEL_ID)

# Step 4: Configure the sampler
sampler = vLLMSampler(model_id=MODEL_ID)
sampler.set_template('Template', model_id=MODEL_ID)
sampler.set_template('Qwen3_5Template', model_id=MODEL_ID)

# Step 5: Setup metrics and advantage function
advantage_fn = GRPOAdvantage()
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/twinkle/self_host/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def sample():
sampler = vLLMSampler(model_id=MODEL_ID)

# Step 4: Set the chat template so the sampler can encode Trajectory inputs
sampler.set_template('Template', model_id=MODEL_ID)
sampler.set_template('Qwen3_5Template', model_id=MODEL_ID)

# Step 5: Prepare inputs as Trajectory dicts (messages format)
# Each trajectory is a conversation with system and user messages
Expand Down
4 changes: 2 additions & 2 deletions cookbook/client/twinkle/self_host/self_congnition.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def train():
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))

# Apply a chat template so the data matches the model's expected input format
dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=512)
dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=512)

# Replace placeholder names in the dataset with custom model/author names
dataset.map('SelfCognitionProcessor', init_args={'model_name': 'twinkle模型', 'model_author': 'ModelScope社区'})
Expand All @@ -84,7 +84,7 @@ def train():
model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2)

# Set the same chat template used during data preprocessing
model.set_template('Template')
model.set_template('Qwen3_5Template')

# Set the input processor (pads sequences on the right side)
model.set_processor('InputProcessor', padding_side='right')
Expand Down
4 changes: 2 additions & 2 deletions cookbook/megatron/tp_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
def eval(model):
# 100 Samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100)))
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
dataset.encode()
dataloader = DataLoader(dataset=dataset, batch_size=16)
Expand All @@ -34,7 +34,7 @@ def train():
# 1000 samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
# Set template to prepare encoding
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
# Preprocess the dataset to standard format
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
# Encode dataset
Expand Down
4 changes: 2 additions & 2 deletions cookbook/ray/single_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
def eval(model):
# 100 Samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100)))
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-35B-A3B')
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
dataset.encode()
dataloader = DataLoader(dataset=dataset, batch_size=8, min_batch_size=8)
Expand All @@ -41,7 +41,7 @@ def train():
# 1000 samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
# Set template to prepare encoding
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-4B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
# Preprocess the dataset to standard format
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
# Encode dataset
Expand Down
6 changes: 6 additions & 0 deletions cookbook/rl/gkd_on_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def main():
# ── Student vLLM sampler (for on-policy generation) ────────────────────────
student_sampler = vLLMSampler(
model_id=STUDENT_MODEL_ID,
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
engine_args={'gpu_memory_utilization': 0.85, 'max_model_len': 4096, 'enable_lora': True, 'max_loras': 1},
device_mesh=sampler_mesh,
remote_group='student_sampler',
Expand Down Expand Up @@ -210,6 +213,9 @@ def main():
break

# 1. Sync student model weights to student sampler
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
ckpt_manager.sync_weights(merge_and_sync=False)
student_sampler.reset_prefix_cache()

Expand Down
6 changes: 6 additions & 0 deletions cookbook/rl/grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def main():
'max_model_len': 4496,
'max_lora_rank': 32, # same as lora_config
# NOTE: To use enable_lora with qwen3.5, ensure vLLM includes PR https://github.com/vllm-project/vllm/pull/36976
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
'enable_lora': True,
},
device_mesh=sampler_mesh,
Expand Down Expand Up @@ -133,6 +136,9 @@ def main():
break
metrics.reset()
global_prompts = batch if isinstance(batch, list) else [batch]
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
ckpt_manager.sync_weights(merge_and_sync=False)
sampler.reset_prefix_cache()
sample_responses = sampler.sample(
Expand Down
11 changes: 6 additions & 5 deletions cookbook/rl/grpo_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@
)
from twinkle.sampler import vLLMSampler

import swanlab
swanlab.init(
project='twinkle',
)
logger = get_logger()

# Model configuration
Expand Down Expand Up @@ -184,6 +180,9 @@ def main():
'gpu_memory_utilization': 0.8,
'max_model_len': 32000,
'max_lora_rank': 32,
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
'enable_lora': True,
'limit_mm_per_prompt': {'image': 9}, # OlympiadBench has up to 9 images
},
Expand Down Expand Up @@ -221,6 +220,9 @@ def main():
metrics.reset()

# Sync weights to sampler
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
ckpt_manager.sync_weights(merge_and_sync=False)
sampler.reset_prefix_cache()

Expand Down Expand Up @@ -282,7 +284,6 @@ def main():
log_dict.update(model.calculate_metric(is_training=True, adapter_name=ADAPTER_NAME))
metrics.reset()
logger.info(f'[Step {optim_step}/{MAX_STEPS}] {log_dict}')
swanlab.log(log_dict)

logger.info(f'Training completed. optim_steps={optim_step}')
model.save('olympiad-grpo-mixed-final', adapter_name=ADAPTER_NAME)
Expand Down
13 changes: 6 additions & 7 deletions cookbook/rl/short_math_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,6 @@
SYSTEM_PROMPT = ('You are a helpful math assistant. Solve the problem with minimal but correct reasoning '
'and put your final answer within \\boxed{}.')

import swanlab
swanlab.init(
project='twinkle',
)


# ========== Reward Functions ==========
class GSM8KBrevityReward(Reward):
"""Brevity reward: rewards shorter completions that contain a valid answer.
Expand Down Expand Up @@ -167,6 +161,9 @@ def main():
'max_model_len': 8192,
'max_lora_rank': 32, # same as lora_config
# NOTE: To use enable_lora with qwen3.5, ensure vLLM includes PR https://github.com/vllm-project/vllm/pull/36976
# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
'enable_lora': True,
},
device_mesh=sampler_mesh,
Expand Down Expand Up @@ -202,6 +199,9 @@ def main():
for prompt in batch:
expand_prompts.extend([prompt] * NUM_GENERATIONS)

# enable_lora=True is paired with ckpt_manager.sync_weights(merge_and_sync=False),
# which syncs only the LoRA weights. With merge_and_sync=True, the LoRA is instead
# merged into the base model and all weights are synced to vLLM.
ckpt_manager.sync_weights(merge_and_sync=False)
sampler.reset_prefix_cache()

Expand Down Expand Up @@ -256,7 +256,6 @@ def main():

log_dict = metrics.calculate()
log_dict.update(model.calculate_metric(is_training=True))
swanlab.log(log_dict)
metrics.reset()
logger.info(f'[Step {optim_step}/{MAX_STEPS}] {log_dict}')

Expand Down
4 changes: 2 additions & 2 deletions cookbook/transformers/ep_fsdp_qwen3_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

MODEL_ID = os.environ.get('QWEN3_MODEL_ID', 'ms://Qwen/Qwen3.5-4B')
DATASET_ID = os.environ.get('DATASET_ID', 'ms://swift/self-cognition')
TEMPLATE_ID = os.environ.get('TEMPLATE_ID', 'Template')
TEMPLATE_ID = os.environ.get('TEMPLATE_ID', 'Qwen3_5Template')
_num_layers_env = os.environ.get('NUM_LAYERS')
NUM_LAYERS = int(_num_layers_env) if _num_layers_env is not None else None
BATCH_SIZE = int(os.environ.get('BATCH_SIZE', '4'))
Expand Down Expand Up @@ -47,7 +47,7 @@ def train():
try:
dataset.set_template(TEMPLATE_ID, model_id=MODEL_ID)
except ValueError:
dataset.set_template('Template', model_id=MODEL_ID)
dataset.set_template('Qwen3_5Template', model_id=MODEL_ID)

dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
dataset.encode(batched=True)
Expand Down
4 changes: 2 additions & 2 deletions cookbook/transformers/fsdp2_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
def eval(model):
# 100 Samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100)))
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-4B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
dataset.encode()
dataloader = DataLoader(dataset=dataset, batch_size=4)
Expand All @@ -35,7 +35,7 @@ def train():
# 1000 samples
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
# Set template to prepare encoding
dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-4B')
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
# Preprocess the dataset to standard format
dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
# Encode dataset
Expand Down
Loading
Loading