
Commit 09c6202

update cookbook and doc
1 parent 28b3bb2 commit 09c6202

6 files changed: 485 additions & 18 deletions

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import os

from peft import LoraConfig

import twinkle
from twinkle import DeviceMesh, get_device_placement, get_logger
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.model import MegatronModel
from twinkle.preprocessor import SelfCognitionProcessor

# Build a device mesh for the verified NPU LoRA smoke test.
MODEL_ID = os.environ.get('TWINKLE_LOCAL_MODEL_DIR', 'ms://Qwen/Qwen3-4B')
DATASET_PATH = os.environ.get(
    'TWINKLE_LOCAL_DATASET_PATH',
    'ms://swift/self-cognition',
)
MAX_STEPS = int(os.environ.get('TWINKLE_MAX_STEPS', '10'))
TRAIN_SAMPLES = int(os.environ.get('TWINKLE_TRAIN_SAMPLE_LIMIT', '160'))
BATCH_SIZE = int(os.environ.get('TWINKLE_BATCH_SIZE', '16'))

# 8 cards: dp=2, tp=2, pp=2
device_mesh = DeviceMesh.from_sizes(dp_size=2, tp_size=2, pp_size=2)
twinkle.initialize(mode='local', global_device_mesh=device_mesh)

logger = get_logger()


def build_dataloader() -> DataLoader:
    dataset = Dataset(dataset_meta=DatasetMeta(DATASET_PATH, data_slice=range(TRAIN_SAMPLES)))
    dataset.set_template('Template', model_id=MODEL_ID)
    dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
    dataset.encode()
    return DataLoader(dataset=dataset, batch_size=BATCH_SIZE)


def train():
    dataloader = build_dataloader()

    model = MegatronModel(model_id=MODEL_ID)
    lora_config = LoraConfig(r=8, lora_alpha=32, target_modules='all-linear')
    model.add_adapter_to_model('default', lora_config)
    model.set_optimizer(optimizer_cls='default', lr=1e-4)

    # Keep the scheduler compatible with the shortened smoke run.
    lr_decay_steps = max(MAX_STEPS, 2)
    model.set_lr_scheduler(
        scheduler_cls='default',
        lr_warmup_steps=1,
        lr_decay_steps=lr_decay_steps,
    )

    logger.info(get_device_placement())
    logger.info(model.get_train_configs())
    logger.info(
        'LoRA NPU smoke config: '
        f'model_id={MODEL_ID}, dataset={DATASET_PATH}, batch_size={BATCH_SIZE}, '
        f'train_samples={TRAIN_SAMPLES}, max_steps={MAX_STEPS}'
    )
    logger.info(f'dataloader_steps={len(dataloader)}')

    for step, batch in enumerate(dataloader):
        model.forward_backward(inputs=batch)
        model.clip_grad_and_step()
        metric = model.calculate_metric(is_training=True)
        logger.info(f'step={step} metric={metric}')
        if step + 1 >= MAX_STEPS:
            break

    model.save('last-checkpoint')


if __name__ == '__main__':
    train()
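Every knob above is read from the environment, so the smoke run can be lengthened or resized without editing the script. A minimal sketch (the values are illustrative, not the defaults), launched the same way as the runner below:

# Illustrative overrides of the env knobs the script reads;
# the dp=2/tp=2/pp=2 mesh is hardcoded, so 8 ranks are still required.
TWINKLE_MAX_STEPS=20 \
TWINKLE_TRAIN_SAMPLE_LIMIT=320 \
TWINKLE_BATCH_SIZE=8 \
torchrun --nproc_per_node=8 cookbook/megatron/npu/tp_lora_npu.py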
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

MEGATRON_LM_PATH=${MEGATRON_LM_PATH:-/path/to/Megatron-LM}
ASCEND_RT_VISIBLE_DEVICES=${ASCEND_RT_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}

export PYTHONPATH="${MEGATRON_LM_PATH}:${PYTHONPATH:-}"

ASCEND_RT_VISIBLE_DEVICES="${ASCEND_RT_VISIBLE_DEVICES}" \
torchrun --nproc_per_node=8 cookbook/megatron/npu/tp_lora_npu.py
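The same script can run fully offline: TWINKLE_LOCAL_MODEL_DIR and TWINKLE_LOCAL_DATASET_PATH replace the ms:// defaults with local paths. A sketch, where the /data paths and the $HOME/Megatron-LM checkout are placeholders rather than paths the repo ships:

# Placeholder paths for illustration only.
export MEGATRON_LM_PATH="$HOME/Megatron-LM"
export PYTHONPATH="${MEGATRON_LM_PATH}:${PYTHONPATH:-}"
TWINKLE_LOCAL_MODEL_DIR=/data/models/Qwen3-4B \
TWINKLE_LOCAL_DATASET_PATH=/data/datasets/self-cognition \
torchrun --nproc_per_node=8 cookbook/megatron/npu/tp_lora_npu.py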
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
import os

from peft import LoraConfig

import twinkle
from twinkle import DeviceMesh, get_device_placement, get_logger
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.model import MegatronModel
from twinkle.preprocessor import SelfCognitionProcessor

# Build a device mesh for the verified NPU MoE LoRA smoke test.
# Expert LoRA currently only supports ETP=1, so we keep TP at 1 here.
MODEL_ID = os.environ.get(
    'TWINKLE_LOCAL_MODEL_DIR',
    'ms://Qwen/Qwen3-30B-A3B-Instruct-2507',
)
DATASET_PATH = os.environ.get(
    'TWINKLE_LOCAL_DATASET_PATH',
    'ms://swift/self-cognition',
)
MAX_STEPS = int(os.environ.get('TWINKLE_MAX_STEPS', '10'))
TRAIN_SAMPLES = int(os.environ.get('TWINKLE_TRAIN_SAMPLE_LIMIT', '80'))
BATCH_SIZE = int(os.environ.get('TWINKLE_BATCH_SIZE', '8'))
DP_SIZE = int(os.environ.get('TWINKLE_DP_SIZE', '8'))
TP_SIZE = int(os.environ.get('TWINKLE_TP_SIZE', '1'))
EP_SIZE = int(os.environ.get('TWINKLE_EP_SIZE', '2'))
PP_SIZE = int(os.environ.get('TWINKLE_PP_SIZE', '1'))
CP_SIZE = int(os.environ.get('TWINKLE_CP_SIZE', '1'))
LR = float(os.environ.get('TWINKLE_LR', '1e-4'))

# 8 cards: dp=8, tp=1, ep=2, pp=1, cp=1
device_mesh = DeviceMesh.from_sizes(
    dp_size=DP_SIZE,
    tp_size=TP_SIZE,
    pp_size=PP_SIZE,
    cp_size=CP_SIZE,
    ep_size=EP_SIZE,
)
twinkle.initialize(mode='local', global_device_mesh=device_mesh)

logger = get_logger()


def build_dataloader() -> DataLoader:
    dataset = Dataset(dataset_meta=DatasetMeta(DATASET_PATH, data_slice=range(TRAIN_SAMPLES)))
    dataset.set_template('Template', model_id=MODEL_ID)
    dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
    dataset.encode()
    return DataLoader(dataset=dataset, batch_size=BATCH_SIZE)


def _to_loss_value(outputs) -> float:
    # Accept dict-style or attribute-style outputs and unwrap tensors to a float.
    loss = outputs['loss'] if isinstance(outputs, dict) else outputs.loss
    return float(loss.detach().cpu()) if hasattr(loss, 'detach') else float(loss)


def train():
    dataloader = build_dataloader()

    model = MegatronModel(model_id=MODEL_ID)
    lora_config = LoraConfig(r=8, lora_alpha=32, target_modules='all-linear')
    model.add_adapter_to_model('default', lora_config)
    model.set_optimizer(optimizer_cls='default', lr=LR)

    # Keep the scheduler compatible with the shortened smoke run.
    lr_decay_steps = max(MAX_STEPS, 2)
    model.set_lr_scheduler(
        scheduler_cls='default',
        lr_warmup_steps=1,
        lr_decay_steps=lr_decay_steps,
    )

    logger.info(get_device_placement())
    logger.info(model.get_train_configs())
    logger.info(
        'MoE LoRA NPU smoke config: '
        f'model_id={MODEL_ID}, dataset={DATASET_PATH}, batch_size={BATCH_SIZE}, '
        f'train_samples={TRAIN_SAMPLES}, max_steps={MAX_STEPS}, '
        f'dp={DP_SIZE}, tp={TP_SIZE}, ep={EP_SIZE}, pp={PP_SIZE}, cp={CP_SIZE}'
    )
    logger.info(f'dataloader_steps={len(dataloader)}')

    for step, batch in enumerate(dataloader):
        outputs = model.forward_backward(inputs=batch)
        model.clip_grad_and_step()
        logger.info(f'step={step} loss={_to_loss_value(outputs)}')
        if step + 1 >= MAX_STEPS:
            break

    model.save('last-checkpoint')


if __name__ == '__main__':
    train()
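Unlike the dense script, the MoE variant exposes every mesh dimension as an environment variable, so the parallel layout can be reshaped at launch time. A sketch that trades data parallelism for more expert parallelism, under the assumption (not verified against twinkle's DeviceMesh) that dp * tp * pp * cp must equal the launched world size and that ep must divide the data-parallel size:

# Assumed constraints: dp * tp * pp * cp == 8 launched ranks, ep divides dp,
# and tp stays at its default of 1 because expert LoRA only supports ETP=1.
TWINKLE_DP_SIZE=4 \
TWINKLE_EP_SIZE=4 \
TWINKLE_PP_SIZE=2 \
torchrun --nproc_per_node=8 cookbook/megatron/npu/tp_moe_lora_npu.py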
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

MEGATRON_LM_PATH=${MEGATRON_LM_PATH:-/path/to/Megatron-LM}
ASCEND_RT_VISIBLE_DEVICES=${ASCEND_RT_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}

export PYTHONPATH="${MEGATRON_LM_PATH}:${PYTHONPATH:-}"

ASCEND_RT_VISIBLE_DEVICES="${ASCEND_RT_VISIBLE_DEVICES}" \
torchrun --nproc_per_node=8 cookbook/megatron/npu/tp_moe_lora_npu.py
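The runner assumes all 8 NPUs are free. On a shared host, a subset of cards also works if the mesh is shrunk to match; a sketch for 4 cards, with the same PYTHONPATH export as above and the same unverified world-size assumption:

# Assumes the mesh sizes must multiply out to the 4 launched ranks
# (dp=4, tp=pp=cp=1), with ep=2 dividing dp.
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 \
TWINKLE_DP_SIZE=4 \
TWINKLE_EP_SIZE=2 \
torchrun --nproc_per_node=4 cookbook/megatron/npu/tp_moe_lora_npu.py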
