|
37 | 37 | ADAPTER_NAME = 'default' |
38 | 38 |
|
39 | 39 | def create_gsm8k_dataset(): |
40 | | - dataset = Dataset(DatasetMeta("ms://modelscope/gsm8k", subset_name='main', split='train')) |
41 | | - dataset.set_template("Template", model_id=MODEL_ID, max_length=2048) |
| 40 | + dataset = Dataset(DatasetMeta('ms://modelscope/gsm8k', subset_name='main', split='train')) |
| 41 | + dataset.set_template('Template', model_id=MODEL_ID, max_length=2048) |
42 | 42 | dataset.map(GSM8KProcessor()) |
43 | 43 | dataset.encode(add_generation_prompt=True) |
44 | 44 | return dataset |
@@ -67,7 +67,7 @@ def main(): |
67 | 67 | sampler_mesh = DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS) |
68 | 68 | twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False) |
69 | 69 |
|
70 | | - lora_config = LoraConfig(target_modules="all-linear", r=32, lora_alpha=64, lora_dropout=0.05) |
| 70 | + lora_config = LoraConfig(target_modules='all-linear', r=32, lora_alpha=64, lora_dropout=0.05) |
71 | 71 |
|
72 | 72 | if USE_MEGATRON: |
73 | 73 | from twinkle.model.megatron import MegatronModel |
@@ -164,9 +164,9 @@ def main(): |
164 | 164 | optim_step += 1 |
165 | 165 | log_dict = metrics.calculate() |
166 | 166 | log_dict.update(model.calculate_metric(is_training=True)) |
167 | | - logger.info(f"[Step {optim_step}/{MAX_STEPS}] {log_dict}") |
| 167 | + logger.info(f'[Step {optim_step}/{MAX_STEPS}] {log_dict}') |
168 | 168 |
|
169 | | - logger.info(f"Training completed. optim_steps={optim_step}") |
| 169 | + logger.info(f'Training completed. optim_steps={optim_step}') |
170 | 170 | model.save('grpo-gsm8k-checkpoint') |
171 | 171 |
|
172 | 172 | if __name__ == '__main__': |
|
0 commit comments