File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
src/twinkle/model/megatron Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change 1010from twinkle .preprocessor import SelfCognitionProcessor
1111
1212# Construct a device_mesh, dp=2
13- device_mesh = DeviceMesh .from_sizes (dp_size = 2 )
13+ device_mesh = DeviceMesh .from_sizes (dp_size = 8 )
1414# use torchrun mode
1515twinkle .initialize (mode = 'local' , global_device_mesh = device_mesh )
1616
Original file line number Diff line number Diff line change @@ -245,7 +245,7 @@ def __init__(
245245
246246 def _construct_default_optimizer_group (self ):
247247 return MegatronOptimizerGroup (
248- loss_instance = CrossEntropyLoss (),
248+ loss_instance = CrossEntropyLoss (reduction = 'sum' ),
249249 template = Template (self .tokenizer_id ),
250250 processor = InputProcessor (self .device_mesh , framework = 'megatron' ),
251251 _device_mesh = self .device_mesh ,
You can’t perform that action at this time.
0 commit comments