Skip to content

Commit aa86099

Browse files
committed
wip
1 parent 8c662f0 commit aa86099

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

cookbook/transformers/fsdp2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from twinkle.preprocessor import SelfCognitionProcessor
1111

1212
# Construct a device_mesh; the data-parallel size is set by dp_size below
13-
device_mesh = DeviceMesh.from_sizes(dp_size=2)
13+
device_mesh = DeviceMesh.from_sizes(dp_size=8)
1414
# use torchrun mode
1515
twinkle.initialize(mode='local', global_device_mesh=device_mesh)
1616

src/twinkle/model/megatron/megatron.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def __init__(
245245

246246
def _construct_default_optimizer_group(self):
247247
return MegatronOptimizerGroup(
248-
loss_instance=CrossEntropyLoss(),
248+
loss_instance=CrossEntropyLoss(reduction='sum'),
249249
template=Template(self.tokenizer_id),
250250
processor=InputProcessor(self.device_mesh, framework='megatron'),
251251
_device_mesh=self.device_mesh,

0 commit comments

Comments
 (0)