class MDM(nn.Module):
......
def forward(self, x, timesteps, y=None):
"""
x: [batch_size, njoints, nfeats, max_frames], denoted x_t in the paper
timesteps: [batch_size] (int)
"""
# Unpack the four dimensions of x; the unpacking itself fails if x.shape
# does not have exactly four entries.
bs, njoints, nfeats, nframes = x.shape
# Timestep embedding; the conditioning term below is added onto it.
emb = self.embed_timestep(timesteps) # [1, bs, d]
# NOTE(review): y defaults to None in the signature but is dereferenced
# unconditionally here, so calling forward() without y raises
# AttributeError. Presumably y is a dict of conditioning inputs that every
# caller passes -- TODO confirm, or make y a required argument.
# 'uncond' is optional in y and is treated as False when absent.
force_mask = y.get('uncond', False)
# Text conditioning is applied only when 'text' is found in self.cond_mode.
if 'text' in self.cond_mode:
# y['text'] is indexed without a default: a missing 'text' key raises
# KeyError whenever text conditioning is enabled.
enc_text = self.encode_text(y['text'])
# The 'uncond' flag is forwarded to mask_cond as force_mask; presumably it
# forces the text condition to be masked out -- verify against mask_cond.
# Augmented assignment: if emb is a torch tensor this updates it in place
# rather than allocating a new tensor.
emb += self.embed_text(self.mask_cond(enc_text, force_mask=force_mask))