-
Notifications
You must be signed in to change notification settings - Fork 54
Open
Labels
Milestone
Description
Bug description
When we use `BroadcastToSequence`, we cannot obtain the outputs of an intermediate block by calling it directly on a batch (i.e., without fitting the model). Most likely this is because `compute_output_shape` of the post block is not properly implemented and does not return the correct shape information.
I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[11], line 1
----> 1 dense_block(batch)
File /usr/local/lib/python3.8/dist-packages/merlin/models/config/schema.py:58, in SchemaMixin.__call__(self, *args, **kwargs)
55 def __call__(self, *args, **kwargs):
56 self.check_schema()
---> 58 return super().__call__(*args, **kwargs)
File /usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:129, in SequentialBlock.build(self, input_shape)
121 """Builds the sequential block
122
123 Parameters
(...)
126 The input shape, by default None
127 """
128 self._maybe_propagate_context(input_shape)
--> 129 build_sequentially(self, self.layers, input_shape)
File /usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:855, in build_sequentially(self, layers, input_shape)
850 v = TypeError(
851 f"Couldn't build {layer}, "
852 f"did you forget to add aggregation to {last_layer}?"
853 )
854 six.reraise(t, v, tb)
--> 855 input_shape = layer.compute_output_shape(input_shape)
856 last_layer = layer
857 self.built = True
File /usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/tabular.py:297, in TabularBlock.compute_output_shape(self, input_shapes)
294 if self.pre:
295 input_shapes = self.pre.compute_output_shape(input_shapes)
--> 297 output_shapes = self._check_post_output_size(self.compute_call_output_shape(input_shapes))
299 return output_shapes
File /usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/tabular.py:357, in TabularBlock._check_post_output_size(self, input_shapes)
355 schema = getattr(self, "_schema", None)
356 self.aggregation.set_schema(schema)
--> 357 output_shapes = self.aggregation.compute_output_shape(output_shapes)
359 return output_shapes
File /usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/aggregation.py:69, in ConcatFeatures.compute_output_shape(self, input_shapes)
68 def compute_output_shape(self, input_shapes):
---> 69 agg_dim = sum([i[-1] for i in input_shapes.values()])
70 if isinstance(agg_dim, tf.TensorShape):
71 raise ValueError(f"Not possible to aggregate, received: {input_shapes}.")
TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'
Steps/Code to reproduce bug
Please run the code below to reproduce the issue:
# Reproduction script: build a SequentialBlock whose input block uses
# BroadcastToSequence as its `post` step, then call the block directly on a
# sample batch (without fitting a model).
import merlin.models.tf as mm  # `mm` is used below but was missing from the original snippet
from merlin.datasets.synthetic import generate_data
from merlin.io.dataset import Dataset
from merlin.schema.tags import Tags

train = generate_data("sequence-testing", num_rows=1000)

# Split the columns into sequential features and the contextual feature
# that is passed to BroadcastToSequence.
seq_schema = train.schema.select_by_name(
    ["item_id_seq", "categories", "item_age_days_norm"]
)
context_schema = train.schema.select_by_name(["user_age"])
train.schema = train.schema.select_by_name(
    ["item_id_seq", "categories", "item_age_days_norm", "user_age"]
)

target_schema = train.schema.select_by_tag(Tags.ITEM_ID)
target = target_schema.column_names[0]
print(target)

item_id_name = train.schema.select_by_tag(Tags.ITEM_ID).first.properties["domain"]["name"]

dmodel = 32
manual_dims = {
    "item_id_seq": dmodel,
    "categories": 16,
}

mlp_block = mm.MLPBlock(
    [128, dmodel],
    activation="relu",
    no_activation_last_layer=True,
)

input_block = mm.InputBlockV2(
    # If we only feed seq_schema it works, but then the broadcasted
    # (contextual) feature is not considered.
    context_schema + seq_schema,
    embeddings=mm.Embeddings(
        seq_schema.select_by_tag(Tags.CATEGORICAL),
        sequence_combiner=None,
        dim=manual_dims,
    ),
    post=mm.BroadcastToSequence(context_schema, seq_schema),
)

dense_block = mm.SequentialBlock(
    input_block,
    mlp_block,
    mm.XLNetBlock(
        d_model=dmodel,
        n_head=4,
        n_layer=2,
        pre=mm.ReplaceMaskedEmbeddings(),
        post="inference_hidden_state",
    ),
)

mlp_block2 = mm.MLPBlock(
    [128, dmodel],
    activation="relu",
    no_activation_last_layer=True,
)

prediction_task = mm.CategoricalOutput(
    to_call=input_block["categorical"][item_id_name],
)

batch = mm.sample_batch(train, batch_size=128, include_targets=False, to_ragged=True)

# This call is where the reported TypeError is raised (see the traceback above).
dense_block(batch)
Expected behavior
We should be able to return outputs from each block.
Environment details
- Merlin version:
- Platform:
- Python version:
- PyTorch version (GPU?):
- Tensorflow version (GPU?):
merlin-tensorflow:22.12 image with the latest main branches pulled.