diff --git a/.gitignore b/.gitignore index e400054eaba..a21f8947795 100644 --- a/.gitignore +++ b/.gitignore @@ -99,4 +99,6 @@ ENV/ hooks/ pylint.sh -pylintrc \ No newline at end of file +pylintrc + +.vscode/ \ No newline at end of file diff --git a/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py new file mode 100644 index 00000000000..01a92ed946a --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py @@ -0,0 +1,31 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mesh R-CNN configuration definition.""" + +import dataclasses + +from official.modeling import hyperparams # type: ignore + + +@dataclasses.dataclass +class VoxelHead(hyperparams.Config): + """Parameterization for the Mesh R-CNN Voxel Branch Prediction Head.""" + voxel_depth: int = 28 + conv_dim: int = 256 + num_conv: int = 0 + use_group_norm: bool = False + predict_classes: bool = False + bilinearly_upscale_input: bool = True + class_based_voxel: bool = False + num_classes: int = 0 diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/factory.py b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py new file mode 100644 index 00000000000..1b853ee3dd7 --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py @@ -0,0 +1,53 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains factory functions for Mesh R-CNN networks.""" + +from typing import Optional + +import tensorflow as tf # type: ignore + +from official.vision.beta.projects.mesh_rcnn.configs.mesh_rcnn import VoxelHead +from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head + + +def build_voxel_head(head_config: VoxelHead, + kernel_regularizer: + Optional[tf.keras.regularizers.Regularizer], + bias_regularizer: + Optional[tf.keras.regularizers.Regularizer], + activity_regularizer: + Optional[tf.keras.regularizers.Regularizer] + ) -> voxel_head.VoxelHead: + """Builds Voxel Branch Prediction Head. + Args: + head_config: Dataclass parameterization instance for voxel head. + kernel_regularizer: Convolutional layer weight regularizer object. + bias_regularizer: Convolutional layer bias regularizer object. + activity_regularizer: Convolutional layer activation regularizer object. + Returns: + Voxel head layer instance. 
+ """ + return voxel_head.VoxelHead( + voxel_depth=head_config.voxel_depth, + conv_dim=head_config.conv_dim, + num_conv=head_config.num_conv, + use_group_norm=head_config.use_group_norm, + predict_classes=head_config.predict_classes, + bilinearly_upscale_input=head_config.bilinearly_upscale_input, + class_based_voxel=head_config.class_based_voxel, + num_classes=head_config.num_classes, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + ) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py new file mode 100755 index 00000000000..4e254b99c2d --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -0,0 +1,188 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mesh R-CNN Heads.""" +from typing import Optional + +import tensorflow as tf # type: ignore +import tensorflow_addons as tfa # type: ignore + + +class VoxelHead(tf.keras.layers.Layer): + """Mesh R-CNN Voxel Branch Prediction Head.""" + + def __init__(self, + voxel_depth: int, + conv_dim: int, + num_conv: int, + use_group_norm: bool, + predict_classes: bool, + bilinearly_upscale_input: bool, + class_based_voxel: bool, + num_classes: int, + kernel_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, + activity_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a Voxel Branch Prediction Head. + Args: + voxel_depth: The number of depth channels for the predicted voxels. + conv_dim: Number of output features for each Conv2D layer in the + Voxel head. + num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer. + use_group_norm: Whether or not to use GropNormalization in the fully + connected layers. + predict_classes: Whether or not to reshape the final predictor output + from (N, CD, H, W) to (N, C, D, H, W) where C is `num_classes` to + predict and D is `voxel_depth`. This option is used by the Pix3D + Mesh R-CNN architecture. + bilinearly_upscale_input: Whether or not to bilinearly resize the voxel + head input tensor such that width and height of feature maps are equal + to (`voxel_depth` // 2). This option is used by the ShapeNet Mesh R-CNN + architecture. + class_based_voxel: Whether or predict one of `num_classes` for each voxel + grid output. If `predict_classes` is True but `class_based_voxel` is + False, we will only predict 1 class. This option is used by the Pix3d + Mesh R-CNN architecture. + num_classes: If `class_based_voxel` is predict one of `num_classes` + classes for each voxel. This option is used by the Pix3d Mesh R-CNN + architecture. + kernel_regularizer: Convolutional layer weight regularizer object. + bias_regularizer: Convolutional layer bias regularizer object. 
+ activity_regularizer: Convolutional layer activation regularizer object. + **kwargs: Other keyword arguments to be passed. + """ + super().__init__(**kwargs) + + self._voxel_depth = voxel_depth + self._conv_dim = conv_dim + self._num_conv = num_conv + self._use_group_norm = use_group_norm + self._predict_classes = predict_classes + self._bilinearly_upscale_input = bilinearly_upscale_input + self._class_based_voxel = class_based_voxel + self._num_classes = num_classes if ( + predict_classes and class_based_voxel) else 1 + + self._base_config = dict( + activation=None, # Apply ReLU separately in case we want to use GroupNorm + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer) + + self._non_predictor_initializers = dict( + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), # HeNormal with fan out + bias_initializer=None if self._use_group_norm else 'zeros' + ) + + self._fully_conv2d_config = dict( + filters=self._conv_dim, + kernel_size=(3, 3), + strides=(1, 1), + padding='same', + use_bias=not self._use_group_norm, + **self._non_predictor_initializers, + **self._base_config) + + self._deconv2d_config = dict( + filters=self._conv_dim, + kernel_size=(2, 2), + strides=(2, 2), + padding='valid', + use_bias=True, + **self._non_predictor_initializers, + **self._base_config) + self._deconv2d_config['activation'] = 'relu' + + self._predict_conv2d_config = dict( + filters=self._num_classes * self._voxel_depth, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + use_bias=True, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), + bias_initializer=tf.keras.initializers.Zeros(), + **self._base_config) + + def build(self, input_shape: tf.TensorShape) -> None: + """Creates the voxel head layers and initializes their weights and biases. + Args: + input_shape: Shape of the input tensor to the voxel head. + This is the shape of the final layer of the backbone network for the + ShapeNet model and of the RoIAlign layer following the RPN for Pix3D. + """ + # pylint: disable=unused-argument + vd = self._voxel_depth + self._interpolate = tf.keras.layers.Resizing( + height=(vd // 2), width=(vd // 2), interpolation='bilinear') + self._reshape = tf.keras.layers.Reshape((self._num_classes, vd, vd, vd)) + + self._conv2d_norm_relu_layers = [] + for _ in range(self._num_conv): + conv = tf.keras.layers.Conv2D(**self._fully_conv2d_config) + self._conv2d_norm_relu_layers.append(conv) + if self._use_group_norm: + group_norm = tfa.layers.GroupNormalization(groups=32, axis=-1) + self._conv2d_norm_relu_layers.append(group_norm) + relu = tf.keras.layers.ReLU() + self._conv2d_norm_relu_layers.append(relu) + + self._deconv = tf.keras.layers.Conv2DTranspose(**self._deconv2d_config) + self._predictor = tf.keras.layers.Conv2D(**self._predict_conv2d_config) + + def call(self, inputs: tf.Tensor) -> tf.Tensor: + """Forward pass of the voxel head. + Args: + inputs: This is the tensor output of the final layer of the backbone + network for the ShapeNet model and of the RoIAlign layer following the + RPN for Pix3D. + Returns: + (N, V, V, V) for the ShapeNet model and (N, C, V, V, V) for the Pix3D + model, where N = batch size, V = `voxel_depth`, and C = `num_classes`. 
+ """ + if self._bilinearly_upscale_input: + x = self._interpolate(inputs) + else: + x = inputs + for layer in self._conv2d_norm_relu_layers: + x = layer(x) + x = self._deconv(x) + x = self._predictor(x) + if self._predict_classes: + x = self._reshape(x) + return x + + def get_config(self) -> dict: + """Get config dict of the VoxelHead layer.""" + regularizers = dict(self._base_config) + del regularizers['activation'] + + config = dict( + voxel_depth=self._voxel_depth, + conv_dim=self._conv_dim, + num_conv=self._num_conv, + use_group_norm=self._use_group_norm, + predict_classes=self._predict_classes, + bilinearly_upscale_input=self._bilinearly_upscale_input, + class_based_voxel=self._class_based_voxel, + num_classes=self._num_classes, + **regularizers) + return config + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py new file mode 100755 index 00000000000..02275af3465 --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -0,0 +1,175 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Mesh R-CNN Heads.""" + +from typing import Tuple + +import tensorflow as tf # type: ignore +from absl.testing import parameterized # type: ignore + +from official.vision.beta.projects.mesh_rcnn.configs import \ + mesh_rcnn as mesh_rcnn_config +from official.vision.beta.projects.mesh_rcnn.modeling import factory +from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head + + +@parameterized.named_parameters( + {'testcase_name': 'shapenet', + 'predict_classes': False, 'class_based_voxel': False, 'num_conv': 2, + 'voxel_depth': 48, 'batch_size': 32, 'num_input_channels': 2048}, + {'testcase_name': 'pix3d-class-agnostic', + 'predict_classes': True, 'class_based_voxel': False, 'num_conv': 1, + 'voxel_depth': 24, 'batch_size': 1, 'num_input_channels': 256}, + {'testcase_name': 'pix3d-class-based', + 'predict_classes': True, 'class_based_voxel': True, 'num_conv': 0, + 'voxel_depth': 24, 'batch_size': 32, 'num_input_channels': 256}, +) +class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase): + """Test for Mesh R-CNN Voxel Branch Prediction Head.""" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + tf.keras.backend.set_image_data_format('channels_last') + self._num_classes = 5 + self._conv_dim = 256 + self._use_group_norm = False + + def _get_expected_out_shape(self, + predict_classes: bool, + class_based_voxel: bool, + voxel_depth: int, + batch_size: int) -> Tuple[int, ...]: + """Get the output shape of the voxel head.""" + # pylint: disable=missing-param-doc + expected_shape: Tuple[int, ...] 
+ if predict_classes: + expected_num_classes: int = self._num_classes if class_based_voxel else 1 + expected_shape = (batch_size, expected_num_classes, + voxel_depth, voxel_depth, voxel_depth) + else: + expected_shape = (batch_size, voxel_depth, voxel_depth, voxel_depth) + return expected_shape + + def _get_input_shape(self, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> Tuple[int, int, int, int]: + """Get the input shape of the voxel head.""" + return (batch_size, voxel_depth // 2, voxel_depth // 2, num_input_channels) + + def test_network_creation(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Verify the output shapes of the voxel head.""" + # pylint: disable=missing-param-doc + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, + self._use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + self._num_classes) + + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) + input_tensor = tf.ones(input_shape, dtype=tf.float32) + output = head(input_tensor) + + expected_shape = self._get_expected_out_shape(predict_classes, + class_based_voxel, + voxel_depth, batch_size) + + self.assertAllEqual(output.shape.as_list(), expected_shape) + + def test_serialize_deserialize(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Verify that the head can be serialized and deserialized from its config.""" + # pylint: disable=missing-param-doc + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, + self._use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + self._num_classes) + + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) + input_tensor = tf.ones(input_shape, dtype=tf.float32) + _ = head(input_tensor) + + serialized = head.get_config() + deserialized = voxel_head.VoxelHead.from_config(serialized) + + self.assertAllEqual(head.get_config(), deserialized.get_config()) + + def test_gradient_pass_through(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Ensure the gradients of the layer are not None.""" + # pylint: disable=missing-param-doc + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, + self._use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + self._num_classes) + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) + output_shape = self._get_expected_out_shape(predict_classes, + class_based_voxel, + voxel_depth, batch_size) + init = tf.random_normal_initializer() + x = tf.Variable(initial_value=init(shape=input_shape, dtype=tf.float32)) + y = tf.Variable(initial_value=init(shape=output_shape, dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat = head(x) + grad_loss = loss(y, x_hat) + grad = tape.gradient(grad_loss, head.trainable_variables) + optimizer.apply_gradients(zip(grad, head.trainable_variables)) + + self.assertNotIn(None, grad) + + def test_build_from_config(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Test head creation from config and factory.""" + # 
pylint: disable=missing-param-doc,unused-argument + cfg = mesh_rcnn_config.VoxelHead(voxel_depth=voxel_depth, + conv_dim=self._conv_dim, + num_conv=num_conv, + use_group_norm=self._use_group_norm, + predict_classes=predict_classes, + bilinearly_upscale_input=not predict_classes, + class_based_voxel=class_based_voxel, + num_classes=self._num_classes) + _ = factory.build_voxel_head(cfg, + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None) + +if __name__ == '__main__': + tf.test.main()
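Below is a minimal usage sketch of the new head in its ShapeNet-style configuration (no class prediction, bilinear upscaling of the input). It assumes the modules added above are importable from the Model Garden tree; the batch size, spatial size, and channel count are illustrative only and mirror the conventions used in voxel_head_test.py.

import tensorflow as tf

from official.vision.beta.projects.mesh_rcnn.configs import mesh_rcnn as mesh_rcnn_config
from official.vision.beta.projects.mesh_rcnn.modeling import factory

# ShapeNet-style head: single-class voxel grid, input resized bilinearly to
# (voxel_depth // 2, voxel_depth // 2) before the conv/deconv stack.
cfg = mesh_rcnn_config.VoxelHead(
    voxel_depth=48,
    conv_dim=256,
    num_conv=2,
    use_group_norm=False,
    predict_classes=False,
    bilinearly_upscale_input=True,
    class_based_voxel=False,
    num_classes=0)

head = factory.build_voxel_head(
    cfg,
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None)

# Illustrative backbone features: batch of 2, 24x24 spatial extent, 2048 channels.
features = tf.ones((2, 24, 24, 2048), dtype=tf.float32)
voxel_logits = head(features)
print(voxel_logits.shape)  # (2, 48, 48, 48): one 48^3 voxel grid per image.

For the Pix3D-style configurations, setting predict_classes=True (and optionally class_based_voxel=True) reshapes the output to (N, C, 48, 48, 48), as exercised by the parameterized test cases above.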