From c799135aa61ec116179a5f9b1b8a3d302a776514 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Sat, 9 Oct 2021 13:24:11 -0400 Subject: [PATCH 01/11] Initial work on the voxel head. I am nearly complete with my initial rough implementation of the voxel head. The only thing I have left is the weight and bias initializations in build(). After that, I will start testing following yolo_head_test as an exemplar. --- .../mesh_rcnn/modeling/heads/voxel_head.py | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100755 official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py new file mode 100755 index 00000000000..65ae1732d8b --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -0,0 +1,166 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Currrent Questions +1. Should I be setting the default values for the head based on what they use + in shapenet/config/config.py or one of the actual configs like + configs/shapenet/voxmesh_R50.yaml +2. I am not sure what the correct kernel initializer is, based on this post +https://discuss.pytorch.org/t/crossentropyloss-expected-object-of-type-torch-longtensor/28683/6?u=ptrblck + I think it is HeNormal but I could be wrong. +3. 
It looks like Pytorch impl uses something called group normalization +https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30 +https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141 + I added a flag in __init__ to use this. But my question is should this layer + be placed before or after the ReLU layer since there is no way to directly + add this as an option to the Conv2d layer like they do in Pytorch? Based on + what I read about BatchNorm, if GroupNorm behaves similarly then it should + go before ReLU so that is what I did. But please correct me if I am wrong + here. +""" +from typing import Any, Optional + +import tensorflow as tf # type: ignore + +import tensorflow_addons as tfa # type: ignore + + +class VoxelHead(tf.keras.layers.Layer): + """Mesh R-CNN Voxel Branch Prediction Head.""" + + def __init__(self, + input_channels: int, + voxel_size: int = 28, + conv_dims: int = 256, + num_conv: int = 0, + use_group_norm: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'HeNormal', + kernel_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a Voxel Branch Prediction Head. + Args: + input_channels: Number of channels in layer preceeding the voxel head. + This the final conv5_3 layer of the backbone network for ShapeNet + model and the RoIAlign layer following the RPN for Pix3D. + voxel_size: The number of depth channels for the predicted voxels. + conv_dims: Number of output features for each Conv2D layer in the + Voxel head. + num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer. + use_group_norm: Whether or not to use GropNormalization in fully + connected layer(s). + norm_momentum: Normalization momentum for the moving average. 
+ norm_epsilon: Small float added to variance to avoid dividing by zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + **kwargs: keyword arguments to be passed. + """ + super().__init__(**kwargs) + + assert self.voxel_size % 2 == 0 + + self._input_channels = input_channels + self._voxel_size = voxel_size + self._conv_dims = conv_dims + self._num_conv = num_conv + self._use_group_norm = use_group_norm + + self._base_config = dict( + activation=None, # Apply ReLU separately in case we want to use GroupNorm + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer) + + self._fully_conv2d_config = dict( + filters=self._conv_dims, + kernel_size=(3, 3), + strides=(1, 1), + padding=1, + use_bias=not self._use_group_norm, + data_format='channels_last', + **self._base_config) + + self._deconv2d_config = dict( + filters=self._conv_dims, + kernel_size=(2, 2), + strides=(2, 2), + padding=0, + use_bias=True, + **self._base_config) + self._deconv2d_config['activation'] = 'relu' + + self._predict_conv2d_config = dict( + filters=self._voxel_size, + kernel_size=(1, 1), + strides=(1, 1), + padding=0, + use_bias=True, + **self._base_config) + + def build(self, input_shape: Any) -> None: + """TODO(zghera) + """ + #pylint: disable=unused-argument, missing-param-doc + self._interpolate = tf.keras.layers.UpSampling2D( + size=(self._voxel_size // 2, self._voxel_size // 2), + interpolation="bilinear") + + self._conv2d_norm_relu_layers = [] + for _ in range(self._num_conv): + conv = tf.keras.layers.Conv2D(**self._fully_conv2d_config) + self._conv2d_norm_relu_layers.append(conv) + if self._use_group_norm: + group_norm = tfa.layers.GroupNormalization(groups=32, axis=-1) + 
self._conv2d_norm_relu_layers.append(group_norm) + relu = tf.keras.layers.ReLU() + self._conv2d_norm_relu_layers.append(relu) + + self._deconv = tf.keras.layers.Conv2DTranspose(**self._deconv2d_config) + self._predictor = tf.keras.layers.Conv2D(**self._predict_conv2d_config) + + # TODO(zghera): Weight and bias initializations + + def call(self, inputs: Any) -> Any: + """TODO(zghera) + Args: + inputs: ... + Return: + ... + """ + # pylint: disable=arguments-differ + x = self._interpolate(inputs) + for layer in self._conv2d_norm_relu_layers: + x = layer(x) + x = self._deconv(x) + return self._predictor(x) + + @property + def output_depth(self) -> int: + return self._voxel_size + + def get_config(self) -> dict: + config = dict( + input_channels=self._input_channels, + voxel_size=self._voxel_size, + conv_dims=self._conv_dims, + num_conv=self._num_conv, + use_group_norm=self._use_group_norm, + **self._base_config) + return config From 5c896d59a334a36d787ac57f01f021ec4924bdcf Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Tue, 12 Oct 2021 16:34:38 -0400 Subject: [PATCH 02/11] Finish voxel head initial implementation. Added support to handle both ShapeNet and Pix3D as well as initialization of layer weight and biases. --- .../mesh_rcnn/modeling/heads/voxel_head.py | 154 ++++++++++++------ 1 file changed, 101 insertions(+), 53 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py index 65ae1732d8b..9a914456623 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -11,14 +11,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Currrent Questions -1. 
Should I be setting the default values for the head based on what they use - in shapenet/config/config.py or one of the actual configs like - configs/shapenet/voxmesh_R50.yaml -2. I am not sure what the correct kernel initializer is, based on this post -https://discuss.pytorch.org/t/crossentropyloss-expected-object-of-type-torch-longtensor/28683/6?u=ptrblck - I think it is HeNormal but I could be wrong. -3. It looks like Pytorch impl uses something called group normalization +"""Mesh R-CNN Heads. + +TODO(zghera): Remove questions below once complete. + +Currrent Questions +1. This question is probably resolved as the PyTorch Impl manually sets + weights and biases for each conv layer. See sectioned off comments below for + the original question. + That being said, I would still appreciate if someone double checked my + weight & bias initializations. + vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv + I am not sure what the correct kernel and bias initializers are for the + default pytorch conv2d layers. Looking in the PyTorch source (see + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L144) + It looks like they are actually using a RandomUniform initialization of the + weights with a specific range based on the GitHub comment (see + https://github.com/pytorch/pytorch/commit/8130f2f67ada1951ee27e55b8a506d6de23c13df ) + and the biases are with some variation of HEUniform where + `limit = sqrt(1 / fan_in)` rather than 6 (see + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L146 ). + Please correct me if I am wrong here. + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2. 
It looks like Pytorch impl uses something called group normalization https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30 https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141 I added a flag in __init__ to use this. But my question is should this layer @@ -28,10 +43,9 @@ go before ReLU so that is what I did. But please correct me if I am wrong here. """ -from typing import Any, Optional +from typing import Optional import tensorflow as tf # type: ignore - import tensorflow_addons as tfa # type: ignore @@ -39,62 +53,82 @@ class VoxelHead(tf.keras.layers.Layer): """Mesh R-CNN Voxel Branch Prediction Head.""" def __init__(self, - input_channels: int, - voxel_size: int = 28, - conv_dims: int = 256, - num_conv: int = 0, - use_group_norm: bool = False, + voxel_depth: int, + conv_dims: int, + num_conv: int, + use_group_norm: bool, + predict_classes: bool, + bilinearly_upscale_input: bool, + class_based_voxel: bool, + num_classes: int, norm_momentum: float = 0.99, norm_epsilon: float = 0.001, - kernel_initializer: str = 'HeNormal', kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - bias_regularizer: + conv_bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, **kwargs): """Initializes a Voxel Branch Prediction Head. Args: - input_channels: Number of channels in layer preceeding the voxel head. - This the final conv5_3 layer of the backbone network for ShapeNet - model and the RoIAlign layer following the RPN for Pix3D. - voxel_size: The number of depth channels for the predicted voxels. + voxel_depth: The number of depth channels for the predicted voxels. conv_dims: Number of output features for each Conv2D layer in the Voxel head. num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer. - use_group_norm: Whether or not to use GropNormalization in fully - connected layer(s). 
+ use_group_norm: Whether or not to use GropNormalization in the fully + connected layers. + predict_classes: Whether or not to reshape the final predictor output + from (N, CD, H, W) to (N, C, D, H, W) where C is `num_classes` to + predict and D is `voxel_depth`. This option is used by the Pix3D + Mesh R-CNN architecture. + bilinearly_upscale_input: Whether or not to bilinearly resize the voxel + head input tensor such that width and height of feature maps are equal + to (`voxel_depth` // 2). This option is used by the ShapeNet Mesh R-CNN + architecture. + class_based_voxel: Whether or predict one of `num_classes` for each voxel + grid output. If `predict_classes` is True but `class_based_voxel` is + False, we will only predict 1 class. This option is used by the Pix3d + Mesh R-CNN architecture. + num_classes: If `class_based_voxel` is predict one of `num_classes` + classes for each voxel. This option is used by the Pix3d Mesh R-CNN + architecture. norm_momentum: Normalization momentum for the moving average. norm_epsilon: Small float added to variance to avoid dividing by zero. - kernel_initializer: kernel_initializer for convolutional layers. - kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. - bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. - **kwargs: keyword arguments to be passed. + kernel_regularizer: Convolutional layer weight regularizer object. + conv_bias_regularizer: Convolutional layer bias regularizer object. + **kwargs: other keyword arguments to be passed. 
""" super().__init__(**kwargs) - assert self.voxel_size % 2 == 0 - - self._input_channels = input_channels - self._voxel_size = voxel_size + self._voxel_depth = voxel_depth self._conv_dims = conv_dims self._num_conv = num_conv self._use_group_norm = use_group_norm + self._predict_classes = predict_classes + self._bilinearly_upscale_input = bilinearly_upscale_input + self._num_classes = num_classes if ( + predict_classes and class_based_voxel) else 1 self._base_config = dict( activation=None, # Apply ReLU separately in case we want to use GroupNorm norm_momentum=norm_momentum, norm_epsilon=norm_epsilon, - kernel_initializer=kernel_initializer, + kernel_initializer=None, # Set individually for each layer conv layer type + bias_initializer=None, kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer) + bias_regularizer=conv_bias_regularizer) + self._conv_initializers = dict( + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), # HeNormal with fan out + bias_initializer=None if self._use_group_norm else 'zeros' + ) self._fully_conv2d_config = dict( filters=self._conv_dims, kernel_size=(3, 3), strides=(1, 1), padding=1, use_bias=not self._use_group_norm, - data_format='channels_last', + **self._conv_initializers, **self._base_config) self._deconv2d_config = dict( @@ -103,24 +137,32 @@ def __init__(self, strides=(2, 2), padding=0, use_bias=True, + **self._conv_initializers, **self._base_config) self._deconv2d_config['activation'] = 'relu' self._predict_conv2d_config = dict( - filters=self._voxel_size, + filters=self._num_classes * self._voxel_depth, kernel_size=(1, 1), strides=(1, 1), padding=0, use_bias=True, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), + bias_initializer=tf.keras.initializers.Zeros(), **self._base_config) - def build(self, input_shape: Any) -> None: - """TODO(zghera) + def build(self, input_shape: tf.TensorShape) -> None: + """Creates the 
voxel head layers and initializes their weights and biases. + Args: + input_shape: Shape of the input tensor to the voxel head. + This the shape of the final layer of the backbone network for the + ShapeNet model and the RoIAlign layer following the RPN for Pix3D. """ - #pylint: disable=unused-argument, missing-param-doc - self._interpolate = tf.keras.layers.UpSampling2D( - size=(self._voxel_size // 2, self._voxel_size // 2), - interpolation="bilinear") + #pylint: disable=unused-argument + vd = self._voxel_depth + self._interpolate = tf.keras.layers.Resizing( + height=(vd // 2), width=(vd // 2), interpolation='bilinear') + self._reshape = tf.keras.layers.Reshape((self._num_classes, vd, vd, vd)) self._conv2d_norm_relu_layers = [] for _ in range(self._num_conv): @@ -135,30 +177,36 @@ def build(self, input_shape: Any) -> None: self._deconv = tf.keras.layers.Conv2DTranspose(**self._deconv2d_config) self._predictor = tf.keras.layers.Conv2D(**self._predict_conv2d_config) - # TODO(zghera): Weight and bias initializations - - def call(self, inputs: Any) -> Any: - """TODO(zghera) + def call(self, inputs: tf.Tensor) -> tf.Tensor: + """Forward pass of the voxel head for the ShapeNet Mesh R-CNN model. Args: - inputs: ... - Return: - ... + inputs: This is the tensor output of the final layer of the backbone + network for the ShapeNet model and the RoIAlign layer following the + RPN for Pix3D. + Returns: + (N, V, V, V) for ShapeNet model and (N, C, V, V, V) for Pix3D model + where N = batch size, V = `voxel_depth`, and C = `num_classes`. 
""" - # pylint: disable=arguments-differ - x = self._interpolate(inputs) + x = tf.cond(self._bilinearly_upscale_input, + true_fn=lambda: self._interpolate(inputs), + false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs)) for layer in self._conv2d_norm_relu_layers: x = layer(x) x = self._deconv(x) - return self._predictor(x) + x = self._predictor(x) + x = tf.cond(self._predict_classes, + true_fn=lambda: self._reshape(x), + false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs)) + return x @property def output_depth(self) -> int: - return self._voxel_size + return self._voxel_depth def get_config(self) -> dict: config = dict( input_channels=self._input_channels, - voxel_size=self._voxel_size, + voxel_depth=self._voxel_depth, conv_dims=self._conv_dims, num_conv=self._num_conv, use_group_norm=self._use_group_norm, From dfa70168887ff70ffeeed0018c201f392347562d Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Thu, 14 Oct 2021 22:54:08 -0400 Subject: [PATCH 03/11] Create unit tests for verifying output shape. See updated questions in voxel_head.py. Mypy is also failing to find the voxel_head module right now and I cannot figure out why. I will try to look into this after finishing up testing. --- .../mesh_rcnn/modeling/heads/voxel_head.py | 55 ++++++--------- .../modeling/heads/voxel_head_test.py | 67 +++++++++++++++++++ 2 files changed, 88 insertions(+), 34 deletions(-) create mode 100755 official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py index 9a914456623..9266198bd8a 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -16,24 +16,7 @@ TODO(zghera): Remove questions below once complete. Currrent Questions -1. 
This question is probably resolved as the PyTorch Impl manually sets - weights and biases for each conv layer. See sectioned off comments below for - the original question. - That being said, I would still appreciate if someone double checked my - weight & bias initializations. - vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv - I am not sure what the correct kernel and bias initializers are for the - default pytorch conv2d layers. Looking in the PyTorch source (see - https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L144) - It looks like they are actually using a RandomUniform initialization of the - weights with a specific range based on the GitHub comment (see - https://github.com/pytorch/pytorch/commit/8130f2f67ada1951ee27e55b8a506d6de23c13df ) - and the biases are with some variation of HEUniform where - `limit = sqrt(1 / fan_in)` rather than 6 (see - https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L146 ). - Please correct me if I am wrong here. - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -2. It looks like Pytorch impl uses something called group normalization +1. It looks like Pytorch impl uses something called group normalization https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30 https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141 I added a flag in __init__ to use this. But my question is should this layer @@ -42,6 +25,13 @@ what I read about BatchNorm, if GroupNorm behaves similarly then it should go before ReLU so that is what I did. But please correct me if I am wrong here. +2. The PyTorch implementation using a padding of 1 for the initial conv2d + layers. But it appears that tensorflow only provides the options same and + valid. 
So if my understanding is correct, if we use a kernel size of 3, then + there are cases where 1 padding will not be the same as 'same' padding + (e.g. 22 x 22). +3. Is it okay to not write argument docs for my tests as they are the same + as the voxel head arguments? """ from typing import Optional @@ -61,12 +51,12 @@ def __init__(self, bilinearly_upscale_input: bool, class_based_voxel: bool, num_classes: int, - norm_momentum: float = 0.99, - norm_epsilon: float = 0.001, kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, conv_bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + conv_activ_regularizer: + Optional[tf.keras.regularizers.Regularizer] = None, **kwargs): """Initializes a Voxel Branch Prediction Head. Args: @@ -91,10 +81,9 @@ def __init__(self, num_classes: If `class_based_voxel` is predict one of `num_classes` classes for each voxel. This option is used by the Pix3d Mesh R-CNN architecture. - norm_momentum: Normalization momentum for the moving average. - norm_epsilon: Small float added to variance to avoid dividing by zero. kernel_regularizer: Convolutional layer weight regularizer object. conv_bias_regularizer: Convolutional layer bias regularizer object. + conv_activ_regularizer: Convolutional layer activation regularizer object. **kwargs: other keyword arguments to be passed. 
""" super().__init__(**kwargs) @@ -110,34 +99,32 @@ def __init__(self, self._base_config = dict( activation=None, # Apply ReLU separately in case we want to use GroupNorm - norm_momentum=norm_momentum, - norm_epsilon=norm_epsilon, - kernel_initializer=None, # Set individually for each layer conv layer type - bias_initializer=None, kernel_regularizer=kernel_regularizer, - bias_regularizer=conv_bias_regularizer) + bias_regularizer=conv_bias_regularizer, + activity_regularizer=conv_activ_regularizer) - self._conv_initializers = dict( + self._non_predictor_initializers = dict( kernel_initializer=tf.keras.initializers.VarianceScaling( scale=2, mode='fan_out', distribution='untruncated_normal'), # HeNormal with fan out bias_initializer=None if self._use_group_norm else 'zeros' ) + self._fully_conv2d_config = dict( filters=self._conv_dims, kernel_size=(3, 3), strides=(1, 1), - padding=1, + padding='same', use_bias=not self._use_group_norm, - **self._conv_initializers, + **self._non_predictor_initializers, **self._base_config) self._deconv2d_config = dict( filters=self._conv_dims, kernel_size=(2, 2), strides=(2, 2), - padding=0, + padding='valid', use_bias=True, - **self._conv_initializers, + **self._non_predictor_initializers, **self._base_config) self._deconv2d_config['activation'] = 'relu' @@ -145,7 +132,7 @@ def __init__(self, filters=self._num_classes * self._voxel_depth, kernel_size=(1, 1), strides=(1, 1), - padding=0, + padding='valid', use_bias=True, kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), bias_initializer=tf.keras.initializers.Zeros(), @@ -196,7 +183,7 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: x = self._predictor(x) x = tf.cond(self._predict_classes, true_fn=lambda: self._reshape(x), - false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs)) + false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x)) return x @property diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py 
b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py new file mode 100755 index 00000000000..5400347e482 --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -0,0 +1,67 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Mesh R-CNN Heads.""" + +import tensorflow as tf # type: ignore +import voxel_head +from absl.testing import parameterized # type: ignore + + +@parameterized.product( + predict_classes=[False, True], + class_based_voxel=[False, True], + num_classes=[1, 5], + voxel_depth=[24, 48], + conv_dims=[256], + num_conv=[0, 2], + use_group_norm=[False, True], +) +class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase): + """Test for Mesh R-CNN Voxel Prediction Head.""" + + def test_network_output(self, + predict_classes: bool, + class_based_voxel: bool, + num_classes: int, + voxel_depth: int, + conv_dims: int, + num_conv: int, + use_group_norm: bool) -> None: + """Verify the output shapes of the voxel head.""" + # pylint: disable=missing-param-doc + tf.keras.backend.set_image_data_format('channels_last') + head = voxel_head.VoxelHead(voxel_depth, conv_dims, num_conv, + use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + num_classes) + batch_size = 32 + num_input_channels = 256 + input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2, + num_input_channels] + input_tensor = 
tf.ones(input_shape, dtype=tf.float32) + output = head(input_tensor) + + if predict_classes: + expected_num_classes = num_classes if class_based_voxel else 1 + expected_shape = [batch_size, expected_num_classes, voxel_depth, + voxel_depth, voxel_depth] + else: + expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth] + + self.assertAllEqual(output.shape.as_list(), expected_shape) + +if __name__ == '__main__': + # from mesh_rcnn.utils.run_utils import prep_gpu + # prep_gpu() + tf.test.main() From 933075e14efaa211f709cdbb57dddd4604a0735d Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Thu, 14 Oct 2021 23:06:21 -0400 Subject: [PATCH 04/11] Add vscode files to gitignore. --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e400054eaba..a21f8947795 100644 --- a/.gitignore +++ b/.gitignore @@ -99,4 +99,6 @@ ENV/ hooks/ pylint.sh -pylintrc \ No newline at end of file +pylintrc + +.vscode/ \ No newline at end of file From 0fcc9b654f54a28c9d92c49c4b46ad21107f6b36 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Tue, 19 Oct 2021 14:44:00 -0400 Subject: [PATCH 05/11] Add voxel head serialize unit test and refactoring. --- .../mesh_rcnn/modeling/heads/voxel_head.py | 61 ++++++-------- .../modeling/heads/voxel_head_test.py | 81 +++++++++++++------ 2 files changed, 81 insertions(+), 61 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py index 9266198bd8a..67733d7a3b8 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -11,28 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Mesh R-CNN Heads. - -TODO(zghera): Remove questions below once complete. 
- -Currrent Questions -1. It looks like Pytorch impl uses something called group normalization -https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30 -https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141 - I added a flag in __init__ to use this. But my question is should this layer - be placed before or after the ReLU layer since there is no way to directly - add this as an option to the Conv2d layer like they do in Pytorch? Based on - what I read about BatchNorm, if GroupNorm behaves similarly then it should - go before ReLU so that is what I did. But please correct me if I am wrong - here. -2. The PyTorch implementation using a padding of 1 for the initial conv2d - layers. But it appears that tensorflow only provides the options same and - valid. So if my understanding is correct, if we use a kernel size of 3, then - there are cases where 1 padding will not be the same as 'same' padding - (e.g. 22 x 22). -3. Is it okay to not write argument docs for my tests as they are the same - as the voxel head arguments? -""" +"""Mesh R-CNN Heads.""" from typing import Optional import tensorflow as tf # type: ignore @@ -53,9 +32,9 @@ def __init__(self, num_classes: int, kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - conv_bias_regularizer: + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - conv_activ_regularizer: + activity_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, **kwargs): """Initializes a Voxel Branch Prediction Head. @@ -82,8 +61,8 @@ def __init__(self, classes for each voxel. This option is used by the Pix3d Mesh R-CNN architecture. kernel_regularizer: Convolutional layer weight regularizer object. - conv_bias_regularizer: Convolutional layer bias regularizer object. - conv_activ_regularizer: Convolutional layer activation regularizer object. 
+ bias_regularizer: Convolutional layer bias regularizer object. + activity_regularizer: Convolutional layer activation regularizer object. **kwargs: other keyword arguments to be passed. """ super().__init__(**kwargs) @@ -92,16 +71,19 @@ def __init__(self, self._conv_dims = conv_dims self._num_conv = num_conv self._use_group_norm = use_group_norm - self._predict_classes = predict_classes - self._bilinearly_upscale_input = bilinearly_upscale_input + self._predict_classes = tf.constant( + predict_classes, dtype=tf.bool) + self._bilinearly_upscale_input = tf.constant( + bilinearly_upscale_input, dtype=tf.bool) + self._class_based_voxel = class_based_voxel self._num_classes = num_classes if ( predict_classes and class_based_voxel) else 1 self._base_config = dict( activation=None, # Apply ReLU separately in case we want to use GroupNorm kernel_regularizer=kernel_regularizer, - bias_regularizer=conv_bias_regularizer, - activity_regularizer=conv_activ_regularizer) + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer) self._non_predictor_initializers = dict( kernel_initializer=tf.keras.initializers.VarianceScaling( @@ -186,16 +168,23 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x)) return x - @property - def output_depth(self) -> int: - return self._voxel_depth - def get_config(self) -> dict: + """Get config dict of the VoxelHead layer.""" + regularizers = dict(self._base_config) + del regularizers['activation'] + config = dict( - input_channels=self._input_channels, voxel_depth=self._voxel_depth, conv_dims=self._conv_dims, num_conv=self._num_conv, use_group_norm=self._use_group_norm, - **self._base_config) + predict_classes=self._predict_classes, + bilinearly_upscale_input=self._bilinearly_upscale_input, + class_based_voxel=self._class_based_voxel, + num_classes=self._num_classes, + **regularizers) return config + + @classmethod + def from_config(cls, config): + return cls(**config) 
diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py index 5400347e482..133e8944743 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -14,53 +14,84 @@ """Tests for Mesh R-CNN Heads.""" import tensorflow as tf # type: ignore -import voxel_head from absl.testing import parameterized # type: ignore +from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head -@parameterized.product( - predict_classes=[False, True], - class_based_voxel=[False, True], - num_classes=[1, 5], - voxel_depth=[24, 48], - conv_dims=[256], - num_conv=[0, 2], - use_group_norm=[False, True], + +@parameterized.named_parameters( + {'testcase_name': 'shapenet', + 'predict_classes': False, 'class_based_voxel': False, 'num_conv': 2, + 'voxel_depth': 48, 'batch_size': 32, 'num_input_channels': 2048}, + {'testcase_name': 'pix3d-class-agnostic', + 'predict_classes': True, 'class_based_voxel': False, 'num_conv': 1, + 'voxel_depth': 24, 'batch_size': 1, 'num_input_channels': 256}, + {'testcase_name': 'pix3d-class-based', + 'predict_classes': True, 'class_based_voxel': True, 'num_conv': 0, + 'voxel_depth': 24, 'batch_size': 32, 'num_input_channels': 256}, ) class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase): """Test for Mesh R-CNN Voxel Prediction Head.""" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._num_classes = 5 + self._conv_dims = 256 + self._use_group_norm = False - def test_network_output(self, - predict_classes: bool, - class_based_voxel: bool, - num_classes: int, - voxel_depth: int, - conv_dims: int, - num_conv: int, - use_group_norm: bool) -> None: + def test_network_creation(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + 
num_input_channels: int) -> None: """Verify the output shapes of the voxel head.""" # pylint: disable=missing-param-doc tf.keras.backend.set_image_data_format('channels_last') - head = voxel_head.VoxelHead(voxel_depth, conv_dims, num_conv, - use_group_norm, predict_classes, + head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, - num_classes) - batch_size = 32 - num_input_channels = 256 + self._num_classes) + input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2, num_input_channels] input_tensor = tf.ones(input_shape, dtype=tf.float32) output = head(input_tensor) if predict_classes: - expected_num_classes = num_classes if class_based_voxel else 1 - expected_shape = [batch_size, expected_num_classes, voxel_depth, - voxel_depth, voxel_depth] + expected_num_classes = self._num_classes if class_based_voxel else 1 + expected_shape = [batch_size, expected_num_classes, + voxel_depth, voxel_depth, voxel_depth] else: expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth] self.assertAllEqual(output.shape.as_list(), expected_shape) + def test_serialize_deserialize(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Create a network object that sets all of its config options.""" + # pylint: disable=missing-param-doc + tf.keras.backend.set_image_data_format('channels_last') + head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + self._use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + self._num_classes) + + input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2, + num_input_channels] + input_tensor = tf.ones(input_shape, dtype=tf.float32) + _ = head(input_tensor) + + serialized = head.get_config() + deserialized = voxel_head.VoxelHead.from_config(serialized) + + self.assertAllEqual(head.get_config(), 
deserialized.get_config()) + if __name__ == '__main__': # from mesh_rcnn.utils.run_utils import prep_gpu # prep_gpu() From 4fa19ad60513fa0fba41b18689a763d3aa0ac442 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Tue, 19 Oct 2021 16:36:33 -0400 Subject: [PATCH 06/11] Create helpers for getting input/output shapes. --- .../modeling/heads/voxel_head_test.py | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py index 133e8944743..80c3ed5e37c 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -13,6 +13,8 @@ # limitations under the License. """Tests for Mesh R-CNN Heads.""" +from typing import Tuple + import tensorflow as tf # type: ignore from absl.testing import parameterized # type: ignore @@ -38,6 +40,29 @@ def __init__(self, *args, **kwargs): self._conv_dims = 256 self._use_group_norm = False + def _get_expected_out_shape(self, + predict_classes: bool, + class_based_voxel: bool, + voxel_depth: int, + batch_size: int) -> Tuple[int, ...]: + """Get the output shape of the voxel head.""" + # pylint: disable=missing-param-doc + expected_shape: Tuple[int, ...] 
+ if predict_classes: + expected_num_classes: int = self._num_classes if class_based_voxel else 1 + expected_shape = (batch_size, expected_num_classes, + voxel_depth, voxel_depth, voxel_depth) + else: + expected_shape = (batch_size, voxel_depth, voxel_depth, voxel_depth) + return expected_shape + + def _get_input_shape(self, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> Tuple[int, int, int, int]: + """Get the output input shape of the voxel head.""" + return (batch_size, voxel_depth // 2, voxel_depth // 2, num_input_channels) + def test_network_creation(self, predict_classes: bool, class_based_voxel: bool, @@ -53,17 +78,14 @@ def test_network_creation(self, not predict_classes, class_based_voxel, self._num_classes) - input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2, - num_input_channels] + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) input_tensor = tf.ones(input_shape, dtype=tf.float32) output = head(input_tensor) - if predict_classes: - expected_num_classes = self._num_classes if class_based_voxel else 1 - expected_shape = [batch_size, expected_num_classes, - voxel_depth, voxel_depth, voxel_depth] - else: - expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth] + expected_shape = self._get_expected_out_shape(predict_classes, + class_based_voxel, + voxel_depth, batch_size) self.assertAllEqual(output.shape.as_list(), expected_shape) @@ -82,8 +104,8 @@ def test_serialize_deserialize(self, not predict_classes, class_based_voxel, self._num_classes) - input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2, - num_input_channels] + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) input_tensor = tf.ones(input_shape, dtype=tf.float32) _ = head(input_tensor) From 2bfed840be8a4d7be06a480fca4b4898792d4074 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Tue, 19 Oct 2021 17:42:28 -0400 Subject: [PATCH 07/11] Add voxel head gradient unit test. 
--- .../modeling/heads/voxel_head_test.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py index 80c3ed5e37c..caf19b05652 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -114,6 +114,42 @@ def test_serialize_deserialize(self, self.assertAllEqual(head.get_config(), deserialized.get_config()) + def test_gradient_pass_though(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Ensure the gradients of the layer are not None.""" + # pylint: disable=missing-param-doc + tf.keras.backend.set_image_data_format('channels_last') + head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + self._use_group_norm, predict_classes, + not predict_classes, class_based_voxel, + self._num_classes) + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + + input_shape = self._get_input_shape(voxel_depth, batch_size, + num_input_channels) + output_shape = self._get_expected_out_shape(predict_classes, + class_based_voxel, + voxel_depth, batch_size) + init = tf.random_normal_initializer() + x = tf.Variable(initial_value=init(shape=input_shape, dtype=tf.float32)) + y = tf.Variable(initial_value=init(shape=output_shape, dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat = head(x) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, head.trainable_variables) + optimizer.apply_gradients(zip(grad, head.trainable_variables)) + + self.assertNotIn(None, grad) + + + if __name__ == '__main__': # from mesh_rcnn.utils.run_utils import prep_gpu # prep_gpu() From 33a9e32466cdf84325919801670d416662f9e4d8 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: 
Tue, 19 Oct 2021 17:52:15 -0400 Subject: [PATCH 08/11] Voxel head test minor refactoring. --- .../projects/mesh_rcnn/modeling/heads/voxel_head_test.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py index caf19b05652..a64b0784560 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -36,6 +36,7 @@ class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase): """Test for Mesh R-CNN Voxel Prediction Head.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + tf.keras.backend.set_image_data_format('channels_last') self._num_classes = 5 self._conv_dims = 256 self._use_group_norm = False @@ -72,7 +73,6 @@ def test_network_creation(self, num_input_channels: int) -> None: """Verify the output shapes of the voxel head.""" # pylint: disable=missing-param-doc - tf.keras.backend.set_image_data_format('channels_last') head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, @@ -98,7 +98,6 @@ def test_serialize_deserialize(self, num_input_channels: int) -> None: """Create a network object that sets all of its config options.""" # pylint: disable=missing-param-doc - tf.keras.backend.set_image_data_format('channels_last') head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, @@ -123,7 +122,6 @@ def test_gradient_pass_though(self, num_input_channels: int) -> None: """Ensure the gradients of the layer are not None.""" # pylint: disable=missing-param-doc - tf.keras.backend.set_image_data_format('channels_last') head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, 
self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, @@ -149,8 +147,5 @@ def test_gradient_pass_though(self, self.assertNotIn(None, grad) - if __name__ == '__main__': - # from mesh_rcnn.utils.run_utils import prep_gpu - # prep_gpu() tf.test.main() From 45459fd05e6788107550564b5ee4a6a726dcece9 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Thu, 28 Oct 2021 16:51:48 -0400 Subject: [PATCH 09/11] Create config and factory modules for voxel head. --- .../projects/mesh_rcnn/configs/mesh_rcnn.py | 31 +++++++++++ .../projects/mesh_rcnn/modeling/factory.py | 53 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py create mode 100644 official/vision/beta/projects/mesh_rcnn/modeling/factory.py diff --git a/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py new file mode 100644 index 00000000000..01a92ed946a --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py @@ -0,0 +1,31 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Mesh R-CNN configuration definition.""" + +import dataclasses + +from official.modeling import hyperparams # type: ignore + + +@dataclasses.dataclass +class VoxelHead(hyperparams.Config): + """Parameterization for the Mesh R-CNN Voxel Branch Prediction Head.""" + voxel_depth: int = 28 + conv_dim: int = 256 + num_conv: int = 0 + use_group_norm: bool = False + predict_classes: bool = False + bilinearly_upscale_input: bool = True + class_based_voxel: bool = False + num_classes: int = 0 diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/factory.py b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py new file mode 100644 index 00000000000..1b853ee3dd7 --- /dev/null +++ b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py @@ -0,0 +1,53 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains factory functions for Mesh R-CNN networks.""" + +from typing import Optional + +import tensorflow as tf # type: ignore + +from official.vision.beta.projects.mesh_rcnn.configs.mesh_rcnn import VoxelHead +from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head + + +def build_voxel_head(head_config: VoxelHead, + kernel_regularizer: + Optional[tf.keras.regularizers.Regularizer], + bias_regularizer: + Optional[tf.keras.regularizers.Regularizer], + activity_regularizer: + Optional[tf.keras.regularizers.Regularizer] + ) -> voxel_head.VoxelHead: + """Builds Voxel Branch Prediction Head. + Args: + head_config: Dataclass parameterization instance for voxel head. + kernel_regularizer: Convolutional layer weight regularizer object. + bias_regularizer: Convolutional layer bias regularizer object. + activity_regularizer: Convolutional layer activation regularizer object. + Returns: + Voxel head layer instance. + """ + return voxel_head.VoxelHead( + voxel_depth=head_config.voxel_depth, + conv_dim=head_config.conv_dim, + num_conv=head_config.num_conv, + use_group_norm=head_config.use_group_norm, + predict_classes=head_config.predict_classes, + bilinearly_upscale_input=head_config.bilinearly_upscale_input, + class_based_voxel=head_config.class_based_voxel, + num_classes=head_config.num_classes, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + ) From d3afbbacc416e451a68fc95d719425959892f5b8 Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Thu, 28 Oct 2021 16:52:53 -0400 Subject: [PATCH 10/11] Add voxel head test for building from config. 
--- .../mesh_rcnn/modeling/heads/voxel_head.py | 12 ++--- .../modeling/heads/voxel_head_test.py | 46 ++++++++++++++----- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py index 67733d7a3b8..fa796d91bfc 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -23,7 +23,7 @@ class VoxelHead(tf.keras.layers.Layer): def __init__(self, voxel_depth: int, - conv_dims: int, + conv_dim: int, num_conv: int, use_group_norm: bool, predict_classes: bool, @@ -40,7 +40,7 @@ def __init__(self, """Initializes a Voxel Branch Prediction Head. Args: voxel_depth: The number of depth channels for the predicted voxels. - conv_dims: Number of output features for each Conv2D layer in the + conv_dim: Number of output features for each Conv2D layer in the Voxel head. num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer. 
use_group_norm: Whether or not to use GropNormalization in the fully @@ -68,7 +68,7 @@ def __init__(self, super().__init__(**kwargs) self._voxel_depth = voxel_depth - self._conv_dims = conv_dims + self._conv_dim = conv_dim self._num_conv = num_conv self._use_group_norm = use_group_norm self._predict_classes = tf.constant( @@ -92,7 +92,7 @@ def __init__(self, ) self._fully_conv2d_config = dict( - filters=self._conv_dims, + filters=self._conv_dim, kernel_size=(3, 3), strides=(1, 1), padding='same', @@ -101,7 +101,7 @@ def __init__(self, **self._base_config) self._deconv2d_config = dict( - filters=self._conv_dims, + filters=self._conv_dim, kernel_size=(2, 2), strides=(2, 2), padding='valid', @@ -175,7 +175,7 @@ def get_config(self) -> dict: config = dict( voxel_depth=self._voxel_depth, - conv_dims=self._conv_dims, + conv_dim=self._conv_dim, num_conv=self._num_conv, use_group_norm=self._use_group_norm, predict_classes=self._predict_classes, diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py index a64b0784560..02275af3465 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py @@ -18,6 +18,9 @@ import tensorflow as tf # type: ignore from absl.testing import parameterized # type: ignore +from official.vision.beta.projects.mesh_rcnn.configs import \ + mesh_rcnn as mesh_rcnn_config +from official.vision.beta.projects.mesh_rcnn.modeling import factory from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head @@ -33,12 +36,12 @@ 'voxel_depth': 24, 'batch_size': 32, 'num_input_channels': 256}, ) class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase): - """Test for Mesh R-CNN Voxel Prediction Head.""" + """Test for Mesh R-CNN Voxel Branch Prediction Head.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) 
tf.keras.backend.set_image_data_format('channels_last') self._num_classes = 5 - self._conv_dims = 256 + self._conv_dim = 256 self._use_group_norm = False def _get_expected_out_shape(self, @@ -73,7 +76,7 @@ def test_network_creation(self, num_input_channels: int) -> None: """Verify the output shapes of the voxel head.""" # pylint: disable=missing-param-doc - head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, self._num_classes) @@ -98,7 +101,7 @@ def test_serialize_deserialize(self, num_input_channels: int) -> None: """Create a network object that sets all of its config options.""" # pylint: disable=missing-param-doc - head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, self._num_classes) @@ -114,15 +117,15 @@ def test_serialize_deserialize(self, self.assertAllEqual(head.get_config(), deserialized.get_config()) def test_gradient_pass_though(self, - predict_classes: bool, - class_based_voxel: bool, - num_conv: int, - voxel_depth: int, - batch_size: int, - num_input_channels: int) -> None: + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: """Ensure the gradients of the layer are not None.""" # pylint: disable=missing-param-doc - head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv, + head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv, self._use_group_norm, predict_classes, not predict_classes, class_based_voxel, self._num_classes) @@ -146,6 +149,27 @@ def test_gradient_pass_though(self, self.assertNotIn(None, grad) + def test_build_from_config(self, + predict_classes: bool, + class_based_voxel: bool, + num_conv: int, + 
voxel_depth: int, + batch_size: int, + num_input_channels: int) -> None: + """Test head creation from config and factory.""" + # pylint: disable=missing-param-doc,unused-argument + cfg = mesh_rcnn_config.VoxelHead(voxel_depth=voxel_depth, + conv_dim=self._conv_dim, + num_conv=num_conv, + use_group_norm=self._use_group_norm, + predict_classes=predict_classes, + bilinearly_upscale_input=not predict_classes, + class_based_voxel=class_based_voxel, + num_classes=self._num_classes) + _ = factory.build_voxel_head(cfg, + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None) if __name__ == '__main__': tf.test.main() From 9656f03b8002dadbc3f25a996411bba27ef5bc4c Mon Sep 17 00:00:00 2001 From: Zach Ghera Date: Tue, 9 Nov 2021 14:37:30 -0500 Subject: [PATCH 11/11] Change 'tf.cond' to 'if-else' statements in voxel head. Since these booleans will be set at the initialization of the model, it would be cheaper to do this evaluation at graph construction time (with 'if-else') as opposed to at runtime (with 'tf.cond').
--- .../mesh_rcnn/modeling/heads/voxel_head.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py index fa796d91bfc..4e254b99c2d 100755 --- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py +++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py @@ -71,10 +71,8 @@ def __init__(self, self._conv_dim = conv_dim self._num_conv = num_conv self._use_group_norm = use_group_norm - self._predict_classes = tf.constant( - predict_classes, dtype=tf.bool) - self._bilinearly_upscale_input = tf.constant( - bilinearly_upscale_input, dtype=tf.bool) + self._predict_classes = predict_classes + self._bilinearly_upscale_input = bilinearly_upscale_input self._class_based_voxel = class_based_voxel self._num_classes = num_classes if ( predict_classes and class_based_voxel) else 1 @@ -156,16 +154,16 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: (N, V, V, V) for ShapeNet model and (N, C, V, V, V) for Pix3D model where N = batch size, V = `voxel_depth`, and C = `num_classes`. """ - x = tf.cond(self._bilinearly_upscale_input, - true_fn=lambda: self._interpolate(inputs), - false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs)) + if self._bilinearly_upscale_input: + x = self._interpolate(inputs) + else: + x = inputs for layer in self._conv2d_norm_relu_layers: x = layer(x) x = self._deconv(x) x = self._predictor(x) - x = tf.cond(self._predict_classes, - true_fn=lambda: self._reshape(x), - false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x)) + if self._predict_classes: + x = self._reshape(x) return x def get_config(self) -> dict: