From c799135aa61ec116179a5f9b1b8a3d302a776514 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Sat, 9 Oct 2021 13:24:11 -0400
Subject: [PATCH 01/11] Initial work on the voxel head.

I have nearly completed my initial rough implementation of the voxel head. The only thing left is the weight and bias initialization in build(). After that, I will start testing, following yolo_head_test as an exemplar.
---
 .../mesh_rcnn/modeling/heads/voxel_head.py    | 166 ++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100755 official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
new file mode 100755
index 00000000000..65ae1732d8b
--- /dev/null
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -0,0 +1,166 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Currrent Questions
+1. Should I be setting the default values for the head based on what they use
+   in shapenet/config/config.py or one of the actual configs like
+   configs/shapenet/voxmesh_R50.yaml
+2. I am not sure what the correct kernel initializer is, based on this post
+https://discuss.pytorch.org/t/crossentropyloss-expected-object-of-type-torch-longtensor/28683/6?u=ptrblck
+   I think it is HeNormal but I could be wrong.
+3. It looks like Pytorch impl uses something called group normalization
+https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30
+https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141
+   I added a flag in __init__ to use this. But my question is should this layer
+   be placed before or after the ReLU layer since there is no way to directly
+   add this as an option to the Conv2d layer like they do in Pytorch? Based on
+   what I read about BatchNorm, if GroupNorm behaves similarly then it should
+   go before ReLU so that is what I did. But please correct me if I am wrong
+   here.
+"""
+from typing import Any, Optional
+
+import tensorflow as tf  # type: ignore
+
+import tensorflow_addons as tfa  # type: ignore
+
+
+class VoxelHead(tf.keras.layers.Layer):
+  """Mesh R-CNN Voxel Branch Prediction Head."""
+
+  def __init__(self,
+               input_channels: int,
+               voxel_size: int = 28,
+               conv_dims: int = 256,
+               num_conv: int = 0,
+               use_group_norm: bool = False,
+               norm_momentum: float = 0.99,
+               norm_epsilon: float = 0.001,
+               kernel_initializer: str = 'HeNormal',
+               kernel_regularizer:
+               Optional[tf.keras.regularizers.Regularizer] = None,
+               bias_regularizer:
+               Optional[tf.keras.regularizers.Regularizer] = None,
+               **kwargs):
+    """Initializes a Voxel Branch Prediction Head.
+    Args:
+      input_channels: Number of channels in layer preceeding the voxel head.
+        This the final conv5_3 layer of the backbone network for ShapeNet
+        model and the RoIAlign layer following the RPN for Pix3D.
+      voxel_size: The number of depth channels for the predicted voxels.
+      conv_dims: Number of output features for each Conv2D layer in the
+        Voxel head.
+      num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer.
+      use_group_norm: Whether or not to use GropNormalization in fully
+        connected layer(s).
+      norm_momentum: Normalization momentum for the moving average.
+      norm_epsilon: Small float added to variance to avoid dividing by zero.
+      kernel_initializer: kernel_initializer for convolutional layers.
+      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
+      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
+      **kwargs: keyword arguments to be passed.
+    """
+    super().__init__(**kwargs)
+
+    assert self.voxel_size % 2 == 0
+
+    self._input_channels = input_channels
+    self._voxel_size = voxel_size
+    self._conv_dims = conv_dims
+    self._num_conv = num_conv
+    self._use_group_norm = use_group_norm
+
+    self._base_config = dict(
+        activation=None,  # Apply ReLU separately in case we want to use GroupNorm
+        norm_momentum=norm_momentum,
+        norm_epsilon=norm_epsilon,
+        kernel_initializer=kernel_initializer,
+        kernel_regularizer=kernel_regularizer,
+        bias_regularizer=bias_regularizer)
+
+    self._fully_conv2d_config = dict(
+        filters=self._conv_dims,
+        kernel_size=(3, 3),
+        strides=(1, 1),
+        padding=1,
+        use_bias=not self._use_group_norm,
+        data_format='channels_last',
+        **self._base_config)
+
+    self._deconv2d_config = dict(
+        filters=self._conv_dims,
+        kernel_size=(2, 2),
+        strides=(2, 2),
+        padding=0,
+        use_bias=True,
+        **self._base_config)
+    self._deconv2d_config['activation'] = 'relu'
+
+    self._predict_conv2d_config = dict(
+        filters=self._voxel_size,
+        kernel_size=(1, 1),
+        strides=(1, 1),
+        padding=0,
+        use_bias=True,
+        **self._base_config)
+
+  def build(self, input_shape: Any) -> None:
+    """TODO(zghera)
+    """
+    #pylint: disable=unused-argument, missing-param-doc
+    self._interpolate = tf.keras.layers.UpSampling2D(
+        size=(self._voxel_size // 2, self._voxel_size // 2),
+        interpolation="bilinear")
+
+    self._conv2d_norm_relu_layers = []
+    for _ in range(self._num_conv):
+      conv = tf.keras.layers.Conv2D(**self._fully_conv2d_config)
+      self._conv2d_norm_relu_layers.append(conv)
+      if self._use_group_norm:
+        group_norm = tfa.layers.GroupNormalization(groups=32, axis=-1)
+        self._conv2d_norm_relu_layers.append(group_norm)
+      relu = tf.keras.layers.ReLU()
+      self._conv2d_norm_relu_layers.append(relu)
+
+    self._deconv = tf.keras.layers.Conv2DTranspose(**self._deconv2d_config)
+    self._predictor = tf.keras.layers.Conv2D(**self._predict_conv2d_config)
+
+    # TODO(zghera): Weight and bias initializations
+
+  def call(self, inputs: Any) -> Any:
+    """TODO(zghera)
+    Args:
+      inputs: ...
+    Return:
+      ...
+    """
+    # pylint: disable=arguments-differ
+    x = self._interpolate(inputs)
+    for layer in self._conv2d_norm_relu_layers:
+      x = layer(x)
+    x = self._deconv(x)
+    return self._predictor(x)
+
+  @property
+  def output_depth(self) -> int:
+    return self._voxel_size
+
+  def get_config(self) -> dict:
+    config = dict(
+        input_channels=self._input_channels,
+        voxel_size=self._voxel_size,
+        conv_dims=self._conv_dims,
+        num_conv=self._num_conv,
+        use_group_norm=self._use_group_norm,
+        **self._base_config)
+    return config

From 5c896d59a334a36d787ac57f01f021ec4924bdcf Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 12 Oct 2021 16:34:38 -0400
Subject: [PATCH 02/11] Finish voxel head initial implementation.

Added support for both ShapeNet and Pix3D, as well as initialization of layer weights and biases.
---
 .../mesh_rcnn/modeling/heads/voxel_head.py    | 154 ++++++++++++------
 1 file changed, 101 insertions(+), 53 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
index 65ae1732d8b..9a914456623 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -11,14 +11,29 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Currrent Questions
-1. Should I be setting the default values for the head based on what they use
-   in shapenet/config/config.py or one of the actual configs like
-   configs/shapenet/voxmesh_R50.yaml
-2. I am not sure what the correct kernel initializer is, based on this post
-https://discuss.pytorch.org/t/crossentropyloss-expected-object-of-type-torch-longtensor/28683/6?u=ptrblck
-   I think it is HeNormal but I could be wrong.
-3. It looks like Pytorch impl uses something called group normalization
+"""Mesh R-CNN Heads.
+
+TODO(zghera): Remove questions below once complete.
+
+Currrent Questions
+1. This question is probably resolved as the PyTorch Impl manually sets
+   weights and biases for each conv layer. See sectioned off comments below for
+   the original question.
+   That being said, I would still appreciate if someone double checked my
+   weight & bias initializations.
+   vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+   I am not sure what the correct kernel and bias initializers are for the
+   default pytorch conv2d layers. Looking in the PyTorch source (see
+   https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L144)
+   It looks like they are actually using a RandomUniform initialization of the
+   weights with a specific range based on the GitHub comment (see
+   https://github.com/pytorch/pytorch/commit/8130f2f67ada1951ee27e55b8a506d6de23c13df )
+   and the biases are with some variation of HEUniform where
+   `limit = sqrt(1 / fan_in)` rather than 6 (see
+   https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L146 ).
+   Please correct me if I am wrong here.
+   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+2. It looks like Pytorch impl uses something called group normalization
 https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30
 https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141
    I added a flag in __init__ to use this. But my question is should this layer
@@ -28,10 +43,9 @@
    go before ReLU so that is what I did. But please correct me if I am wrong
    here.
 """
-from typing import Any, Optional
+from typing import Optional
 
 import tensorflow as tf  # type: ignore
-
 import tensorflow_addons as tfa  # type: ignore
 
 
@@ -39,62 +53,82 @@ class VoxelHead(tf.keras.layers.Layer):
   """Mesh R-CNN Voxel Branch Prediction Head."""
 
   def __init__(self,
-               input_channels: int,
-               voxel_size: int = 28,
-               conv_dims: int = 256,
-               num_conv: int = 0,
-               use_group_norm: bool = False,
+               voxel_depth: int,
+               conv_dims: int,
+               num_conv: int,
+               use_group_norm: bool,
+               predict_classes: bool,
+               bilinearly_upscale_input: bool,
+               class_based_voxel: bool,
+               num_classes: int,
                norm_momentum: float = 0.99,
                norm_epsilon: float = 0.001,
-               kernel_initializer: str = 'HeNormal',
                kernel_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
-               bias_regularizer:
+               conv_bias_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
                **kwargs):
     """Initializes a Voxel Branch Prediction Head.
     Args:
-      input_channels: Number of channels in layer preceeding the voxel head.
-        This the final conv5_3 layer of the backbone network for ShapeNet
-        model and the RoIAlign layer following the RPN for Pix3D.
-      voxel_size: The number of depth channels for the predicted voxels.
+      voxel_depth: The number of depth channels for the predicted voxels.
       conv_dims: Number of output features for each Conv2D layer in the
         Voxel head.
       num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer.
-      use_group_norm: Whether or not to use GropNormalization in fully
-        connected layer(s).
+      use_group_norm: Whether or not to use GropNormalization in the fully
+        connected layers.
+      predict_classes: Whether or not to reshape the final predictor output
+        from (N, CD, H, W) to (N, C, D, H, W) where C is `num_classes` to
+        predict and D is `voxel_depth`. This option is used by the Pix3D
+        Mesh R-CNN architecture.
+      bilinearly_upscale_input: Whether or not to bilinearly resize the voxel
+        head input tensor such that width and height of feature maps are equal
+        to (`voxel_depth` // 2). This option is used by the ShapeNet Mesh R-CNN
+        architecture.
+      class_based_voxel: Whether or predict one of `num_classes` for each voxel
+        grid output. If `predict_classes` is True but `class_based_voxel` is
+        False, we will only predict 1 class. This option is used by the Pix3d
+        Mesh R-CNN architecture.
+      num_classes: If `class_based_voxel` is predict one of `num_classes`
+        classes for each voxel. This option is used by the Pix3d Mesh R-CNN
+        architecture.
       norm_momentum: Normalization momentum for the moving average.
       norm_epsilon: Small float added to variance to avoid dividing by zero.
-      kernel_initializer: kernel_initializer for convolutional layers.
-      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
-      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
-      **kwargs: keyword arguments to be passed.
+      kernel_regularizer: Convolutional layer weight regularizer object.
+      conv_bias_regularizer: Convolutional layer bias regularizer object.
+      **kwargs: other keyword arguments to be passed.
     """
     super().__init__(**kwargs)
 
-    assert self.voxel_size % 2 == 0
-
-    self._input_channels = input_channels
-    self._voxel_size = voxel_size
+    self._voxel_depth = voxel_depth
     self._conv_dims = conv_dims
     self._num_conv = num_conv
     self._use_group_norm = use_group_norm
+    self._predict_classes = predict_classes
+    self._bilinearly_upscale_input = bilinearly_upscale_input
+    self._num_classes = num_classes if (
+        predict_classes and class_based_voxel) else 1
 
     self._base_config = dict(
         activation=None,  # Apply ReLU separately in case we want to use GroupNorm
         norm_momentum=norm_momentum,
         norm_epsilon=norm_epsilon,
-        kernel_initializer=kernel_initializer,
+        kernel_initializer=None, # Set individually for each layer conv layer type
+        bias_initializer=None,
         kernel_regularizer=kernel_regularizer,
-        bias_regularizer=bias_regularizer)
+        bias_regularizer=conv_bias_regularizer)
 
+    self._conv_initializers = dict(
+        kernel_initializer=tf.keras.initializers.VarianceScaling(
+            scale=2, mode='fan_out', distribution='untruncated_normal'), # HeNormal with fan out
+        bias_initializer=None if self._use_group_norm else 'zeros'
+    )
     self._fully_conv2d_config = dict(
         filters=self._conv_dims,
         kernel_size=(3, 3),
         strides=(1, 1),
         padding=1,
         use_bias=not self._use_group_norm,
-        data_format='channels_last',
+        **self._conv_initializers,
         **self._base_config)
 
     self._deconv2d_config = dict(
@@ -103,24 +137,32 @@ def __init__(self,
         strides=(2, 2),
         padding=0,
         use_bias=True,
+        **self._conv_initializers,
         **self._base_config)
     self._deconv2d_config['activation'] = 'relu'
 
     self._predict_conv2d_config = dict(
-        filters=self._voxel_size,
+        filters=self._num_classes * self._voxel_depth,
         kernel_size=(1, 1),
         strides=(1, 1),
         padding=0,
         use_bias=True,
+        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
+        bias_initializer=tf.keras.initializers.Zeros(),
         **self._base_config)
 
-  def build(self, input_shape: Any) -> None:
-    """TODO(zghera)
+  def build(self, input_shape: tf.TensorShape) -> None:
+    """Creates the voxel head layers and initializes their weights and biases.
+    Args:
+      input_shape: Shape of the input tensor to the voxel head.
+        This the shape of the final layer of the backbone network for the
+        ShapeNet model and the RoIAlign layer following the RPN for Pix3D.
     """
-    #pylint: disable=unused-argument, missing-param-doc
-    self._interpolate = tf.keras.layers.UpSampling2D(
-        size=(self._voxel_size // 2, self._voxel_size // 2),
-        interpolation="bilinear")
+    #pylint: disable=unused-argument
+    vd = self._voxel_depth
+    self._interpolate = tf.keras.layers.Resizing(
+        height=(vd // 2), width=(vd // 2), interpolation='bilinear')
+    self._reshape = tf.keras.layers.Reshape((self._num_classes, vd, vd, vd))
 
     self._conv2d_norm_relu_layers = []
     for _ in range(self._num_conv):
@@ -135,30 +177,36 @@ def build(self, input_shape: Any) -> None:
     self._deconv = tf.keras.layers.Conv2DTranspose(**self._deconv2d_config)
     self._predictor = tf.keras.layers.Conv2D(**self._predict_conv2d_config)
 
-    # TODO(zghera): Weight and bias initializations
-
-  def call(self, inputs: Any) -> Any:
-    """TODO(zghera)
+  def call(self, inputs: tf.Tensor) -> tf.Tensor:
+    """Forward pass of the voxel head for the ShapeNet Mesh R-CNN model.
     Args:
-      inputs: ...
-    Return:
-      ...
+      inputs: This is the tensor output of the final layer of the backbone
+        network for the ShapeNet model and the RoIAlign layer following the
+        RPN for Pix3D.
+    Returns:
+      (N, V, V, V) for ShapeNet model and (N, C, V, V, V) for Pix3D model
+      where N = batch size, V = `voxel_depth`, and C = `num_classes`.
     """
-    # pylint: disable=arguments-differ
-    x = self._interpolate(inputs)
+    x = tf.cond(self._bilinearly_upscale_input,
+                true_fn=lambda: self._interpolate(inputs),
+                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs))
     for layer in self._conv2d_norm_relu_layers:
       x = layer(x)
     x = self._deconv(x)
-    return self._predictor(x)
+    x = self._predictor(x)
+    x = tf.cond(self._predict_classes,
+                true_fn=lambda: self._reshape(x),
+                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs))
+    return x
 
   @property
   def output_depth(self) -> int:
-    return self._voxel_size
+    return self._voxel_depth
 
   def get_config(self) -> dict:
     config = dict(
         input_channels=self._input_channels,
-        voxel_size=self._voxel_size,
+        voxel_depth=self._voxel_depth,
         conv_dims=self._conv_dims,
         num_conv=self._num_conv,
         use_group_norm=self._use_group_norm,

From dfa70168887ff70ffeeed0018c201f392347562d Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Thu, 14 Oct 2021 22:54:08 -0400
Subject: [PATCH 03/11] Create unit tests for verifying output shape.

See updated questions in voxel_head.py. Mypy is also failing to find the voxel_head module right now and I cannot figure out why. I will try to look into this after finishing up testing.
---
 .../mesh_rcnn/modeling/heads/voxel_head.py    | 55 ++++++---------
 .../modeling/heads/voxel_head_test.py         | 67 +++++++++++++++++++
 2 files changed, 88 insertions(+), 34 deletions(-)
 create mode 100755 official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
index 9a914456623..9266198bd8a 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -16,24 +16,7 @@
 TODO(zghera): Remove questions below once complete.
 
 Currrent Questions
-1. This question is probably resolved as the PyTorch Impl manually sets
-   weights and biases for each conv layer. See sectioned off comments below for
-   the original question.
-   That being said, I would still appreciate if someone double checked my
-   weight & bias initializations.
-   vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
-   I am not sure what the correct kernel and bias initializers are for the
-   default pytorch conv2d layers. Looking in the PyTorch source (see
-   https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L144)
-   It looks like they are actually using a RandomUniform initialization of the
-   weights with a specific range based on the GitHub comment (see
-   https://github.com/pytorch/pytorch/commit/8130f2f67ada1951ee27e55b8a506d6de23c13df )
-   and the biases are with some variation of HEUniform where
-   `limit = sqrt(1 / fan_in)` rather than 6 (see
-   https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L146 ).
-   Please correct me if I am wrong here.
-   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-2. It looks like Pytorch impl uses something called group normalization
+1. It looks like Pytorch impl uses something called group normalization
 https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30
 https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141
    I added a flag in __init__ to use this. But my question is should this layer
@@ -42,6 +25,13 @@
    what I read about BatchNorm, if GroupNorm behaves similarly then it should
    go before ReLU so that is what I did. But please correct me if I am wrong
    here.
+2. The PyTorch implementation using a padding of 1 for the initial conv2d
+   layers. But it appears that tensorflow only provides the options same and
+   valid. So if my understanding is correct, if we use a kernel size of 3, then
+   there are cases where 1 padding will not be the same as 'same' padding
+   (e.g. 22 x 22).
+3. Is it okay to not write argument docs for my tests as they are the same
+   as the voxel head arguments?
 """
 from typing import Optional
 
@@ -61,12 +51,12 @@ def __init__(self,
                bilinearly_upscale_input: bool,
                class_based_voxel: bool,
                num_classes: int,
-               norm_momentum: float = 0.99,
-               norm_epsilon: float = 0.001,
                kernel_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
                conv_bias_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
+               conv_activ_regularizer:
+               Optional[tf.keras.regularizers.Regularizer] = None,
                **kwargs):
     """Initializes a Voxel Branch Prediction Head.
     Args:
@@ -91,10 +81,9 @@ def __init__(self,
       num_classes: If `class_based_voxel` is predict one of `num_classes`
         classes for each voxel. This option is used by the Pix3d Mesh R-CNN
         architecture.
-      norm_momentum: Normalization momentum for the moving average.
-      norm_epsilon: Small float added to variance to avoid dividing by zero.
       kernel_regularizer: Convolutional layer weight regularizer object.
       conv_bias_regularizer: Convolutional layer bias regularizer object.
+      conv_activ_regularizer: Convolutional layer activation regularizer object.
       **kwargs: other keyword arguments to be passed.
     """
     super().__init__(**kwargs)
@@ -110,34 +99,32 @@ def __init__(self,
 
     self._base_config = dict(
         activation=None,  # Apply ReLU separately in case we want to use GroupNorm
-        norm_momentum=norm_momentum,
-        norm_epsilon=norm_epsilon,
-        kernel_initializer=None, # Set individually for each layer conv layer type
-        bias_initializer=None,
         kernel_regularizer=kernel_regularizer,
-        bias_regularizer=conv_bias_regularizer)
+        bias_regularizer=conv_bias_regularizer,
+        activity_regularizer=conv_activ_regularizer)
 
-    self._conv_initializers = dict(
+    self._non_predictor_initializers = dict(
         kernel_initializer=tf.keras.initializers.VarianceScaling(
             scale=2, mode='fan_out', distribution='untruncated_normal'), # HeNormal with fan out
         bias_initializer=None if self._use_group_norm else 'zeros'
     )
+
     self._fully_conv2d_config = dict(
         filters=self._conv_dims,
         kernel_size=(3, 3),
         strides=(1, 1),
-        padding=1,
+        padding='same',
         use_bias=not self._use_group_norm,
-        **self._conv_initializers,
+        **self._non_predictor_initializers,
         **self._base_config)
 
     self._deconv2d_config = dict(
         filters=self._conv_dims,
         kernel_size=(2, 2),
         strides=(2, 2),
-        padding=0,
+        padding='valid',
         use_bias=True,
-        **self._conv_initializers,
+        **self._non_predictor_initializers,
         **self._base_config)
     self._deconv2d_config['activation'] = 'relu'
 
@@ -145,7 +132,7 @@ def __init__(self,
         filters=self._num_classes * self._voxel_depth,
         kernel_size=(1, 1),
         strides=(1, 1),
-        padding=0,
+        padding='valid',
         use_bias=True,
         kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
         bias_initializer=tf.keras.initializers.Zeros(),
@@ -196,7 +183,7 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
     x = self._predictor(x)
     x = tf.cond(self._predict_classes,
                 true_fn=lambda: self._reshape(x),
-                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs))
+                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x))
     return x
 
   @property
diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
new file mode 100755
index 00000000000..5400347e482
--- /dev/null
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -0,0 +1,67 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for Mesh R-CNN Heads."""
+
+import tensorflow as tf  # type: ignore
+import voxel_head
+from absl.testing import parameterized  # type: ignore
+
+
+@parameterized.product(
+    predict_classes=[False, True],
+    class_based_voxel=[False, True],
+    num_classes=[1, 5],
+    voxel_depth=[24, 48],
+    conv_dims=[256],
+    num_conv=[0, 2],
+    use_group_norm=[False, True],
+)
+class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase):
+  """Test for Mesh R-CNN Voxel Prediction Head."""
+
+  def test_network_output(self,
+                          predict_classes: bool,
+                          class_based_voxel: bool,
+                          num_classes: int,
+                          voxel_depth: int,
+                          conv_dims: int,
+                          num_conv: int,
+                          use_group_norm: bool) -> None:
+    """Verify the output shapes of the voxel head."""
+    # pylint: disable=missing-param-doc
+    tf.keras.backend.set_image_data_format('channels_last')
+    head = voxel_head.VoxelHead(voxel_depth, conv_dims, num_conv,
+                                use_group_norm, predict_classes,
+                                not predict_classes, class_based_voxel,
+                                num_classes)
+    batch_size = 32
+    num_input_channels = 256
+    input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2,
+                   num_input_channels]
+    input_tensor = tf.ones(input_shape, dtype=tf.float32)
+    output = head(input_tensor)
+
+    if predict_classes:
+      expected_num_classes = num_classes if class_based_voxel else 1
+      expected_shape = [batch_size, expected_num_classes, voxel_depth,
+                        voxel_depth, voxel_depth]
+    else:
+      expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth]
+
+    self.assertAllEqual(output.shape.as_list(), expected_shape)
+
+if __name__ == '__main__':
+  # from mesh_rcnn.utils.run_utils import prep_gpu
+  # prep_gpu()
+  tf.test.main()

From 933075e14efaa211f709cdbb57dddd4604a0735d Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Thu, 14 Oct 2021 23:06:21 -0400
Subject: [PATCH 04/11] Add vscode files to gitignore.

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index e400054eaba..a21f8947795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,4 +99,6 @@ ENV/
 
 hooks/
 pylint.sh
-pylintrc
\ No newline at end of file
+pylintrc
+
+.vscode/
\ No newline at end of file

From 0fcc9b654f54a28c9d92c49c4b46ad21107f6b36 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 19 Oct 2021 14:44:00 -0400
Subject: [PATCH 05/11] Add voxel head serialize unit test and refactoring.

---
 .../mesh_rcnn/modeling/heads/voxel_head.py    | 61 ++++++--------
 .../modeling/heads/voxel_head_test.py         | 81 +++++++++++++------
 2 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
index 9266198bd8a..67733d7a3b8 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -11,28 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Mesh R-CNN Heads.
-
-TODO(zghera): Remove questions below once complete.
-
-Currrent Questions
-1. It looks like Pytorch impl uses something called group normalization
-https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/config/config.py#L30
-https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/detectron2/layers/batch_norm.py#L141
-   I added a flag in __init__ to use this. But my question is should this layer
-   be placed before or after the ReLU layer since there is no way to directly
-   add this as an option to the Conv2d layer like they do in Pytorch? Based on
-   what I read about BatchNorm, if GroupNorm behaves similarly then it should
-   go before ReLU so that is what I did. But please correct me if I am wrong
-   here.
-2. The PyTorch implementation using a padding of 1 for the initial conv2d
-   layers. But it appears that tensorflow only provides the options same and
-   valid. So if my understanding is correct, if we use a kernel size of 3, then
-   there are cases where 1 padding will not be the same as 'same' padding
-   (e.g. 22 x 22).
-3. Is it okay to not write argument docs for my tests as they are the same
-   as the voxel head arguments?
-"""
+"""Mesh R-CNN Heads."""
 from typing import Optional
 
 import tensorflow as tf  # type: ignore
@@ -53,9 +32,9 @@ def __init__(self,
                num_classes: int,
                kernel_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
-               conv_bias_regularizer:
+               bias_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
-               conv_activ_regularizer:
+               activity_regularizer:
                Optional[tf.keras.regularizers.Regularizer] = None,
                **kwargs):
     """Initializes a Voxel Branch Prediction Head.
@@ -82,8 +61,8 @@ def __init__(self,
         classes for each voxel. This option is used by the Pix3d Mesh R-CNN
         architecture.
       kernel_regularizer: Convolutional layer weight regularizer object.
-      conv_bias_regularizer: Convolutional layer bias regularizer object.
-      conv_activ_regularizer: Convolutional layer activation regularizer object.
+      bias_regularizer: Convolutional layer bias regularizer object.
+      activity_regularizer: Convolutional layer activation regularizer object.
       **kwargs: other keyword arguments to be passed.
     """
     super().__init__(**kwargs)
@@ -92,16 +71,19 @@ def __init__(self,
     self._conv_dims = conv_dims
     self._num_conv = num_conv
     self._use_group_norm = use_group_norm
-    self._predict_classes = predict_classes
-    self._bilinearly_upscale_input = bilinearly_upscale_input
+    self._predict_classes = tf.constant(
+        predict_classes, dtype=tf.bool)
+    self._bilinearly_upscale_input = tf.constant(
+        bilinearly_upscale_input, dtype=tf.bool)
+    self._class_based_voxel = class_based_voxel
     self._num_classes = num_classes if (
         predict_classes and class_based_voxel) else 1
 
     self._base_config = dict(
         activation=None,  # Apply ReLU separately in case we want to use GroupNorm
         kernel_regularizer=kernel_regularizer,
-        bias_regularizer=conv_bias_regularizer,
-        activity_regularizer=conv_activ_regularizer)
+        bias_regularizer=bias_regularizer,
+        activity_regularizer=activity_regularizer)
 
     self._non_predictor_initializers = dict(
         kernel_initializer=tf.keras.initializers.VarianceScaling(
@@ -186,16 +168,23 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
                 false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x))
     return x
 
-  @property
-  def output_depth(self) -> int:
-    return self._voxel_depth
-
   def get_config(self) -> dict:
+    """Get config dict of the VoxelHead layer."""
+    regularizers = dict(self._base_config)
+    del regularizers['activation']
+
     config = dict(
-        input_channels=self._input_channels,
         voxel_depth=self._voxel_depth,
         conv_dims=self._conv_dims,
         num_conv=self._num_conv,
         use_group_norm=self._use_group_norm,
-        **self._base_config)
+        predict_classes=self._predict_classes,
+        bilinearly_upscale_input=self._bilinearly_upscale_input,
+        class_based_voxel=self._class_based_voxel,
+        num_classes=self._num_classes,
+        **regularizers)
     return config
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
index 5400347e482..133e8944743 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -14,53 +14,84 @@
 """Tests for Mesh R-CNN Heads."""
 
 import tensorflow as tf  # type: ignore
-import voxel_head
 from absl.testing import parameterized  # type: ignore
 
+from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head
 
-@parameterized.product(
-    predict_classes=[False, True],
-    class_based_voxel=[False, True],
-    num_classes=[1, 5],
-    voxel_depth=[24, 48],
-    conv_dims=[256],
-    num_conv=[0, 2],
-    use_group_norm=[False, True],
+
+@parameterized.named_parameters(
+  {'testcase_name': 'shapenet',
+  'predict_classes': False, 'class_based_voxel': False, 'num_conv': 2,
+  'voxel_depth': 48, 'batch_size': 32, 'num_input_channels': 2048},
+  {'testcase_name': 'pix3d-class-agnostic',
+  'predict_classes': True, 'class_based_voxel': False, 'num_conv': 1,
+  'voxel_depth': 24, 'batch_size': 1, 'num_input_channels': 256},
+  {'testcase_name': 'pix3d-class-based',
+  'predict_classes': True, 'class_based_voxel': True, 'num_conv': 0,
+  'voxel_depth': 24, 'batch_size': 32, 'num_input_channels': 256},
 )
 class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase):
   """Test for Mesh R-CNN Voxel Prediction Head."""
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    self._num_classes = 5
+    self._conv_dims = 256
+    self._use_group_norm = False
 
-  def test_network_output(self,
-                          predict_classes: bool,
-                          class_based_voxel: bool,
-                          num_classes: int,
-                          voxel_depth: int,
-                          conv_dims: int,
-                          num_conv: int,
-                          use_group_norm: bool) -> None:
+  def test_network_creation(self,
+                            predict_classes: bool,
+                            class_based_voxel: bool,
+                            num_conv: int,
+                            voxel_depth: int,
+                            batch_size: int,
+                            num_input_channels: int) -> None:
     """Verify the output shapes of the voxel head."""
     # pylint: disable=missing-param-doc
     tf.keras.backend.set_image_data_format('channels_last')
-    head = voxel_head.VoxelHead(voxel_depth, conv_dims, num_conv,
-                                use_group_norm, predict_classes,
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+                                self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
-                                num_classes)
-    batch_size = 32
-    num_input_channels = 256
+                                self._num_classes)
+
     input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2,
                    num_input_channels]
     input_tensor = tf.ones(input_shape, dtype=tf.float32)
     output = head(input_tensor)
 
     if predict_classes:
-      expected_num_classes = num_classes if class_based_voxel else 1
-      expected_shape = [batch_size, expected_num_classes, voxel_depth,
-                        voxel_depth, voxel_depth]
+      expected_num_classes = self._num_classes if class_based_voxel else 1
+      expected_shape = [batch_size, expected_num_classes,
+                        voxel_depth, voxel_depth, voxel_depth]
     else:
       expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth]
 
     self.assertAllEqual(output.shape.as_list(), expected_shape)
 
+  def test_serialize_deserialize(self,
+                                 predict_classes: bool,
+                                 class_based_voxel: bool,
+                                 num_conv: int,
+                                 voxel_depth: int,
+                                 batch_size: int,
+                                 num_input_channels: int) -> None:
+    """Create a network object that sets all of its config options."""
+    # pylint: disable=missing-param-doc
+    tf.keras.backend.set_image_data_format('channels_last')
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+                                self._use_group_norm, predict_classes,
+                                not predict_classes, class_based_voxel,
+                                self._num_classes)
+
+    input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2,
+                   num_input_channels]
+    input_tensor = tf.ones(input_shape, dtype=tf.float32)
+    _ = head(input_tensor)
+
+    serialized = head.get_config()
+    deserialized = voxel_head.VoxelHead.from_config(serialized)
+
+    self.assertAllEqual(head.get_config(), deserialized.get_config())
+
 if __name__ == '__main__':
   # from mesh_rcnn.utils.run_utils import prep_gpu
   # prep_gpu()

From 4fa19ad60513fa0fba41b18689a763d3aa0ac442 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 19 Oct 2021 16:36:33 -0400
Subject: [PATCH 06/11] Create helpers for getting input/output shapes.

---
 .../modeling/heads/voxel_head_test.py         | 42 ++++++++++++++-----
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
index 133e8944743..80c3ed5e37c 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 """Tests for Mesh R-CNN Heads."""
 
+from typing import Tuple
+
 import tensorflow as tf  # type: ignore
 from absl.testing import parameterized  # type: ignore
 
@@ -38,6 +40,29 @@ def __init__(self, *args, **kwargs):
     self._conv_dims = 256
     self._use_group_norm = False
 
+  def _get_expected_out_shape(self,
+                            predict_classes: bool,
+                            class_based_voxel: bool,
+                            voxel_depth: int,
+                            batch_size: int) -> Tuple[int, ...]:
+    """Get the expected voxel head output shape for the given parameters."""
+    # pylint: disable=missing-param-doc
+    expected_shape: Tuple[int, ...]
+    if predict_classes:
+      expected_num_classes: int = self._num_classes if class_based_voxel else 1
+      expected_shape = (batch_size, expected_num_classes,
+                        voxel_depth, voxel_depth, voxel_depth)
+    else:
+      expected_shape = (batch_size, voxel_depth, voxel_depth, voxel_depth)
+    return expected_shape
+
+  def _get_input_shape(self,
+                       voxel_depth: int,
+                       batch_size: int,
+                       num_input_channels: int) -> Tuple[int, int, int, int]:
+    """Get the input shape of the voxel head."""
+    return (batch_size, voxel_depth // 2, voxel_depth // 2, num_input_channels)
+
   def test_network_creation(self,
                             predict_classes: bool,
                             class_based_voxel: bool,
@@ -53,17 +78,14 @@ def test_network_creation(self,
                                 not predict_classes, class_based_voxel,
                                 self._num_classes)
 
-    input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2,
-                   num_input_channels]
+    input_shape = self._get_input_shape(voxel_depth, batch_size,
+                                        num_input_channels)
     input_tensor = tf.ones(input_shape, dtype=tf.float32)
     output = head(input_tensor)
 
-    if predict_classes:
-      expected_num_classes = self._num_classes if class_based_voxel else 1
-      expected_shape = [batch_size, expected_num_classes,
-                        voxel_depth, voxel_depth, voxel_depth]
-    else:
-      expected_shape = [batch_size, voxel_depth, voxel_depth, voxel_depth]
+    expected_shape = self._get_expected_out_shape(predict_classes,
+                                                  class_based_voxel,
+                                                  voxel_depth, batch_size)
 
     self.assertAllEqual(output.shape.as_list(), expected_shape)
 
@@ -82,8 +104,8 @@ def test_serialize_deserialize(self,
                                 not predict_classes, class_based_voxel,
                                 self._num_classes)
 
-    input_shape = [batch_size, voxel_depth // 2, voxel_depth // 2,
-                   num_input_channels]
+    input_shape = self._get_input_shape(voxel_depth, batch_size,
+                                        num_input_channels)
     input_tensor = tf.ones(input_shape, dtype=tf.float32)
     _ = head(input_tensor)
 

From 2bfed840be8a4d7be06a480fca4b4898792d4074 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 19 Oct 2021 17:42:28 -0400
Subject: [PATCH 07/11] Add voxel head gradient unit test.

---
 .../modeling/heads/voxel_head_test.py         | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
index 80c3ed5e37c..caf19b05652 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -114,6 +114,42 @@ def test_serialize_deserialize(self,
 
     self.assertAllEqual(head.get_config(), deserialized.get_config())
 
+  def test_gradient_pass_though(self,
+                            predict_classes: bool,
+                            class_based_voxel: bool,
+                            num_conv: int,
+                            voxel_depth: int,
+                            batch_size: int,
+                            num_input_channels: int) -> None:
+    """Ensure the gradients of the layer are not None."""
+    # pylint: disable=missing-param-doc
+    tf.keras.backend.set_image_data_format('channels_last')
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+                                self._use_group_norm, predict_classes,
+                                not predict_classes, class_based_voxel,
+                                self._num_classes)
+    loss = tf.keras.losses.MeanSquaredError()
+    optimizer = tf.keras.optimizers.SGD()
+
+    input_shape = self._get_input_shape(voxel_depth, batch_size,
+                                        num_input_channels)
+    output_shape = self._get_expected_out_shape(predict_classes,
+                                                  class_based_voxel,
+                                                  voxel_depth, batch_size)
+    init = tf.random_normal_initializer()
+    x = tf.Variable(initial_value=init(shape=input_shape, dtype=tf.float32))
+    y = tf.Variable(initial_value=init(shape=output_shape, dtype=tf.float32))
+
+    with tf.GradientTape() as tape:
+      x_hat = head(x)
+      grad_loss = loss(x_hat, y)
+    grad = tape.gradient(grad_loss, head.trainable_variables)
+    optimizer.apply_gradients(zip(grad, head.trainable_variables))
+
+    self.assertNotIn(None, grad)
+
+
+
 if __name__ == '__main__':
   # from mesh_rcnn.utils.run_utils import prep_gpu
   # prep_gpu()

From 33a9e32466cdf84325919801670d416662f9e4d8 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 19 Oct 2021 17:52:15 -0400
Subject: [PATCH 08/11] Voxel head test minor refactoring.

---
 .../projects/mesh_rcnn/modeling/heads/voxel_head_test.py   | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
index caf19b05652..a64b0784560 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -36,6 +36,7 @@ class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase):
   """Test for Mesh R-CNN Voxel Prediction Head."""
   def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
+    tf.keras.backend.set_image_data_format('channels_last')
     self._num_classes = 5
     self._conv_dims = 256
     self._use_group_norm = False
@@ -72,7 +73,6 @@ def test_network_creation(self,
                             num_input_channels: int) -> None:
     """Verify the output shapes of the voxel head."""
     # pylint: disable=missing-param-doc
-    tf.keras.backend.set_image_data_format('channels_last')
     head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
@@ -98,7 +98,6 @@ def test_serialize_deserialize(self,
                                  num_input_channels: int) -> None:
     """Create a network object that sets all of its config options."""
     # pylint: disable=missing-param-doc
-    tf.keras.backend.set_image_data_format('channels_last')
     head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
@@ -123,7 +122,6 @@ def test_gradient_pass_though(self,
                             num_input_channels: int) -> None:
     """Ensure the gradients of the layer are not None."""
     # pylint: disable=missing-param-doc
-    tf.keras.backend.set_image_data_format('channels_last')
     head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
@@ -149,8 +147,5 @@ def test_gradient_pass_though(self,
     self.assertNotIn(None, grad)
 
 
-
 if __name__ == '__main__':
-  # from mesh_rcnn.utils.run_utils import prep_gpu
-  # prep_gpu()
   tf.test.main()

From 45459fd05e6788107550564b5ee4a6a726dcece9 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Thu, 28 Oct 2021 16:51:48 -0400
Subject: [PATCH 09/11] Create config and factory modules for voxel head.

---
 .../projects/mesh_rcnn/configs/mesh_rcnn.py   | 31 +++++++++++
 .../projects/mesh_rcnn/modeling/factory.py    | 53 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py
 create mode 100644 official/vision/beta/projects/mesh_rcnn/modeling/factory.py

diff --git a/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py
new file mode 100644
index 00000000000..01a92ed946a
--- /dev/null
+++ b/official/vision/beta/projects/mesh_rcnn/configs/mesh_rcnn.py
@@ -0,0 +1,31 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Mesh R-CNN configuration definition."""
+
+import dataclasses
+
+from official.modeling import hyperparams  # type: ignore
+
+
+@dataclasses.dataclass
+class VoxelHead(hyperparams.Config):
+  """Parameterization for the Mesh R-CNN Voxel Branch Prediction Head."""
+  voxel_depth: int = 28  # Number of depth channels for the predicted voxels.
+  conv_dim: int = 256  # Number of output features for each Conv2D layer.
+  num_conv: int = 0  # Number of Conv2D layers before the Conv2DTranspose.
+  use_group_norm: bool = False  # Whether to use group normalization.
+  predict_classes: bool = False  # If True, the head output is reshaped.
+  bilinearly_upscale_input: bool = True  # If True, input is interpolated first.
+  class_based_voxel: bool = False  # With predict_classes, enables num_classes.
+  num_classes: int = 0  # Head uses 1 unless both class flags above are set.
diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/factory.py b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py
new file mode 100644
index 00000000000..1b853ee3dd7
--- /dev/null
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/factory.py
@@ -0,0 +1,53 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains factory functions for Mesh R-CNN networks."""
+
+from typing import Optional
+
+import tensorflow as tf  # type: ignore
+
+from official.vision.beta.projects.mesh_rcnn.configs.mesh_rcnn import VoxelHead
+from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head
+
+
+def build_voxel_head(head_config: VoxelHead,
+                      kernel_regularizer:
+                      Optional[tf.keras.regularizers.Regularizer],
+                      bias_regularizer:
+                      Optional[tf.keras.regularizers.Regularizer],
+                      activity_regularizer:
+                      Optional[tf.keras.regularizers.Regularizer]
+                    ) -> voxel_head.VoxelHead:
+  """Builds a Voxel Branch Prediction Head from a config dataclass.
+  Args:
+    head_config: Voxel head config; each field is forwarded by keyword.
+    kernel_regularizer: Convolutional layer weight regularizer object.
+    bias_regularizer: Convolutional layer bias regularizer object.
+    activity_regularizer: Convolutional layer activation regularizer object.
+  Returns:
+    A voxel_head.VoxelHead layer built from head_config and the regularizers.
+  """
+  return voxel_head.VoxelHead(
+    voxel_depth=head_config.voxel_depth,
+    conv_dim=head_config.conv_dim,
+    num_conv=head_config.num_conv,
+    use_group_norm=head_config.use_group_norm,
+    predict_classes=head_config.predict_classes,
+    bilinearly_upscale_input=head_config.bilinearly_upscale_input,
+    class_based_voxel=head_config.class_based_voxel,
+    num_classes=head_config.num_classes,
+    kernel_regularizer=kernel_regularizer,
+    bias_regularizer=bias_regularizer,
+    activity_regularizer=activity_regularizer,
+  )

From d3afbbacc416e451a68fc95d719425959892f5b8 Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Thu, 28 Oct 2021 16:52:53 -0400
Subject: [PATCH 10/11] Add voxel head test for building from config.

---
 .../mesh_rcnn/modeling/heads/voxel_head.py    | 12 ++---
 .../modeling/heads/voxel_head_test.py         | 46 ++++++++++++++-----
 2 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
index 67733d7a3b8..fa796d91bfc 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -23,7 +23,7 @@ class VoxelHead(tf.keras.layers.Layer):
 
   def __init__(self,
                voxel_depth: int,
-               conv_dims: int,
+               conv_dim: int,
                num_conv: int,
                use_group_norm: bool,
                predict_classes: bool,
@@ -40,7 +40,7 @@ def __init__(self,
     """Initializes a Voxel Branch Prediction Head.
     Args:
       voxel_depth: The number of depth channels for the predicted voxels.
-      conv_dims: Number of output features for each Conv2D layer in the
+      conv_dim: Number of output features for each Conv2D layer in the
         Voxel head.
       num_conv: Number of Conv2D layers prior to the Conv2DTranspose layer.
       use_group_norm: Whether or not to use GropNormalization in the fully
@@ -68,7 +68,7 @@ def __init__(self,
     super().__init__(**kwargs)
 
     self._voxel_depth = voxel_depth
-    self._conv_dims = conv_dims
+    self._conv_dim = conv_dim
     self._num_conv = num_conv
     self._use_group_norm = use_group_norm
     self._predict_classes = tf.constant(
@@ -92,7 +92,7 @@ def __init__(self,
     )
 
     self._fully_conv2d_config = dict(
-        filters=self._conv_dims,
+        filters=self._conv_dim,
         kernel_size=(3, 3),
         strides=(1, 1),
         padding='same',
@@ -101,7 +101,7 @@ def __init__(self,
         **self._base_config)
 
     self._deconv2d_config = dict(
-        filters=self._conv_dims,
+        filters=self._conv_dim,
         kernel_size=(2, 2),
         strides=(2, 2),
         padding='valid',
@@ -175,7 +175,7 @@ def get_config(self) -> dict:
 
     config = dict(
         voxel_depth=self._voxel_depth,
-        conv_dims=self._conv_dims,
+        conv_dim=self._conv_dim,
         num_conv=self._num_conv,
         use_group_norm=self._use_group_norm,
         predict_classes=self._predict_classes,
diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
index a64b0784560..02275af3465 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head_test.py
@@ -18,6 +18,9 @@
 import tensorflow as tf  # type: ignore
 from absl.testing import parameterized  # type: ignore
 
+from official.vision.beta.projects.mesh_rcnn.configs import \
+    mesh_rcnn as mesh_rcnn_config
+from official.vision.beta.projects.mesh_rcnn.modeling import factory
 from official.vision.beta.projects.mesh_rcnn.modeling.heads import voxel_head
 
 
@@ -33,12 +36,12 @@
   'voxel_depth': 24, 'batch_size': 32, 'num_input_channels': 256},
 )
 class VoxelHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Test for Mesh R-CNN Voxel Prediction Head."""
+  """Test for Mesh R-CNN Voxel Branch Prediction Head."""
   def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     tf.keras.backend.set_image_data_format('channels_last')
     self._num_classes = 5
-    self._conv_dims = 256
+    self._conv_dim = 256
     self._use_group_norm = False
 
   def _get_expected_out_shape(self,
@@ -73,7 +76,7 @@ def test_network_creation(self,
                             num_input_channels: int) -> None:
     """Verify the output shapes of the voxel head."""
     # pylint: disable=missing-param-doc
-    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
                                 self._num_classes)
@@ -98,7 +101,7 @@ def test_serialize_deserialize(self,
                                  num_input_channels: int) -> None:
     """Create a network object that sets all of its config options."""
     # pylint: disable=missing-param-doc
-    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
                                 self._num_classes)
@@ -114,15 +117,15 @@ def test_serialize_deserialize(self,
     self.assertAllEqual(head.get_config(), deserialized.get_config())
 
   def test_gradient_pass_though(self,
-                            predict_classes: bool,
-                            class_based_voxel: bool,
-                            num_conv: int,
-                            voxel_depth: int,
-                            batch_size: int,
-                            num_input_channels: int) -> None:
+                                predict_classes: bool,
+                                class_based_voxel: bool,
+                                num_conv: int,
+                                voxel_depth: int,
+                                batch_size: int,
+                                num_input_channels: int) -> None:
     """Ensure the gradients of the layer are not None."""
     # pylint: disable=missing-param-doc
-    head = voxel_head.VoxelHead(voxel_depth, self._conv_dims, num_conv,
+    head = voxel_head.VoxelHead(voxel_depth, self._conv_dim, num_conv,
                                 self._use_group_norm, predict_classes,
                                 not predict_classes, class_based_voxel,
                                 self._num_classes)
@@ -146,6 +149,27 @@ def test_gradient_pass_though(self,
 
     self.assertNotIn(None, grad)
 
+  def test_build_from_config(self,
+                             predict_classes: bool,
+                             class_based_voxel: bool,
+                             num_conv: int,
+                             voxel_depth: int,
+                             batch_size: int,
+                             num_input_channels: int) -> None:
+    """Test head creation from config and factory."""
+    # pylint: disable=missing-param-doc,unused-argument
+    cfg = mesh_rcnn_config.VoxelHead(voxel_depth=voxel_depth,
+              conv_dim=self._conv_dim,
+              num_conv=num_conv,
+              use_group_norm=self._use_group_norm,
+              predict_classes=predict_classes,
+              bilinearly_upscale_input=not predict_classes,
+              class_based_voxel=class_based_voxel,
+              num_classes=self._num_classes)
+    _ = factory.build_voxel_head(cfg,
+                                 kernel_regularizer=None,
+                                 bias_regularizer=None,
+                                 activity_regularizer=None)
 
 if __name__ == '__main__':
   tf.test.main()

From 9656f03b8002dadbc3f25a996411bba27ef5bc4c Mon Sep 17 00:00:00 2001
From: Zach Ghera <zpghera00@gmail.com>
Date: Tue, 9 Nov 2021 14:37:30 -0500
Subject: [PATCH 11/11] Change 'tf.cond' to 'if-else' statements in voxel head.

Since these booleans are set when the model is initialized, it is cheaper to evaluate them at graph-construction time (with 'if-else') than at runtime (with 'tf.cond').
---
 .../mesh_rcnn/modeling/heads/voxel_head.py     | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
index fa796d91bfc..4e254b99c2d 100755
--- a/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
+++ b/official/vision/beta/projects/mesh_rcnn/modeling/heads/voxel_head.py
@@ -71,10 +71,8 @@ def __init__(self,
     self._conv_dim = conv_dim
     self._num_conv = num_conv
     self._use_group_norm = use_group_norm
-    self._predict_classes = tf.constant(
-        predict_classes, dtype=tf.bool)
-    self._bilinearly_upscale_input = tf.constant(
-        bilinearly_upscale_input, dtype=tf.bool)
+    self._predict_classes = predict_classes
+    self._bilinearly_upscale_input = bilinearly_upscale_input
     self._class_based_voxel = class_based_voxel
     self._num_classes = num_classes if (
         predict_classes and class_based_voxel) else 1
@@ -156,16 +154,16 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
       (N, V, V, V) for ShapeNet model and (N, C, V, V, V) for Pix3D model
       where N = batch size, V = `voxel_depth`, and C = `num_classes`.
     """
-    x = tf.cond(self._bilinearly_upscale_input,
-                true_fn=lambda: self._interpolate(inputs),
-                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(inputs))
+    if self._bilinearly_upscale_input:
+      x = self._interpolate(inputs)
+    else:
+      x = inputs
     for layer in self._conv2d_norm_relu_layers:
       x = layer(x)
     x = self._deconv(x)
     x = self._predictor(x)
-    x = tf.cond(self._predict_classes,
-                true_fn=lambda: self._reshape(x),
-                false_fn=lambda: tf.keras.layers.Lambda(lambda x: x)(x))
+    if self._predict_classes:
+      x = self._reshape(x)
     return x
 
   def get_config(self) -> dict: