BinItAI · GeorgePearse · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/visdet/cv/cnn/bricks/activation.py b/visdet/cv/cnn/bricks/activation.py
@@ -45,12 +45,15 @@ class Clamp(nn.Module):
             Default to 1.
     """
 
+    min: float
+    max: float
+
     def __init__(self, min: float = -1.0, max: float = 1.0):
         super().__init__()
-        self.min = min
-        self.max = max
+        object.__setattr__(self, "min", min)
+        object.__setattr__(self, "max", max)
 
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward function.
 
         Args:

diff --git a/visdet/cv/cnn/bricks/conv_module.py b/visdet/cv/cnn/bricks/conv_module.py
@@ -1,10 +1,15 @@
+from __future__ import annotations
+
 # Copyright (c) OpenMMLab. All rights reserved.
 import warnings
 from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, cast
 
 import torch
 import torch.nn as nn
+from torch import Tensor
 from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.modules.conv import _ConvNd
 from torch.nn.modules.instancenorm import _InstanceNorm
 
 from visdet.cv.cnn.bricks.activation import build_activation_layer
@@ -14,8 +19,10 @@
 from visdet.engine.model import constant_init, kaiming_init
 from visdet.engine.registry import MODELS
 
+EfficientConvBnEvalForward = Callable[[_BatchNorm, _ConvNd, Tensor], Tensor]
+
 
-def efficient_conv_bn_eval_forward(bn: _BatchNorm, conv: nn.modules.conv._ConvNd, x: torch.Tensor):
+def efficient_conv_bn_eval_forward(bn: _BatchNorm, conv: _ConvNd, x: Tensor) -> Tensor:
     """
     Implementation based on https://arxiv.org/abs/2305.11624
     "Tune-Mode ConvBN Blocks For Efficient Transfer Learning"
@@ -31,31 +38,37 @@ def efficient_conv_bn_eval_forward(bn: _BatchNorm, conv: nn.modules.conv._ConvNd
     """
     # These lines of code are designed to deal with various cases
     # like bn without affine transform, and conv without bias
+    running_var = bn.running_var
+    running_mean = bn.running_mean
+    if running_var is None or running_mean is None:
+        msg = "BatchNorm running stats must exist when efficient_conv_bn_eval_forward is enabled"
+        raise RuntimeError(msg)
+
     weight_on_the_fly = conv.weight
     if conv.bias is not None:
         bias_on_the_fly = conv.bias
     else:
-        bias_on_the_fly = torch.zeros_like(bn.running_var)
+        bias_on_the_fly = torch.zeros_like(running_var)
 
     if bn.weight is not None:
         bn_weight = bn.weight
     else:
-        bn_weight = torch.ones_like(bn.running_var)
+        bn_weight = torch.ones_like(running_var)
 
     if bn.bias is not None:
         bn_bias = bn.bias
     else:
-        bn_bias = torch.zeros_like(bn.running_var)
+        bn_bias = torch.zeros_like(running_var)
 
     # shape of [C_out, 1, 1, 1] in Conv2d
-    weight_coeff = torch.rsqrt(bn.running_var + bn.eps).reshape([-1] + [1] * (len(conv.weight.shape) - 1))
+    weight_coeff = torch.rsqrt(running_var + bn.eps).reshape([-1] + [1] * (len(conv.weight.shape) - 1))
     # shape of [C_out, 1, 1, 1] in Conv2d
     coefff_on_the_fly = bn_weight.view_as(weight_coeff) * weight_coeff
 
     # shape of [C_out, C_in, k, k] in Conv2d
     weight_on_the_fly = weight_on_the_fly * coefff_on_the_fly
     # shape of [C_out] in Conv2d
-    bias_on_the_fly = bn_bias + coefff_on_the_fly.flatten() * (bias_on_the_fly - bn.running_mean)
+    bias_on_the_fly = bn_bias + coefff_on_the_fly.flatten() * (bias_on_the_fly - running_mean)
 
     return conv._conv_forward(x, weight_on_the_fly, bias_on_the_fly)
 
@@ -117,6 +130,15 @@ class ConvModule(nn.Module):
     """
 
     _abbr_ = "conv_block"
+    conv_cfg: dict[str, Any] | None
+    norm_cfg: dict[str, Any] | None
+    act_cfg: dict[str, Any] | None
+    order: tuple[str, str, str]
+    padding_layer: nn.Module | None
+    activate: nn.Module | None
+    efficient_conv_bn_eval_forward: EfficientConvBnEvalForward | None
+    norm_name: str | None
+    conv: _ConvNd
 
     def __init__(
         self,
@@ -142,22 +164,26 @@ def __init__(
         assert norm_cfg is None or isinstance(norm_cfg, dict)
         assert act_cfg is None or isinstance(act_cfg, dict)
         official_padding_mode = ["zeros", "circular"]
-        self.conv_cfg = conv_cfg
-        self.norm_cfg = norm_cfg
-        self.act_cfg = act_cfg
-        self.inplace = inplace
-        self.with_spectral_norm = with_spectral_norm
-        self.with_explicit_padding = padding_mode not in official_padding_mode
-        self.order = order
+        # Store plain config values (dicts, flags, order tuple) directly - these are simple data, not tensors or submodules
+        object.__setattr__(self, "conv_cfg", conv_cfg)
+        object.__setattr__(self, "norm_cfg", norm_cfg)
+        object.__setattr__(self, "act_cfg", act_cfg)
+        object.__setattr__(self, "inplace", inplace)
+        object.__setattr__(self, "with_spectral_norm", with_spectral_norm)
+        object.__setattr__(self, "with_explicit_padding", padding_mode not in official_padding_mode)
+        object.__setattr__(self, "order", order)
         assert isinstance(self.order, tuple) and len(self.order) == 3
         assert set(order) == {"conv", "norm", "act"}
 
-        self.with_norm = norm_cfg is not None
-        self.with_activation = act_cfg is not None
+        object.__setattr__(self, "with_norm", norm_cfg is not None)
+        object.__setattr__(self, "with_activation", act_cfg is not None)
+        self.padding_layer: nn.Module | None = None
+        self.activate: nn.Module | None = None
+        object.__setattr__(self, "efficient_conv_bn_eval_forward", None)
         # if the conv layer is before a norm layer, bias is unnecessary.
         if bias == "auto":
             bias = not self.with_norm
-        self.with_bias = bias
+        object.__setattr__(self, "with_bias", bias)
 
         if self.with_explicit_padding:
             pad_cfg = dict(type=padding_mode)
@@ -166,7 +192,7 @@ def __init__(
         # reset padding to 0 for conv module
         conv_padding = 0 if self.with_explicit_padding else padding
         # build convolution layer
-        self.conv = build_conv_layer(
+        conv_layer = build_conv_layer(
             conv_cfg,
             in_channels,
             out_channels,
@@ -177,16 +203,17 @@ def __init__(
             groups=groups,
             bias=bias,
         )
+        self.conv = cast(_ConvNd, conv_layer)
         # export the attributes of self.conv to a higher level for convenience
-        self.in_channels = self.conv.in_channels
-        self.out_channels = self.conv.out_channels
-        self.kernel_size = self.conv.kernel_size
-        self.stride = self.conv.stride
-        self.padding = padding
-        self.dilation = self.conv.dilation
-        self.transposed = self.conv.transposed
-        self.output_padding = self.conv.output_padding
-        self.groups = self.conv.groups
+        object.__setattr__(self, "in_channels", self.conv.in_channels)
+        object.__setattr__(self, "out_channels", self.conv.out_channels)
+        object.__setattr__(self, "kernel_size", self.conv.kernel_size)
+        object.__setattr__(self, "stride", self.conv.stride)
+        object.__setattr__(self, "padding", padding)
+        object.__setattr__(self, "dilation", self.conv.dilation)
+        object.__setattr__(self, "transposed", self.conv.transposed)
+        object.__setattr__(self, "output_padding", self.conv.output_padding)
+        object.__setattr__(self, "groups", self.conv.groups)
 
         if self.with_spectral_norm:
             self.conv = nn.utils.spectral_norm(self.conv)
@@ -198,19 +225,22 @@ def __init__(
                 norm_channels = out_channels
             else:
                 norm_channels = in_channels
-            self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)  # type: ignore
-            self.add_module(self.norm_name, norm)
+            assert norm_cfg is not None
+            norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
+            object.__setattr__(self, "norm_name", norm_name)
+            self.add_module(norm_name, norm)
             if self.with_bias:
                 if isinstance(norm, (_BatchNorm, _InstanceNorm)):
                     warnings.warn("Unnecessary conv bias before batch/instance norm")
         else:
-            self.norm_name = None  # type: ignore
+            object.__setattr__(self, "norm_name", None)
 
         self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
 
         # build activation layer
         if self.with_activation:
-            act_cfg_ = act_cfg.copy()  # type: ignore
+            assert act_cfg is not None
+            act_cfg_ = cast(dict[str, Any], act_cfg.copy())
             # nn.Tanh has no 'inplace' argument
             if act_cfg_["type"] not in [
                 "Tanh",
@@ -227,7 +257,7 @@ def __init__(
         self.init_weights()
 
     @property
-    def norm(self):
+    def norm(self) -> nn.Module | None:
         if self.norm_name:
             return getattr(self, self.norm_name)
         else:
@@ -244,94 +274,113 @@ def init_weights(self):
         # Note: For PyTorch's conv layers, they will be overwritten by our
         #    initialization implementation using default ``kaiming_init``.
         if not hasattr(self.conv, "init_weights"):
-            if self.with_activation and self.act_cfg["type"] == "LeakyReLU":
+            if self.with_activation and self.act_cfg is not None and self.act_cfg["type"] == "LeakyReLU":
                 nonlinearity = "leaky_relu"
                 a = self.act_cfg.get("negative_slope", 0.01)
             else:
                 nonlinearity = "relu"
                 a = 0
             kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
         if self.with_norm:
-            constant_init(self.norm, 1, bias=0)
+            norm_layer = self.norm
+            if norm_layer is not None:
+                constant_init(norm_layer, 1, bias=0)
 
     def forward(self, x: torch.Tensor, activate: bool = True, norm: bool = True) -> torch.Tensor:
         layer_index = 0
         while layer_index < len(self.order):
             layer = self.order[layer_index]
             if layer == "conv":
                 if self.with_explicit_padding:
+                    if self.padding_layer is None:
+                        raise RuntimeError("Padding layer is not initialized")
                     x = self.padding_layer(x)
                 # if the next operation is norm and we have a norm layer in
                 # eval mode and we have enabled `efficient_conv_bn_eval` for
                 # the conv operator, then activate the optimized forward and
                 # skip the next norm operator since it has been fused
+                norm_layer = self.norm
                 if (
                     layer_index + 1 < len(self.order)
                     and self.order[layer_index + 1] == "norm"
                     and norm
                     and self.with_norm
-                    and not self.norm.training
+                    and norm_layer is not None
+                    and not norm_layer.training
                     and self.efficient_conv_bn_eval_forward is not None
                 ):
-                    self.conv.forward = partial(self.efficient_conv_bn_eval_forward, self.norm, self.conv)
+                    bn_module = cast(_BatchNorm, norm_layer)
+                    self.conv.forward = partial(self.efficient_conv_bn_eval_forward, bn_module, self.conv)  # type: ignore[method-assign]
                     layer_index += 1
                     x = self.conv(x)
-                    del self.conv.forward
+                    del self.conv.forward  # type: ignore[attr-defined]
                 else:
                     x = self.conv(x)
             elif layer == "norm" and norm and self.with_norm:
-                x = self.norm(x)
+                norm_layer = self.norm
+                if norm_layer is None:
+                    raise RuntimeError("Norm layer not initialized")
+                x = norm_layer(x)
             elif layer == "act" and activate and self.with_activation:
+                if self.activate is None:
+                    raise RuntimeError("Activation layer not initialized")
                 x = self.activate(x)
             layer_index += 1
         return x
 
-    def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval=True):
+    def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval: bool = True) -> None:
         # efficient_conv_bn_eval works for conv + bn
         # with `track_running_stats` option
-        if efficient_conv_bn_eval and self.norm and isinstance(self.norm, _BatchNorm) and self.norm.track_running_stats:
-            self.efficient_conv_bn_eval_forward = efficient_conv_bn_eval_forward
+        norm_layer = self.norm
+        if (
+            efficient_conv_bn_eval
+            and norm_layer is not None
+            and isinstance(norm_layer, _BatchNorm)
+            and norm_layer.track_running_stats
+        ):
+            object.__setattr__(self, "efficient_conv_bn_eval_forward", efficient_conv_bn_eval_forward)
         else:
-            self.efficient_conv_bn_eval_forward = None  # type: ignore
+            object.__setattr__(self, "efficient_conv_bn_eval_forward", None)
 
     @staticmethod
     def create_from_conv_bn(
-        conv: torch.nn.modules.conv._ConvNd,
-        bn: torch.nn.modules.batchnorm._BatchNorm,
-        efficient_conv_bn_eval=True,
+        conv: _ConvNd,
+        bn: _BatchNorm,
+        efficient_conv_bn_eval: bool = True,
     ) -> "ConvModule":
         """Create a ConvModule from a conv and a bn module."""
         self = ConvModule.__new__(ConvModule)
         super(ConvModule, self).__init__()
 
-        self.conv_cfg = None
-        self.norm_cfg = None
-        self.act_cfg = None
-        self.inplace = False
-        self.with_spectral_norm = False
-        self.with_explicit_padding = False
-        self.order = ("conv", "norm", "act")
+        object.__setattr__(self, "conv_cfg", None)
+        object.__setattr__(self, "norm_cfg", None)
+        object.__setattr__(self, "act_cfg", None)
+        object.__setattr__(self, "inplace", False)
+        object.__setattr__(self, "with_spectral_norm", False)
+        object.__setattr__(self, "with_explicit_padding", False)
+        object.__setattr__(self, "order", ("conv", "norm", "act"))
 
-        self.with_norm = True
-        self.with_activation = False
-        self.with_bias = conv.bias is not None
+        object.__setattr__(self, "with_norm", True)
+        object.__setattr__(self, "with_activation", False)
+        object.__setattr__(self, "with_bias", conv.bias is not None)
 
         # build convolution layer
         self.conv = conv
         # export the attributes of self.conv to a higher level for convenience
-        self.in_channels = self.conv.in_channels
-        self.out_channels = self.conv.out_channels
-        self.kernel_size = self.conv.kernel_size
-        self.stride = self.conv.stride
-        self.padding = self.conv.padding
-        self.dilation = self.conv.dilation
-        self.transposed = self.conv.transposed
-        self.output_padding = self.conv.output_padding
-        self.groups = self.conv.groups
+        object.__setattr__(self, "in_channels", self.conv.in_channels)
+        object.__setattr__(self, "out_channels", self.conv.out_channels)
+        object.__setattr__(self, "kernel_size", self.conv.kernel_size)
+        object.__setattr__(self, "stride", self.conv.stride)
+        object.__setattr__(self, "padding", self.conv.padding)
+        object.__setattr__(self, "dilation", self.conv.dilation)
+        object.__setattr__(self, "transposed", self.conv.transposed)
+        object.__setattr__(self, "output_padding", self.conv.output_padding)
+        object.__setattr__(self, "groups", self.conv.groups)
 
         # build normalization layers
-        self.norm_name, norm = "bn", bn
-        self.add_module(self.norm_name, norm)
+        norm_name: str = "bn"
+        object.__setattr__(self, "norm_name", norm_name)
+        self.add_module(norm_name, bn)
 
         self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
 

diff --git a/visdet/cv/cnn/bricks/drop.py b/visdet/cv/cnn/bricks/drop.py
@@ -37,9 +37,11 @@ class DropPath(nn.Module):
         drop_prob (float): Probability of the path to be zeroed. Default: 0.1
     """
 
+    drop_prob: float
+
     def __init__(self, drop_prob: float = 0.1):
         super().__init__()
-        self.drop_prob = drop_prob
+        object.__setattr__(self, "drop_prob", drop_prob)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return drop_path(x, self.drop_prob, self.training)
@@ -61,10 +63,14 @@ def __init__(self, drop_prob: float = 0.5, inplace: bool = False):
         super().__init__(p=drop_prob, inplace=inplace)
 
 
-def build_dropout(cfg: dict | float | None, default_args: dict | None = None) -> Any:
+def build_dropout(cfg: dict[str, Any] | float | None, default_args: dict | None = None) -> Any:
     """Builder for drop out layers."""
     if cfg is None:
         return None
     if isinstance(cfg, float):
-        cfg = dict(type="Dropout", drop_prob=cfg)
-    return MODELS.build(cfg, default_args=default_args)
+        cfg_dict: dict[str, Any] = dict(type="Dropout", drop_prob=cfg)
+    else:
+        if not isinstance(cfg, dict):
+            raise TypeError(f"cfg must be dict or float, but got {type(cfg)!r}")
+        cfg_dict = cfg
+    return MODELS.build(cfg_dict, default_args=default_args)