diff --git a/projects/BEVFusion/README.md b/projects/BEVFusion/README.md
index 248cb2515..6586b923c 100644
--- a/projects/BEVFusion/README.md
+++ b/projects/BEVFusion/README.md
@@ -173,6 +173,35 @@ python projects/BEVFusion/deploy/torch2onnx.py \
 ```
 
+To export a camera-only model, use the following commands:
+
+```bash
+DEPLOY_CFG_MAIN_BODY=configs/deploy/bevfusion_camera_point_bev_tensorrt_dynamic.py
+DEPLOY_CFG_IMAGE_BACKBONE=configs/deploy/bevfusion_camera_backbone_tensorrt_dynamic.py
+
+MODEL_CFG=...
+CHECKPOINT_PATH=...
+WORK_DIR=...
+
+python projects/BEVFusion/deploy/torch2onnx.py \
+    ${DEPLOY_CFG_MAIN_BODY} \
+    ${MODEL_CFG} \
+    ${CHECKPOINT_PATH} \
+    --device cuda:0 \
+    --work-dir ${WORK_DIR} \
+    --module camera_bev_only
+
+python projects/BEVFusion/deploy/torch2onnx.py \
+    ${DEPLOY_CFG_IMAGE_BACKBONE} \
+    ${MODEL_CFG} \
+    ${CHECKPOINT_PATH} \
+    --device cuda:0 \
+    --work-dir ${WORK_DIR} \
+    --module image_backbone
+```
+
+Note that this camera-only model still takes lidar pointclouds as input to build its depth map; a variant that does not require lidar pointclouds will be published in a future release.
+
 This will generate two models in the `WORK_DIR` folder. `end2end.onnx` corresponds to the standard exported model, whereas `end2end_fixed.onnx` contains a fix for the `TopK` operator (compatibility issues between `mmdeploy` and `TensorRT`).
 
 ## TODO
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index a1589355f..243b3beb5 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -355,7 +355,7 @@ def extract_feat(
         )
         features.append(img_feature)
 
-        if points is not None and self.pts_middle_encoder is not None:
+        if self.pts_middle_encoder is not None:
             pts_feature = self.extract_pts_feat(
                 batch_inputs_dict.get("voxels", {}).get("voxels", None),
                 batch_inputs_dict.get("voxels", {}).get("coors", None),
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_camera_point_bev_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_camera_point_bev_tensorrt_dynamic.py
new file mode 100644
index 000000000..c9b1a9475
--- /dev/null
+++ b/projects/BEVFusion/configs/deploy/bevfusion_camera_point_bev_tensorrt_dynamic.py
@@ -0,0 +1,93 @@
+codebase_config = dict(type="mmdet3d", task="VoxelDetection", model_type="end2end")
+
+custom_imports = dict(
+    imports=[
+        "projects.BEVFusion.deploy",
+        "projects.BEVFusion.bevfusion",
+        "projects.SparseConvolution",
+    ],
+    allow_failed_imports=False,
+)
+
+depth_bins = 129
+# TODO(KokSeang): Read this parameter from a base config
+feature_dims = (48, 96)
+# image_dims = (640, 576)
+
+backend_config = dict(
+    type="tensorrt",
+    common_config=dict(max_workspace_size=1 << 32),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                # TODO(TIERIV): Optimize. Currently, using points increases latency significantly.
+                points=dict(min_shape=[5000, 4], opt_shape=[50000, 4], max_shape=[200000, 4]),
+                lidar2image=dict(min_shape=[1, 4, 4], opt_shape=[6, 4, 4], max_shape=[6, 4, 4]),
+                img_aug_matrix=dict(min_shape=[1, 4, 4], opt_shape=[6, 4, 4], max_shape=[6, 4, 4]),
+                geom_feats=dict(
+                    min_shape=[0 * depth_bins * feature_dims[0] * feature_dims[1], 4],
+                    opt_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1] // 2, 4],
+                    max_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1], 4],
+                ),
+                kept=dict(
+                    min_shape=[0 * depth_bins * feature_dims[0] * feature_dims[1]],
+                    opt_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1]],
+                    max_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1]],
+                ),
+                ranks=dict(
+                    min_shape=[0 * depth_bins * feature_dims[0] * feature_dims[1]],
+                    opt_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1] // 2],
+                    max_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1]],
+                ),
+                indices=dict(
+                    min_shape=[0 * depth_bins * feature_dims[0] * feature_dims[1]],
+                    opt_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1] // 2],
+                    max_shape=[6 * depth_bins * feature_dims[0] * feature_dims[1]],
+                ),
+                image_feats=dict(
+                    min_shape=[0, 256, feature_dims[0], feature_dims[1]],
+                    opt_shape=[6, 256, feature_dims[0], feature_dims[1]],
+                    max_shape=[6, 256, feature_dims[0], feature_dims[1]],
+                ),
+            )
+        )
+    ],
+)
+
+onnx_config = dict(
+    type="onnx",
+    export_params=True,
+    keep_initializers_as_inputs=False,
+    opset_version=17,
+    save_file="camera_point_bev.onnx",
+    input_names=["points", "lidar2image", "img_aug_matrix", "geom_feats", "kept", "ranks", "indices", "image_feats"],
+    output_names=["bbox_pred", "score", "label_pred"],
+    dynamic_axes={
+        "points": {
+            0: "num_points",
+        },
+        "lidar2image": {
+            0: "num_imgs",
+        },
+        "img_aug_matrix": {
+            0: "num_imgs",
+        },
+        "geom_feats": {
+            0: "num_kept",
+        },
+        "kept": {
+            0: "num_geom_feats",
+        },
+        "ranks": {
+            0: "num_kept",
+        },
+        "indices": {
+            0: "num_kept",
+        },
+        "image_feats": {
+            0: "num_imgs",
+        },
+    },
+    input_shape=None,
+    verbose=True,
+)
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
new file mode 100644
index 000000000..9e089fe05
--- /dev/null
+++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
@@ -0,0 +1,49 @@
+codebase_config = dict(type="mmdet3d", task="VoxelDetection", model_type="end2end")
+
+custom_imports = dict(
+    imports=[
+        "projects.BEVFusion.deploy",
+        "projects.BEVFusion.bevfusion",
+        "projects.SparseConvolution",
+    ],
+    allow_failed_imports=False,
+)
+
+backend_config = dict(
+    type="tensorrt",
+    common_config=dict(max_workspace_size=1 << 32),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                voxels=dict(
+                    min_shape=[1, 10, 5], opt_shape=[64000, 10, 5], max_shape=[256000, 10, 5]
+                ),  # [M, max points per voxel, features]; features=5 when intensity is included
+                coors=dict(min_shape=[1, 3], opt_shape=[64000, 3], max_shape=[256000, 3]),
+                num_points_per_voxel=dict(min_shape=[1], opt_shape=[64000], max_shape=[256000]),
+            )
+        )
+    ],
+)
+
+onnx_config = dict(
+    type="onnx",
+    export_params=True,
+    keep_initializers_as_inputs=False,
+    opset_version=17,
+    save_file="main_body.onnx",
+    input_names=["voxels", "coors", "num_points_per_voxel"],
+    output_names=["bbox_pred", "score", "label_pred"],
+    dynamic_axes={
+        "voxels": {
+            0: "voxels_num",
+        },
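As a sanity check on the profile bounds in the camera deploy config above, the min/opt/max sizes of `geom_feats`, `kept`, `ranks`, and `indices` all derive from a single frustum-cell count. A minimal sketch of that arithmetic (values copied from the config; nothing here is read from the model):

```python
# Sketch: derivation of the dynamic-shape bounds in
# bevfusion_camera_point_bev_tensorrt_dynamic.py (illustrative only).
num_cams = 6
depth_bins = 129
feature_dims = (48, 96)  # (H, W) of the image feature map

# Total number of frustum cells produced by the view transform.
n_cells = num_cams * depth_bins * feature_dims[0] * feature_dims[1]

# geom_feats / ranks / indices are gathered over the *kept* cells, so they
# range from 0 (nothing lands inside the BEV grid) up to every cell; the opt
# profile assumes roughly half are kept. kept itself is a mask over all cells.
profiles = {
    "geom_feats": ([0, 4], [n_cells // 2, 4], [n_cells, 4]),
    "kept": ([0], [n_cells], [n_cells]),
    "ranks": ([0], [n_cells // 2], [n_cells]),
    "indices": ([0], [n_cells // 2], [n_cells]),
}
for name, (min_s, opt_s, max_s) in profiles.items():
    print(f"{name}: min={min_s} opt={opt_s} max={max_s}")
```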
+ "coors": { + 0: "voxels_num", + }, + "num_points_per_voxel": { + 0: "voxels_num", + }, + }, + input_shape=None, + verbose=True, +) diff --git a/projects/BEVFusion/deploy/__init__.py b/projects/BEVFusion/deploy/__init__.py index 356b5149c..a1662c4b8 100644 --- a/projects/BEVFusion/deploy/__init__.py +++ b/projects/BEVFusion/deploy/__init__.py @@ -1,7 +1,3 @@ -from . import base from .voxel_detection import VoxelDetection -__all__ = [ - "base", - "VoxelDetection", -] +__all__ = ["VoxelDetection"] diff --git a/projects/BEVFusion/deploy/builder.py b/projects/BEVFusion/deploy/builder.py new file mode 100644 index 000000000..56acfcd95 --- /dev/null +++ b/projects/BEVFusion/deploy/builder.py @@ -0,0 +1,233 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Any + +import torch +from data_classes import BuilderData, ModelData, ModelInputs, SetupConfigs +from mmdeploy.apis import build_task_processor +from mmdeploy.apis.onnx.passes import optimize_onnx +from mmdeploy.core import patch_model +from mmdeploy.utils import ( + IR, + Backend, + get_backend, + get_dynamic_axes, + get_ir_config, +) +from mmdet3d.registry import MODELS +from mmengine.registry import RUNNERS + + +class ExportBuilder: + + def __init__(self, setup_configs: SetupConfigs): + self.setup_configs = setup_configs + + def build(self) -> BuilderData: + """Build configs and a PyTorch model for ONNX export. + + Returns: + BuilderData with ModelData, ir_configs, context_info, and patched_model (Pytorch model). + """ + # Build the model data + model_data = self._build_model_data() + + # Build the backend configs + backend = self._build_backend() + + # Build the optimize configs + optimize = self._build_optimize_configs(backend) + + # Build the IR configs + ir_configs = self._build_ir_configs() + + # Update the deploy config + self._update_deploy_cfg(ir_configs, backend) + + # Build the intermediate representations + ir = self._build_intermediate_representations() + + # Build the context info + context_info = self._build_context_info(ir, ir_configs, backend, optimize) + + # Patch the model + patched_model = self._build_patched_model(model_data, backend, ir) + + return BuilderData( + model_data=model_data, + ir_configs=ir_configs, + context_info=context_info, + patched_model=patched_model, + ) + + def _build_model_data(self) -> ModelData: + """Build the model. + + Args: + setup_config: Setup configuration for the model. + + Returns: + Model data. 
+ """ + data_preprocessor = MODELS.build(self.setup_configs.data_preprocessor_cfg) + + # load a sample + runner = RUNNERS.build(self.setup_configs.model_cfg) + runner.load_or_resume() + data = runner.test_dataloader.dataset[self.setup_configs.sample_idx] + + # create model an inputs + task_processor = build_task_processor( + self.setup_configs.model_cfg, self.setup_configs.deploy_cfg, self.setup_configs.device + ) + + torch_model = task_processor.build_pytorch_model(self.setup_configs.checkpoint_path) + data, model_inputs = task_processor.create_input(data, data_preprocessor=data_preprocessor, model=torch_model) + + if isinstance(model_inputs, list) and len(model_inputs) == 1: + model_inputs = model_inputs[0] + + data_samples = data["data_samples"] + input_metas = {"data_samples": data_samples, "mode": "predict", "data_preprocessor": data_preprocessor} + + ( + voxels, + coors, + num_points_per_voxel, + points, + camera_mask, + imgs, + lidar2img, + cam2image, + camera2lidar, + geom_feats, + kept, + ranks, + indices, + ) = model_inputs + + return ModelData( + model_inputs=ModelInputs( + voxels=voxels, + coors=coors, + num_points_per_voxel=num_points_per_voxel, + points=points, + camera_mask=camera_mask, + imgs=imgs, + lidar2img=lidar2img, + cam2image=cam2image, + camera2lidar=camera2lidar, + geom_feats=geom_feats, + kept=kept, + ranks=ranks, + indices=indices, + ), + torch_model=torch_model, + input_metas=input_metas, + ) + + @staticmethod + def _add_or_update(cfg: dict, key: str, val: Any) -> None: + """ + Update key with the values to cfg. + """ + if key in cfg and isinstance(cfg[key], dict) and isinstance(val, dict): + cfg[key].update(val) + else: + cfg[key] = val + + def _update_deploy_cfg(self, ir_configs: dict, backend: Backend) -> None: + """Update the deploy config. + + Args: + ir_configs: IR configs. + backend_configs: Backend configs. + """ + self._add_or_update(self.setup_configs.deploy_cfg, "ir_config", ir_configs) + self._add_or_update(self.setup_configs.deploy_cfg, "backend_config", dict(type=backend)) + + def _build_patched_model(self, model_data: ModelData, backend: str, ir: IR) -> torch.nn.Module: + """Build the patched model. + + Returns: + Patched model. + """ + patched_model = patch_model(model_data.torch_model, cfg=self.setup_configs.deploy_cfg, backend=backend, ir=ir) + # Set Patched model to eval() for inference status + patched_model.eval() + patched_model.to(self.setup_configs.device) + return patched_model + + def _build_backend(self) -> str: + """Build the backend configs. + + Returns: + Backend configs. + """ + return get_backend(self.setup_configs.deploy_cfg).value + + def _build_optimize_configs(self, backend: str) -> dict: + """Build the optimize configs. + + Returns: + Optimize configs. + """ + optimize = self.setup_configs.onnx_cfg.get("optimize", False) + if backend == Backend.NCNN.value: + """NCNN backend needs a precise blob counts, while using onnx optimizer + will merge duplicate initilizers without reference count.""" + optimize = False + return optimize + + def _build_ir_configs(self) -> dict: + """Build the IR configs. + + Returns: + IR configs. 
+ """ + onnx_cfg = self.setup_configs.onnx_cfg + input_names = onnx_cfg["input_names"] + output_names = onnx_cfg["output_names"] + axis_names = input_names + output_names + dynamic_axes = get_dynamic_axes(self.setup_configs.deploy_cfg, axis_names) + verbose = not onnx_cfg.get("strip_doc_string", True) or onnx_cfg.get("verbose", False) + keep_initializers_as_inputs = onnx_cfg.get("keep_initializers_as_inputs", True) + opset_version = onnx_cfg.get("opset_version", 11) + + ir_configs = dict( + type="onnx", + input_names=input_names, + output_names=output_names, + opset_version=opset_version, + dynamic_axes=dynamic_axes, + verbose=verbose, + keep_initializers_as_inputs=keep_initializers_as_inputs, + ) + return ir_configs + + def _build_intermediate_representations(self) -> IR: + """Build the intermediate representations (IR). + + Returns: + Intermediate representation (IR). + """ + return IR.get(get_ir_config(self.setup_configs.deploy_cfg)["type"]) + + def _build_context_info(self, ir: IR, ir_configs: dict, backend: str, optimize: bool) -> dict: + """Build the context info. + + Returns: + Context info. + """ + if optimize: + onnx_custom_passes = optimize_onnx + else: + onnx_custom_passes = None + + return dict( + deploy_cfg=self.setup_configs.deploy_cfg, + ir=ir, + backend=backend, + opset=ir_configs["opset_version"], + cfg=self.setup_configs.deploy_cfg, + onnx_custom_passes=onnx_custom_passes, + ) diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index ddc0b7937..85bdd1704 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -21,6 +21,7 @@ def forward(self, imgs): class TrtBevFusionMainContainer(torch.nn.Module): + def __init__(self, mod, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.mod = mod @@ -72,7 +73,18 @@ def forward( ) outputs = mod._forward(batch_inputs_dict, using_image_features=True) + bbox_pred, score, label_pred = self.postprocessing(outputs) + return bbox_pred, score, label_pred + + def postprocessing(self, outputs: dict): + """Postprocess the outputs of the model to get the final predictions. + + Args: + outputs (dict): The outputs of the model. + Returns: + dict: The final predictions. 
+ """ # The following code is taken from # projects/BEVFusion/bevfusion/bevfusion_head.py # It is used to simplify the post process in deployment @@ -87,3 +99,39 @@ def forward( ) return bbox_pred, score, outputs["query_labels"][0] + + +class TrtBevFusionCameraOnlyContainer(TrtBevFusionMainContainer): + def __init__(self, mod, *args, **kwargs) -> None: + super().__init__(mod=mod, *args, **kwargs) + + def forward( + self, + lidar2img, + img_aug_matrix, + geom_feats, + kept, + ranks, + indices, + image_feats, + points=None, + ): + mod = self.mod + lidar_aug_matrix = torch.eye(4).unsqueeze(0).to(image_feats.device) + batch_inputs_dict = { + "imgs": image_feats.unsqueeze(0), + "lidar2img": lidar2img.unsqueeze(0), + "cam2img": None, + "cam2lidar": None, + "img_aug_matrix": img_aug_matrix.unsqueeze(0), + "img_aug_matrix_inverse": None, + "lidar_aug_matrix": lidar_aug_matrix, + "lidar_aug_matrix_inverse": lidar_aug_matrix, + "geom_feats": (geom_feats, kept, ranks, indices), + "points": [points] if points is not None else None, + } + + outputs = mod._forward(batch_inputs_dict, using_image_features=True) + bbox_pred, score, label_pred = self.postprocessing(outputs) + + return bbox_pred, score, label_pred diff --git a/projects/BEVFusion/deploy/data_classes.py b/projects/BEVFusion/deploy/data_classes.py new file mode 100644 index 000000000..4e244b441 --- /dev/null +++ b/projects/BEVFusion/deploy/data_classes.py @@ -0,0 +1,66 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from dataclasses import dataclass + +import torch + + +@dataclass(frozen=True) +class SetupConfigs: + """Setup configurations for the model.""" + + deploy_cfg: dict + model_cfg: dict + checkpoint_path: str + device: str + data_preprocessor_cfg: dict + sample_idx: int + module: str + onnx_cfg: dict + work_dir: str + + +@dataclass(frozen=True) +class ModelInputs: + """Model inputs for the model.""" + + voxels: torch.Tensor + coors: torch.Tensor + num_points_per_voxel: torch.Tensor + points: torch.Tensor + camera_mask: torch.Tensor + imgs: torch.Tensor + lidar2img: torch.Tensor + cam2image: torch.Tensor + camera2lidar: torch.Tensor + geom_feats: torch.Tensor + kept: torch.Tensor + ranks: torch.Tensor + indices: torch.Tensor + + +@dataclass(frozen=True) +class ModelData: + """Model data for the model.""" + + model_inputs: ModelInputs + torch_model: torch.nn.Module + input_metas: dict + + +@dataclass(frozen=True) +class BackendConfigs: + """Backend configurations for the model.""" + + type: str + optimize: bool + + +@dataclass(frozen=True) +class BuilderData: + """Builder data for the model.""" + + model_data: ModelData + ir_configs: dict + context_info: dict + patched_model: torch.nn.Module diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py new file mode 100644 index 000000000..1cccf22bc --- /dev/null +++ b/projects/BEVFusion/deploy/exporter.py @@ -0,0 +1,273 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+
+import logging
+import os.path as osp
+from typing import Optional
+
+import numpy as np
+import onnx
+import onnx_graphsurgeon as gs
+import torch
+from builder import ExportBuilder
+from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer
+from data_classes import ModelData, SetupConfigs
+from mmdeploy.core import RewriterContext
+from mmdeploy.utils import (
+    get_root_logger,
+)
+
+
+class Torch2OnnxExporter:
+
+    def __init__(self, setup_configs: SetupConfigs, log_level: str):
+        """Initialization of Torch2OnnxExporter."""
+        self.setup_configs = setup_configs
+        log_level = logging.getLevelName(log_level)
+        self.logger = get_root_logger()
+        self.logger.setLevel(log_level)
+        self.output_prefix = osp.join(
+            self.setup_configs.work_dir,
+            osp.splitext(osp.basename(self.setup_configs.deploy_cfg.onnx_config.save_file))[0],
+        )
+        self.output_path = self.output_prefix + ".onnx"
+        self.builder = ExportBuilder(self.setup_configs)
+
+    def export(self) -> None:
+        """Export the PyTorch model to ONNX."""
+        self.logger.info(f"Export PyTorch model to ONNX: {self.output_path}.")
+
+        # Build the model data and configs
+        builder_data = self.builder.build()
+
+        # Export the model
+        self._export_model(
+            model_data=builder_data.model_data,
+            context_info=builder_data.context_info,
+            patched_model=builder_data.patched_model,
+            ir_configs=builder_data.ir_configs,
+        )
+
+        # Fix the ONNX graph (TopK). The image backbone has no TopK node and
+        # is written directly to its final path, so skip the fix there
+        if self.setup_configs.module != "image_backbone":
+            self._fix_onnx_graph()
+
+        self.logger.info(f"ONNX exported to {self.output_path}")
+
+    def _export_model(
+        self, model_data: ModelData, context_info: dict, patched_model: torch.nn.Module, ir_configs: dict
+    ) -> None:
+        """Export the torch model to ONNX.
+
+        Args:
+            model_data (ModelData): Dataclass with data inputs.
+            context_info (dict): Context used to rewrite some configs during deployment.
+            patched_model (torch.nn.Module): Patched PyTorch model.
+            ir_configs (dict): Configs for the intermediate representation in ONNX.
+        """
+        with RewriterContext(**context_info), torch.no_grad():
+            image_feats = None
+            if "img_backbone" in self.setup_configs.model_cfg.model:
+                image_feats = self._export_image_backbone(model_data, ir_configs, patched_model)
+                # When exporting the image backbone itself, _export_image_backbone
+                # writes the ONNX file and returns None, so we are done here
+                if image_feats is None:
+                    return
+
+            # Export the camera BEV-only network
+            if self.setup_configs.module == "camera_bev_only":
+                self._export_camera_bev_only(
+                    model_data=model_data, ir_configs=ir_configs, patched_model=patched_model, image_feats=image_feats
+                )
+
+            # Export the main network (camera-lidar or lidar-only)
+            elif self.setup_configs.module == "main_body":
+                self._export_main_body(
+                    model_data=model_data, ir_configs=ir_configs, patched_model=patched_model, image_feats=image_feats
+                )
+
+    def _export_image_backbone(
+        self, model_data: ModelData, ir_configs: dict, patched_model: torch.nn.Module
+    ) -> Optional[torch.Tensor]:
+        """Export the image backbone.
+
+        Args:
+            model_data (ModelData): Dataclass with data inputs.
+            ir_configs (dict): Configs for the intermediate representation in ONNX.
+            patched_model (torch.nn.Module): Patched PyTorch model.
+
+        Returns:
+            Image feats.
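To keep the dispatch in `_export_model` above easy to follow, the mapping from `--module` value to exporting container, as wired in this diff, is:

```python
# --module value -> container class used to build the traced graph.
MODULE_TO_CONTAINER = {
    "image_backbone": "TrtBevFusionImageBackboneContainer",
    "camera_bev_only": "TrtBevFusionCameraOnlyContainer",
    "main_body": "TrtBevFusionMainContainer",  # lidar-only or camera-lidar
}
# These are exactly the argparse choices accepted by torch2onnx.py.
assert set(MODULE_TO_CONTAINER) == {"main_body", "image_backbone", "camera_bev_only"}
```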
+ """ + data_preprocessor = model_data.input_metas["data_preprocessor"] + model_inputs_data = model_data.model_inputs + device = self.setup_configs.device + + imgs = model_inputs_data.imgs + images_mean = data_preprocessor.mean.to(device) + images_std = data_preprocessor.std.to(device) + image_backbone_container = TrtBevFusionImageBackboneContainer(patched_model, images_mean, images_std) + model_inputs = (imgs.to(device=device, dtype=torch.uint8),) + + if self.setup_configs.module == "image_backbone": + torch.onnx.export( + image_backbone_container, + model_inputs, + self.output_path, + export_params=True, + input_names=ir_configs["input_names"], + output_names=ir_configs["output_names"], + opset_version=ir_configs["opset_version"], + dynamic_axes=ir_configs["dynamic_axes"], + keep_initializers_as_inputs=ir_configs["keep_initializers_as_inputs"], + verbose=ir_configs["verbose"], + ) + self.logger.info(f"Image backbone exported to {self.output_path}") + return + + image_feats = image_backbone_container(*model_inputs) + self.logger.info(f"Converted Image backbone") + return image_feats + + def _export_camera_bev_only( + self, + model_data: ModelData, + ir_configs: dict, + patched_model: torch.nn.Module, + image_feats: Optional[torch.Tensor], + ) -> None: + """Export the camera bev only network to an ONNX file. + + Args: + model_data (ModelData): Dataclass with data inputs. + context_info (dict): Context when deploying to rewrite some configs. + patched_model (torch.nn.Module): Patched Pytorch model. + ir_configs (dict): Configs for intermediate representations in ONNX. + """ + main_container = TrtBevFusionCameraOnlyContainer(patched_model) + data_samples = model_data.input_metas["data_samples"] + imgs = model_data.model_inputs.imgs + lidar2img = model_data.model_inputs.lidar2img + geom_feats = model_data.model_inputs.geom_feats + kept = model_data.model_inputs.kept + ranks = model_data.model_inputs.ranks + indices = model_data.model_inputs.indices + points = model_data.model_inputs.points + img_aug_matrix = imgs.new_tensor(np.stack(data_samples[0].img_aug_matrix)) + device = self.setup_configs.device + + model_inputs = ( + lidar2img.to(device).float(), + img_aug_matrix.to(device).float(), + geom_feats.to(device).float(), + kept.to(device), + ranks.to(device).long(), + indices.to(device).long(), + image_feats, + ) + + if "points" in ir_configs["input_names"]: + model_inputs += (points.to(device).float(),) + + torch.onnx.export( + main_container, + model_inputs, + self.output_path.replace(".onnx", "_temp_to_be_fixed.onnx"), + export_params=True, + input_names=ir_configs["input_names"], + output_names=ir_configs["output_names"], + opset_version=ir_configs["opset_version"], + dynamic_axes=ir_configs["dynamic_axes"], + keep_initializers_as_inputs=ir_configs["keep_initializers_as_inputs"], + verbose=ir_configs["verbose"], + ) + self.logger.info(f"Camera bev only network exported to {self.output_path}") + + def _export_main_body( + self, + model_data: ModelData, + ir_configs: dict, + patched_model: torch.nn.Module, + image_feats: Optional[torch.Tensor], + ) -> None: + """Export the main body (lidar-only or camera-lidar) to an ONNX file. + + Args: + model_data (ModelData): Dataclass with data inputs. + context_info (dict): Context when deploying to rewrite some configs. + patched_model (torch.nn.Module): Patched Pytorch model. + ir_configs (dict): Configs for intermediate representations in ONNX. 
+ """ + main_container = TrtBevFusionMainContainer(patched_model) + data_samples = model_data.input_metas["data_samples"] + voxels = model_data.model_inputs.voxels + coors = model_data.model_inputs.coors + num_points_per_voxel = model_data.model_inputs.num_points_per_voxel + device = self.setup_configs.device + model_inputs = ( + voxels.to(device), + coors.to(device), + num_points_per_voxel.to(device), + ) + + if image_feats is not None: + imgs = model_data.model_inputs.imgs + points = model_data.model_inputs.points + lidar2img = model_data.model_inputs.lidar2img + img_aug_matrix = imgs.new_tensor(np.stack(data_samples[0].img_aug_matrix)) + geom_feats = model_data.model_inputs.geom_feats + kept = model_data.model_inputs.kept + ranks = model_data.model_inputs.ranks + indices = model_data.model_inputs.indices + model_inputs += ( + points.to(device).float(), + lidar2img.to(device).float(), + img_aug_matrix.to(device).float(), + geom_feats.to(device).float(), + kept.to(device), + ranks.to(device).long(), + indices.to(device).long(), + image_feats, + ) + + torch.onnx.export( + main_container, + model_inputs, + self.output_path.replace(".onnx", "_temp_to_be_fixed.onnx"), + export_params=True, + input_names=ir_configs["input_names"], + output_names=ir_configs["output_names"], + opset_version=ir_configs["opset_version"], + dynamic_axes=ir_configs["dynamic_axes"], + keep_initializers_as_inputs=ir_configs["keep_initializers_as_inputs"], + verbose=ir_configs["verbose"], + ) + if image_feats is None: + model_name = "lidar-only" + else: + model_name = "camera-lidar" + self.logger.info(f"Main body network with {model_name} exported to {self.output_path}") + + def _fix_onnx_graph(self) -> None: + """Fix the ONNX graph with an ONNX file.""" + self.logger.info("Attempting to fix the graph (TopK's K becoming a tensor)") + model = onnx.load(self.output_path.replace(".onnx", "_temp_to_be_fixed.onnx")) + graph = gs.import_onnx(model) + + # Fix TopK + topk_nodes = [node for node in graph.nodes if node.op == "TopK"] + assert len(topk_nodes) == 1 + topk = topk_nodes[0] + k = self.setup_configs.model_cfg.get("num_proposals", None) + if k is None: + raise ValueError(f"num_proposals is not found in the model configs!") + topk.inputs[1] = gs.Constant("K", values=np.array([k], dtype=np.int64)) + topk.outputs[0].shape = [1, k] + topk.outputs[0].dtype = topk.inputs[0].dtype if topk.inputs[0].dtype else np.float32 + topk.outputs[1].shape = [1, k] + topk.outputs[1].dtype = np.int64 + + graph.cleanup().toposort() + onnx.save_model(gs.export_onnx(graph), self.output_path) + + self.logger.info(f"(Fixed) ONNX exported to {self.output_path}") diff --git a/projects/BEVFusion/deploy/torch2onnx.py b/projects/BEVFusion/deploy/torch2onnx.py index 7e13434ea..1a12aa5dc 100644 --- a/projects/BEVFusion/deploy/torch2onnx.py +++ b/projects/BEVFusion/deploy/torch2onnx.py @@ -2,32 +2,14 @@ import argparse import logging import os -import os.path as osp -from copy import deepcopy -from functools import partial -from typing import Any -import numpy as np -import onnx -import torch -from containers import TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer -from mmdeploy.apis import build_task_processor -from mmdeploy.apis.onnx.passes import optimize_onnx -from mmdeploy.core import RewriterContext, patch_model -from mmdeploy.utils import ( - IR, - Backend, - get_backend, - get_dynamic_axes, - get_ir_config, - get_onnx_config, - get_root_logger, - load_config, -) -from mmdet3d.registry import MODELS -from mmengine.registry import 
RUNNERS -from mmengine.runner import load_checkpoint +from mmdet3d.utils import register_all_modules + +register_all_modules(init_default_scope=True) + +from exporter import Torch2OnnxExporter from torch.multiprocessing import set_start_method +from utils import setup_configs def parse_args(): @@ -44,7 +26,7 @@ def parse_args(): help="module to export", required=True, default="main_body", - choices=["main_body", "image_backbone"], + choices=["main_body", "image_backbone", "camera_bev_only"], ) args = parser.parse_args() return args @@ -53,204 +35,17 @@ def parse_args(): if __name__ == "__main__": args = parse_args() set_start_method("spawn", force=True) - logger = get_root_logger() - log_level = logging.getLevelName(args.log_level) - logger.setLevel(log_level) - - deploy_cfg_path = args.deploy_cfg - model_cfg_path = args.model_cfg - checkpoint_path = args.checkpoint - device = args.device - work_dir = args.work_dir - - deploy_cfg, model_cfg = load_config(deploy_cfg_path, model_cfg_path) - - model_cfg.randomness = dict(seed=0, diff_rank_seed=False, deterministic=False) - model_cfg.launcher = "none" - - data_preprocessor_cfg = deepcopy(model_cfg.model.data_preprocessor) - - voxelize_cfg = data_preprocessor_cfg.pop("voxelize_cfg") - voxelize_cfg.pop("voxelize_reduce") - data_preprocessor_cfg["voxel_layer"] = voxelize_cfg - data_preprocessor_cfg.voxel = True - - data_preprocessor = MODELS.build(data_preprocessor_cfg) - - # load a sample - runner = RUNNERS.build(model_cfg) - runner.load_or_resume() - - data = runner.test_dataloader.dataset[args.sample_idx] - - # create model an inputs - task_processor = build_task_processor(model_cfg, deploy_cfg, device) - - torch_model = task_processor.build_pytorch_model(checkpoint_path) - data, model_inputs = task_processor.create_input(data, data_preprocessor=data_preprocessor, model=torch_model) - - if isinstance(model_inputs, list) and len(model_inputs) == 1: - model_inputs = model_inputs[0] - data_samples = data["data_samples"] - input_metas = {"data_samples": data_samples, "mode": "predict", "data_preprocessor": data_preprocessor} - - ( - voxels, - coors, - num_points_per_voxel, - points, - camera_mask, - imgs, - lidar2img, - cam2image, - camera2lidar, - geom_feats, - kept, - ranks, - indices, - ) = model_inputs - - # export to onnx - context_info = dict() - context_info["deploy_cfg"] = deploy_cfg - output_prefix = osp.join(work_dir, osp.splitext(osp.basename(deploy_cfg.onnx_config.save_file))[0]) - os.makedirs(work_dir, exist_ok=True) - backend = get_backend(deploy_cfg).value - - onnx_cfg = get_onnx_config(deploy_cfg) - opset_version = onnx_cfg.get("opset_version", 11) - - input_names = onnx_cfg["input_names"] - output_names = onnx_cfg["output_names"] - axis_names = input_names + output_names - dynamic_axes = get_dynamic_axes(deploy_cfg, axis_names) - verbose = not onnx_cfg.get("strip_doc_string", True) or onnx_cfg.get("verbose", False) - keep_initializers_as_inputs = onnx_cfg.get("keep_initializers_as_inputs", True) - optimize = onnx_cfg.get("optimize", False) - if backend == Backend.NCNN.value: - """NCNN backend needs a precise blob counts, while using onnx optimizer - will merge duplicate initilizers without reference count.""" - optimize = False - - output_path = output_prefix + ".onnx" - - logger = get_root_logger() - logger.info(f"Export PyTorch model to ONNX: {output_path}.") - - def _add_or_update(cfg: dict, key: str, val: Any): - if key in cfg and isinstance(cfg[key], dict) and isinstance(val, dict): - cfg[key].update(val) - else: - cfg[key] = 
val - - ir_config = dict( - type="onnx", - input_names=input_names, - output_names=output_names, - opset_version=opset_version, - dynamic_axes=dynamic_axes, - verbose=verbose, - keep_initializers_as_inputs=keep_initializers_as_inputs, + setup_config = setup_configs( + args.deploy_cfg, + args.model_cfg, + args.checkpoint, + args.device, + args.work_dir, + args.sample_idx, + args.module, ) - _add_or_update(deploy_cfg, "ir_config", ir_config) - ir = IR.get(get_ir_config(deploy_cfg)["type"]) - if isinstance(backend, Backend): - backend = backend.value - backend_config = dict(type=backend) - _add_or_update(deploy_cfg, "backend_config", backend_config) - - context_info["cfg"] = deploy_cfg - context_info["ir"] = ir - if "backend" not in context_info: - context_info["backend"] = backend - if "opset" not in context_info: - context_info["opset"] = opset_version - - # patch model - patched_model = patch_model(torch_model, cfg=deploy_cfg, backend=backend, ir=ir) - patched_model.eval() - patched_model.to(device) - if "onnx_custom_passes" not in context_info: - onnx_custom_passes = optimize_onnx if optimize else None - context_info["onnx_custom_passes"] = onnx_custom_passes - with RewriterContext(**context_info), torch.no_grad(): - image_feats = None - - if "img_backbone" in model_cfg.model: - img_aug_matrix = imgs.new_tensor(np.stack(data_samples[0].img_aug_matrix)) - images_mean = data_preprocessor.mean.to(device) - images_std = data_preprocessor.std.to(device) - image_backbone_container = TrtBevFusionImageBackboneContainer(patched_model, images_mean, images_std) - model_inputs = (imgs.to(device=device, dtype=torch.uint8),) - - if args.module == "image_backbone": - return_value = torch.onnx.export( - image_backbone_container, - model_inputs, - output_path, - export_params=True, - input_names=input_names, - output_names=output_names, - opset_version=opset_version, - dynamic_axes=dynamic_axes, - keep_initializers_as_inputs=keep_initializers_as_inputs, - verbose=verbose, - ) - else: - image_feats = image_backbone_container(*model_inputs) - - if args.module == "main_body": - main_container = TrtBevFusionMainContainer(patched_model) - model_inputs = ( - voxels.to(device), - coors.to(device), - num_points_per_voxel.to(device), - ) - if image_feats is not None: - model_inputs += ( - points.to(device).float(), - lidar2img.to(device).float(), - img_aug_matrix.to(device).float(), - geom_feats.to(device).float(), - kept.to(device), - ranks.to(device).long(), - indices.to(device).long(), - image_feats, - ) - torch.onnx.export( - main_container, - model_inputs, - output_path.replace(".onnx", "_temp_to_be_fixed.onnx"), - export_params=True, - input_names=input_names, - output_names=output_names, - opset_version=opset_version, - dynamic_axes=dynamic_axes, - keep_initializers_as_inputs=keep_initializers_as_inputs, - verbose=verbose, - ) - - logger.info("Attempting to fix the graph (TopK's K becoming a tensor)") - - import onnx_graphsurgeon as gs - - model = onnx.load(output_path.replace(".onnx", "_temp_to_be_fixed.onnx")) - graph = gs.import_onnx(model) - - # Fix TopK - topk_nodes = [node for node in graph.nodes if node.op == "TopK"] - assert len(topk_nodes) == 1 - topk = topk_nodes[0] - k = model_cfg.num_proposals - topk.inputs[1] = gs.Constant("K", values=np.array([k], dtype=np.int64)) - topk.outputs[0].shape = [1, k] - topk.outputs[0].dtype = topk.inputs[0].dtype if topk.inputs[0].dtype else np.float32 - topk.outputs[1].shape = [1, k] - topk.outputs[1].dtype = np.int64 - - graph.cleanup().toposort() - 
onnx.save_model(gs.export_onnx(graph), output_path) - - logger.info(f"(Fixed) ONNX exported to {output_path}") + # Build the exporter + exporter = Torch2OnnxExporter(setup_config, args.log_level) - logger.info(f"ONNX exported to {output_path}") + # Export the model + exporter.export() diff --git a/projects/BEVFusion/deploy/utils.py b/projects/BEVFusion/deploy/utils.py new file mode 100644 index 000000000..c92d97152 --- /dev/null +++ b/projects/BEVFusion/deploy/utils.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +import os +from copy import deepcopy + +from data_classes import SetupConfigs +from mmdeploy.utils import ( + get_onnx_config, + load_config, +) + + +def setup_configs( + deploy_cfg_path: str, + model_cfg_path: str, + checkpoint_path: str, + device: str, + work_dir: str, + sample_idx: int, + module: str, +) -> SetupConfigs: + """ + Setup configuration for the model. + + Args: + deploy_cfg_path: Path to the deploy config file. + model_cfg_path: Path to the model config file. + checkpoint_path: Path to the checkpoint file. + device: Device to use for the model. + work_dir: Directory to save the model. + sample_idx: Index of the sample to use for the model. + module: Module to export. + """ + os.makedirs(work_dir, exist_ok=True) + deploy_cfg, model_cfg = load_config(deploy_cfg_path, model_cfg_path) + model_cfg.randomness = dict(seed=0, diff_rank_seed=False, deterministic=False) + model_cfg.launcher = "none" + + onnx_cfg = get_onnx_config(deploy_cfg) + input_names = onnx_cfg["input_names"] + + extract_pts_inputs = True if "points" in input_names or "voxels" in input_names else False + data_preprocessor_cfg = deepcopy(model_cfg.model.data_preprocessor) + + # TODO(KokSeang): Move out from data_preprocessor + voxelize_cfg = deepcopy(model_cfg.get("voxelize_cfg", None)) + + if extract_pts_inputs and voxelize_cfg is None: + # TODO(KokSeang): Remove this + # Default voxelize_layer + voxelize_cfg = dict( + max_num_points=10, + voxel_size=[0.17, 0.17, 0.2], + point_cloud_range=[-122.4, -122.4, -3.0, 122.4, 122.4, 5.0], + max_voxels=[120000, 160000], + deterministic=True, + ) + + if voxelize_cfg is not None: + voxelize_cfg.pop("voxelize_reduce", None) + data_preprocessor_cfg["voxel_layer"] = voxelize_cfg + data_preprocessor_cfg.voxel = True + + # load a sample + if "work_dir" not in model_cfg: + model_cfg["work_dir"] = work_dir + + return SetupConfigs( + deploy_cfg=deploy_cfg, + model_cfg=model_cfg, + checkpoint_path=checkpoint_path, + device=device, + data_preprocessor_cfg=data_preprocessor_cfg, + sample_idx=sample_idx, + module=module, + onnx_cfg=onnx_cfg, + work_dir=work_dir, + ) diff --git a/projects/BEVFusion/deploy/voxel_detection.py b/projects/BEVFusion/deploy/voxel_detection.py index edb850af1..3f2df11d7 100644 --- a/projects/BEVFusion/deploy/voxel_detection.py +++ b/projects/BEVFusion/deploy/voxel_detection.py @@ -26,44 +26,14 @@ class VoxelDetection(_VoxelDetection): def __init__(self, model_cfg: mmengine.Config, deploy_cfg: mmengine.Config, device: str): super().__init__(model_cfg, deploy_cfg, device) - def create_input( - self, - batch: Union[str, Sequence[str]], - data_preprocessor: Optional[BaseDataPreprocessor] = None, - model: Optional[torch.nn.Module] = None, - ) -> Tuple[Dict, torch.Tensor]: + def extract_pts_inputs(self, collate_data): + """ """ - data = [batch] - collate_data = pseudo_collate(data) - data[0]["inputs"]["points"] = data[0]["inputs"]["points"].to(self.device) - - """ cam2img = data[0]["data_samples"].cam2img - cam2lidar = 
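Putting the refactor together, the new entry point reduces to two calls. A programmatic equivalent of the CLI invocation (all paths below are placeholders):

```python
# Programmatic equivalent of:
#   python projects/BEVFusion/deploy/torch2onnx.py <deploy_cfg> <model_cfg> \
#       <checkpoint> --device cuda:0 --work-dir <dir> --module camera_bev_only
from exporter import Torch2OnnxExporter  # projects/BEVFusion/deploy/exporter.py
from utils import setup_configs          # projects/BEVFusion/deploy/utils.py

cfg = setup_configs(
    deploy_cfg_path="configs/deploy/bevfusion_camera_point_bev_tensorrt_dynamic.py",
    model_cfg_path="path/to/model_cfg.py",      # placeholder
    checkpoint_path="path/to/checkpoint.pth",   # placeholder
    device="cuda:0",
    work_dir="work_dirs/bevfusion_export",      # placeholder
    sample_idx=0,
    module="camera_bev_only",
)
Torch2OnnxExporter(cfg, log_level="INFO").export()
```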
data[0]["data_samples"].cam2lidar - lidar2image = data[0]["data_samples"].lidar2img - lidar2camera = data[0]["data_samples"].lidar2cam - img_aux_matrix = data[0]["data_samples"].img_aug_matrix - - import pickle - d = {} - d["cam2img"] = cam2img - d["cam2lidar"] = cam2lidar - d["lidar2image"] = lidar2image - d["lidar2camera"] = lidar2camera - d["img_aux_matrix"] = img_aux_matrix - d["points"] = data[0]['inputs']['points'].cpu().numpy() - - with open("example.pkl", "wb") as f: - pickle.dump(d, f) """ - - assert data_preprocessor is not None - collate_data = data_preprocessor(collate_data, False) points = collate_data["inputs"]["points"][0] voxels = collate_data["inputs"]["voxels"] - inputs = [voxels["voxels"], voxels["num_points"], voxels["coors"]] feats = voxels["voxels"] num_points_per_voxel = voxels["num_points"] - # NOTE(knzo25): preprocessing in BEVFusion and the # data_preprocessor work different. # The original code/model uses [batch, x, y, z] @@ -74,17 +44,18 @@ def create_input( coors = voxels["coors"] coors = coors[:, 1:] - if "img_backbone" not in self.model_cfg.model: - return collate_data, [feats, coors, num_points_per_voxel] + [None] * 10 + return feats, coors, num_points_per_voxel, points + def extract_img_inputs(self, batch, collate_data, model): + """ """ # NOTE(knzo25): we want to load images from the camera # directly to the model in TensorRT img = batch["inputs"]["img"].type(torch.uint8) data_samples = collate_data["data_samples"][0] - lidar2image = feats.new_tensor(data_samples.lidar2img) - cam2image = feats.new_tensor(data_samples.cam2img) - camera2lidar = feats.new_tensor(data_samples.cam2lidar) + lidar2image = torch.tensor(data_samples.lidar2img).type(torch.float32) + cam2image = lidar2image.new_tensor(data_samples.cam2img) + camera2lidar = lidar2image.new_tensor(data_samples.cam2lidar) # NOTE(knzo25): ONNX/TensorRT do not support matrix inversion, # so they are taken out of the graph @@ -93,7 +64,7 @@ def create_input( # The extrinsics-related variables should only be computed once, # so we bring them outside the graph. Additionally, they require # argsort over the threshold available in TensorRT - img_aux_matrix = feats.new_tensor(np.stack(collate_data["data_samples"][0].img_aug_matrix)) + img_aux_matrix = lidar2image.new_tensor(np.stack(collate_data["data_samples"][0].img_aug_matrix)) img_aux_matrix_inverse = torch.inverse(img_aux_matrix) geom = model.view_transform.get_geometry( camera2lidar[..., :3, :3].unsqueeze(0).to(torch.device("cuda")), @@ -104,25 +75,78 @@ def create_input( ) geom_feats, kept, ranks, indices = model.view_transform.bev_pool_aux(geom) + camera_mask = torch.ones((img.size(0)), device=img.device) + return ( + camera_mask, + img, + lidar2image, + # NOTE(knzo25): not used during export + # but needed to comply with the signature + cam2image, + # NOTE(knzo25): not used during export + # but needed to comply with the signature + camera2lidar, + geom_feats.int(), + kept.bool(), # TensorRT treats bool as uint8 + ranks, + indices, + ) - # TODO(knzo25): just a test. 
remove - """ import pickle - data = {} - data["geom"] = geom.cpu() - data["geom_feats"] = geom_feats.cpu() - data["kept"] = kept.cpu() - data["ranks"] = ranks.cpu() - data["indices"] = indices.cpu() + def create_input( + self, + batch: Union[str, Sequence[str]], + data_preprocessor: Optional[BaseDataPreprocessor] = None, + model: Optional[torch.nn.Module] = None, + extract_pts_inputs: bool = True, + ) -> Tuple[Dict, torch.Tensor]: - with open("precomputed_features.pkl", "wb") as f: - pickle.dump(data, f) """ + data = [batch] + collate_data = pseudo_collate(data) + + """ cam2img = data[0]["data_samples"].cam2img + cam2lidar = data[0]["data_samples"].cam2lidar + lidar2image = data[0]["data_samples"].lidar2img + lidar2camera = data[0]["data_samples"].lidar2cam + img_aux_matrix = data[0]["data_samples"].img_aug_matrix + + import pickle + d = {} + d["cam2img"] = cam2img + d["cam2lidar"] = cam2lidar + d["lidar2image"] = lidar2image + d["lidar2camera"] = lidar2camera + d["img_aux_matrix"] = img_aux_matrix + d["points"] = data[0]['inputs']['points'].cpu().numpy() + + with open("example.pkl", "wb") as f: + pickle.dump(d, f) """ + + assert data_preprocessor is not None + collate_data = data_preprocessor(collate_data, False) + + if extract_pts_inputs: + data[0]["inputs"]["points"] = data[0]["inputs"]["points"].to(self.device) + feats, coors, num_points_per_voxel, points = self.extract_pts_inputs(collate_data=collate_data) + else: + feats = None + coors = None + num_points_per_voxel = None + points = None + + if "img_backbone" not in self.model_cfg.model: + assert feats is not None, f"lidar feats shouldn't be None!" + return collate_data, [feats, coors, num_points_per_voxel] + [None] * 10 + + camera_mask, img, lidar2image, cam2image, camera2lidar, geom_feats, kept, ranks, indices = ( + self.extract_img_inputs(batch=batch, model=model, collate_data=collate_data) + ) inputs = [ feats, coors, num_points_per_voxel, points, - torch.ones((img.size(0)), device=img.device), + camera_mask, img, lidar2image, # NOTE(knzo25): not used during export @@ -131,8 +155,8 @@ def create_input( # NOTE(knzo25): not used during export # but needed to comply with the signature camera2lidar, - geom_feats.int(), - kept.bool(), # TensorRT treats bool as uint8 + geom_feats, + kept, # TensorRT treats bool as uint8 ranks, indices, ]
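Finally, a footnote on the `coors` handling in `extract_pts_inputs`: the data_preprocessor emits one row per voxel with a leading batch index, and the single-sample export path strips that column, which is why the lidar-only deploy config declares `coors` with shape `[*, 3]`. A toy illustration:

```python
import torch

# Voxel coordinates as produced by the data_preprocessor: one row per voxel
# with a leading batch index (see the NOTE above about the axis-order
# difference with the original BEVFusion code).
coors = torch.tensor([
    [0, 1, 10, 20],
    [0, 2, 11, 21],
])

coors = coors[:, 1:]  # drop the batch column for single-sample export
assert coors.shape[1] == 3  # matches the coors min/opt/max shapes [*, 3]
```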