diff --git a/.apollo.bazelrc b/.apollo.bazelrc
old mode 100644
new mode 100755
index 3792c50..2f28c99
--- a/.apollo.bazelrc
+++ b/.apollo.bazelrc
@@ -1,4 +1,24 @@
-build --action_env PYTHON_BIN_PATH="/usr/bin/python3"
-build --action_env PYTHON_LIB_PATH="/usr/lib/python3/dist-packages"
-build --python_path="/usr/bin/python3"
-# build --config=tensorrt
+startup --output_user_root="/apollo/.cache/bazel"
+common --distdir="/apollo/.cache/distdir"
+
+build --action_env PYTHON_BIN_PATH="/usr/bin/python3.6"
+build --action_env PYTHON_LIB_PATH="/usr/local/lib/python3.6/dist-packages"
+build --python_path="/usr/bin/python3.6"
+build:gpu --config=cuda
+build:gpu --config=tensorrt
+
+build --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-10.2"
+build --action_env TF_CUDA_COMPUTE_CAPABILITIES="3.7,5.2,6.0,6.1,7.0,7.2,7.5"
+build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/x86_64-linux-gnu-gcc-7"
+
+# This config refers to building with CUDA available.
+build:using_cuda --define=using_cuda=true
+build:using_cuda --action_env TF_NEED_CUDA=1
+build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+
+# This config refers to building CUDA with nvcc.
+build:cuda --config=using_cuda
+build:cuda --define=using_cuda_nvcc=true
+
+build:tensorrt --action_env TF_NEED_TENSORRT=1
+build:teleop --define WITH_TELEOP=true
diff --git a/.cache/distdir/README.md b/.cache/distdir/README.md
deleted file mode 100644
index 45013f3..0000000
--- a/.cache/distdir/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# Directory used as bazel `--distdir` destination
diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 459ae80..f6c183c 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -21,4 +21,4 @@ jobs:
- name: Pull and Create Container
run: bash docker/scripts/cyber_start.sh
- name: Build
- run: docker exec -u $USER apollo_cyber_$USER bash -c "./apollo.sh build"
+ run: docker exec -u $USER apollo_cyber_$USER bash -c "./apollo.sh build_gpu"
diff --git a/README.md b/README.md
index 45d24b3..d33a0f7 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ build all modules
```bash
-bash apollo.sh build
+bash apollo.sh build_gpu
```
diff --git a/docker/scripts/cyber_start.sh b/docker/scripts/cyber_start.sh
index 9b41d12..a416b61 100755
--- a/docker/scripts/cyber_start.sh
+++ b/docker/scripts/cyber_start.sh
@@ -20,13 +20,15 @@ source "${APOLLO_ROOT_DIR}/scripts/apollo.bashrc"
# CACHE_ROOT_DIR="${APOLLO_ROOT_DIR}/.cache"
-VERSION_X86_64="diamond-auto-x86_64-18.04-20201125_1739"
+VERSION_X86_64="diamond-auto-x86_64-18.04-20210119_1643"
VERSION_AARCH64="diamond-auto-aarch64-18.04-20200925_1309"
VERSION_LOCAL_CYBER="local_cyber_dev"
CYBER_CONTAINER="apollo_cyber_${USER}"
CYBER_INSIDE="in_cyber_docker"
DOCKER_REPO="hotzoneauto2020/diamond-auto"
+#DOCKER_REPO="sunzengpeng/ubuntu_docker"
+#DOCKER_REPO="apolloauto/apollo"
DOCKER_RUN_CMD="docker run"
DOCKER_PULL_CMD="docker pull"
diff --git a/modules/control/BUILD b/modules/control/BUILD
index 15092ab..0a7ecf5 100644
--- a/modules/control/BUILD
+++ b/modules/control/BUILD
@@ -30,6 +30,7 @@ cc_library(
"//modules/drivers/proto:wheelangle_cc_proto",
"//modules/drivers/proto:parking_cc_proto",
"//modules/common/adapters:adapter_gflags",
+ "//modules/perception/proto:obst_box_cc_proto",
"@com_github_gflags_gflags//:gflags",
],
)
diff --git a/modules/drivers/robosense/BUILD b/modules/drivers/robosense/BUILD
index ebe9f64..33f69e8 100755
--- a/modules/drivers/robosense/BUILD
+++ b/modules/drivers/robosense/BUILD
@@ -5,6 +5,7 @@ package(default_visibility = ["//visibility:public"])
cc_binary(
name = "librobosense_driver_component.so",
linkopts = ["-shared"],
+ linkshared = True,
linkstatic = False,
deps = [":driver"],
)
diff --git a/modules/drivers/velodyne/driver/BUILD b/modules/drivers/velodyne/driver/BUILD
index a925855..7c7ca00 100644
--- a/modules/drivers/velodyne/driver/BUILD
+++ b/modules/drivers/velodyne/driver/BUILD
@@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:public"])
cc_binary(
name = "libvelodyne_driver_component.so",
linkopts = ["-shared"],
+ linkshared = True,
linkstatic = False,
deps = [":velodyne_driver_component_lib"],
)
diff --git a/modules/perception/conf/lidar_poindcloud_conf.pb.txt b/modules/perception/conf/lidar_poindcloud_conf.pb.txt
new file mode 100644
index 0000000..51c6d5f
--- /dev/null
+++ b/modules/perception/conf/lidar_poindcloud_conf.pb.txt
@@ -0,0 +1,4 @@
+obst_output_channel: "/diamond/perception/Obstacles"
+save_pcd_file: false
+save_pcd_path_front: "/apollo/modules/perception/data/lidar_front.pcd"
+save_pcd_path_rear: "/apollo/modules/perception/data/lidar_rear.pcd"
\ No newline at end of file
diff --git a/modules/perception/conf/tracker_config.pb.txt b/modules/perception/conf/tracker_config.pb.txt
new file mode 100644
index 0000000..bf98a70
--- /dev/null
+++ b/modules/perception/conf/tracker_config.pb.txt
@@ -0,0 +1,24 @@
+name: "HmObjectTracker"
+version: "1.1.0"
+matcher_method: HUNGARIAN_MATCHER
+filter_method: KALMAN_FILTER
+track_cached_history_size_maximum: 5
+track_consecutive_invisible_maximum: 1
+track_visible_ratio_minimum: 0.6
+collect_age_minimum: 0
+collect_consecutive_invisible_maximum: 0
+acceleration_noise_maximum: 5
+speed_noise_maximum: 0.4
+match_distance_maximum: 4.0
+location_distance_weight: 0.6
+direction_distance_weight: 0.2
+bbox_size_distance_weight: 0.1
+point_num_distance_weight: 0.1
+histogram_distance_weight: 0.5
+histogram_bin_size: 10
+use_adaptive: true
+measurement_noise: 0.4
+initial_velocity_noise: 5.0
+xy_propagation_noise: 10.0
+z_propagation_noise: 10.0
+breakdown_threshold_maximum: 10.0
\ No newline at end of file
diff --git a/modules/perception/dag/lidar_point_pillars.dag b/modules/perception/dag/lidar_point_pillars.dag
new file mode 100755
index 0000000..673ced5
--- /dev/null
+++ b/modules/perception/dag/lidar_point_pillars.dag
@@ -0,0 +1,17 @@
+# Define all coms in DAG streaming.
+module_config {
+ module_library : "/apollo/bazel-bin/modules/perception/lidar_point_pillars/point_pillars_detection.so"
+ components {
+ class_name : "PointPillarsDetection"
+ config {
+ name : "point_detect"
+ config_file_path: "/apollo/modules/perception/conf/lidar_poindcloud_conf.pb.txt"
+ readers {
+ channel: "/diamond/sensor/lidar/front/PointCloud2"
+ }
+ readers {
+ channel: "/diamond/sensor/lidar/rear/PointCloud2"
+ }
+ }
+ }
+}
diff --git a/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.dag b/modules/perception/dag/lidar_pointcloudcluster.dag
similarity index 76%
rename from modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.dag
rename to modules/perception/dag/lidar_pointcloudcluster.dag
index adf7ee9..8289558 100644
--- a/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.dag
+++ b/modules/perception/dag/lidar_pointcloudcluster.dag
@@ -2,9 +2,10 @@
module_config {
module_library : "/apollo/bazel-bin/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.so"
components {
- class_name : "lidar_pointcloudcluster"
+ class_name : "Lidar_pointcloudcluster"
config {
name : "lidar_front"
+ config_file_path: "/apollo/modules/perception/conf/lidar_poindcloud_conf.pb.txt"
readers {
channel: "/diamond/sensor/lidar/front/PointCloud2"
}
diff --git a/modules/perception/data/obst_record.00000 b/modules/perception/data/obst_record.00000
new file mode 100644
index 0000000..ac6d93e
Binary files /dev/null and b/modules/perception/data/obst_record.00000 differ
diff --git a/modules/perception/data/Screenshot from 2020-09-30 10-22-45.png b/modules/perception/data/pointcloudcluster_screenshot1.png
old mode 100644
new mode 100755
similarity index 100%
rename from modules/perception/data/Screenshot from 2020-09-30 10-22-45.png
rename to modules/perception/data/pointcloudcluster_screenshot1.png
diff --git a/modules/perception/data/Screenshot from 2020-09-30 10-22-57.png b/modules/perception/data/pointcloudcluster_screenshot2.png
old mode 100644
new mode 100755
similarity index 100%
rename from modules/perception/data/Screenshot from 2020-09-30 10-22-57.png
rename to modules/perception/data/pointcloudcluster_screenshot2.png
diff --git a/modules/perception/data/Screenshot from 2020-09-30 10-23-18.png b/modules/perception/data/pointcloudcluster_screenshot3.png
old mode 100644
new mode 100755
similarity index 100%
rename from modules/perception/data/Screenshot from 2020-09-30 10-23-18.png
rename to modules/perception/data/pointcloudcluster_screenshot3.png
diff --git a/modules/perception/launch/lidar_point_pillars.launch b/modules/perception/launch/lidar_point_pillars.launch
new file mode 100755
index 0000000..92969ee
--- /dev/null
+++ b/modules/perception/launch/lidar_point_pillars.launch
@@ -0,0 +1,7 @@
+<cyber>
+    <module>
+        <name>common</name>
+        <dag_conf>/apollo/modules/perception/dag/lidar_point_pillars.dag</dag_conf>
+        <process_name>lidar_point_pillars</process_name>
+    </module>
+</cyber>
diff --git a/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.launch b/modules/perception/launch/lidar_pointcloudcluster.launch
similarity index 55%
rename from modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.launch
rename to modules/perception/launch/lidar_pointcloudcluster.launch
index af3ee4c..6731192 100644
--- a/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.launch
+++ b/modules/perception/launch/lidar_pointcloudcluster.launch
@@ -1,7 +1,7 @@
         <name>common</name>
-        <dag_conf>/apollo/modules/perception/lidar_pointcloudcluster/lidar_pointcloudcluster.dag</dag_conf>
+        <dag_conf>/apollo/modules/perception/dag/lidar_pointcloudcluster.dag</dag_conf>
         <process_name>lidar_pointcloudcluster</process_name>
diff --git a/modules/perception/lidar_point_pillars/BUILD b/modules/perception/lidar_point_pillars/BUILD
new file mode 100755
index 0000000..280ab22
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/BUILD
@@ -0,0 +1,139 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary")
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
+load("//tools:cpplint.bzl", "cpplint")
+
+package(default_visibility = ["//visibility:public"])
+
+cc_binary(
+ name = "point_pillars_detection.so",
+ linkopts = ["-shared"],
+ linkshared = True,
+ linkstatic = False,
+ deps = [":point_pillars_detection"],
+)
+
+cc_library(
+ name = "point_pillars_detection",
+ srcs = [
+ "point_pillars_detection.cc",
+ ],
+ hdrs = [
+ "point_pillars_detection.h",
+ ],
+ deps = [
+ ":point_pillars",
+ "//cyber",
+ "//modules/drivers/proto:pointcloud_cc_proto",
+ "//modules/perception/proto:obst_box_cc_proto",
+ "//modules/perception/proto:lidar_pointcloud_conf_cc_proto",
+ "//modules/perception/lidar_pointcloud_tracking:hm_tracker",
+ "//modules/perception/lidar_pointcloud_tracking/common:object",
+ "//modules/perception/lidar_pointcloud_tracking:min_box",
+ "//modules/common/time",
+ "@eigen",
+ "@pcl",
+ "@local_config_vtk//:vtk",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "params",
+ hdrs = ["params.h"],
+)
+
+cc_library(
+ name = "point_pillars",
+ srcs = [
+ "point_pillars.cc",
+ ],
+ hdrs = [
+ "point_pillars.h",
+ ],
+ deps = [
+ ":anchor_mask_cuda",
+ ":common",
+ ":nms_cuda",
+ ":params",
+ ":postprocess_cuda",
+ ":preprocess_points",
+ ":scatter_cuda",
+ "//cyber/common",
+ "//third_party:libtorch",
+ "@local_config_cuda//cuda:cudart",
+ "@local_config_tensorrt//:tensorrt",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "preprocess_points",
+ srcs = [
+ "preprocess_points.cc",
+ ],
+ hdrs = [
+ "preprocess_points.h",
+ ],
+ deps = [
+ ":common",
+ ":preprocess_points_cuda",
+ ],
+)
+
+cc_library(
+ name = "common",
+ hdrs = ["common.h"],
+)
+
+cuda_library(
+ name = "anchor_mask_cuda",
+ srcs = ["anchor_mask_cuda.cu"],
+ hdrs = ["anchor_mask_cuda.h"],
+ deps = [
+ ":common",
+ "@local_config_cuda//cuda:cudart",
+ ],
+)
+
+cuda_library(
+ name = "nms_cuda",
+ srcs = ["nms_cuda.cu"],
+ hdrs = ["nms_cuda.h"],
+ deps = [
+ ":common",
+ "@local_config_cuda//cuda:cudart",
+ ],
+)
+
+cuda_library(
+ name = "postprocess_cuda",
+ srcs = ["postprocess_cuda.cu"],
+ hdrs = ["postprocess_cuda.h"],
+ deps = [
+ ":common",
+ ":nms_cuda",
+ "@local_config_cuda//cuda:cudart",
+ ],
+)
+
+cuda_library(
+ name = "preprocess_points_cuda",
+ srcs = ["preprocess_points_cuda.cu"],
+ hdrs = ["preprocess_points_cuda.h"],
+ deps = [
+ ":common",
+ "@local_config_cuda//cuda:cudart",
+ ],
+)
+
+cuda_library(
+ name = "scatter_cuda",
+ srcs = ["scatter_cuda.cu"],
+ hdrs = ["scatter_cuda.h"],
+ deps = [
+ ":common",
+ "@local_config_cuda//cuda:cudart",
+ ],
+)
+
+cpplint()
diff --git a/modules/perception/lidar_point_pillars/anchor_mask_cuda.cu b/modules/perception/lidar_point_pillars/anchor_mask_cuda.cu
new file mode 100755
index 0000000..116e60e
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/anchor_mask_cuda.cu
@@ -0,0 +1,184 @@
+#include
+
+// headers in local files
+#include "modules/perception/lidar_point_pillars/anchor_mask_cuda.h"
+#include "modules/perception/lidar_point_pillars/common.h"
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+// modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf
+__global__ void scan_x(int* g_odata, int* g_idata, int n) {
+ extern __shared__ int temp[]; // allocated on invocation
+ int thid = threadIdx.x;
+ int bid = blockIdx.x;
+ int bdim = blockDim.x;
+ int offset = 1;
+ temp[2 * thid] =
+ g_idata[bid * bdim * 2 + 2 * thid]; // load input into shared memory
+ temp[2 * thid + 1] = g_idata[bid * bdim * 2 + 2 * thid + 1];
+ for (int d = n >> 1; d > 0; d >>= 1) { // build sum in place up the tree
+ __syncthreads();
+ if (thid < d) {
+ int ai = offset * (2 * thid + 1) - 1;
+ int bi = offset * (2 * thid + 2) - 1;
+ temp[bi] += temp[ai];
+ }
+ offset *= 2;
+ }
+ if (thid == 0) {
+ temp[n - 1] = 0;
+ } // clear the last element
+ for (int d = 1; d < n; d *= 2) { // traverse down tree & build scan
+ offset >>= 1;
+ __syncthreads();
+ if (thid < d) {
+ int ai = offset * (2 * thid + 1) - 1;
+ int bi = offset * (2 * thid + 2) - 1;
+ int t = temp[ai];
+ temp[ai] = temp[bi];
+ temp[bi] += t;
+ }
+ }
+ __syncthreads();
+ g_odata[bid * bdim * 2 + 2 * thid] =
+ temp[2 * thid + 1]; // write results to device memory
+ int second_ind = 2 * thid + 2;
+ if (second_ind == bdim * 2) {
+ g_odata[bid * bdim * 2 + 2 * thid + 1] =
+ temp[2 * thid + 1] + g_idata[bid * bdim * 2 + 2 * thid + 1];
+ } else {
+ g_odata[bid * bdim * 2 + 2 * thid + 1] = temp[2 * thid + 2];
+ }
+}
+
+// modified prefix sum code from
+// https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf
+__global__ void scan_y(int* g_odata, int* g_idata, int n) {
+ extern __shared__ int temp[]; // allocated on invocation
+ int thid = threadIdx.x;
+ int bid = blockIdx.x;
+ int bdim = blockDim.x;
+ int gdim = gridDim.x;
+ int offset = 1;
+ temp[2 * thid] =
+ g_idata[bid + 2 * thid * gdim]; // load input into shared memory
+ temp[2 * thid + 1] = g_idata[bid + 2 * thid * gdim + gdim];
+ for (int d = n >> 1; d > 0; d >>= 1) { // build sum in place up the tree
+ __syncthreads();
+ if (thid < d) {
+ int ai = offset * (2 * thid + 1) - 1;
+ int bi = offset * (2 * thid + 2) - 1;
+ temp[bi] += temp[ai];
+ }
+ offset *= 2;
+ }
+ if (thid == 0) {
+ temp[n - 1] = 0;
+ } // clear the last element
+ for (int d = 1; d < n; d *= 2) { // traverse down tree & build scan
+ offset >>= 1;
+ __syncthreads();
+ if (thid < d) {
+ int ai = offset * (2 * thid + 1) - 1;
+ int bi = offset * (2 * thid + 2) - 1;
+ int t = temp[ai];
+ temp[ai] = temp[bi];
+ temp[bi] += t;
+ }
+ }
+ __syncthreads();
+ g_odata[bid + 2 * thid * gdim] =
+ temp[2 * thid + 1]; // write results to device memory
+ int second_ind = 2 * thid + 2;
+ if (second_ind == bdim * 2) {
+ g_odata[bid + 2 * thid * gdim + gdim] =
+ temp[2 * thid + 1] + g_idata[bid + 2 * thid * gdim + gdim];
+ } else {
+ g_odata[bid + 2 * thid * gdim + gdim] = temp[2 * thid + 2];
+ }
+}
+
+__global__ void make_anchor_mask_kernel(
+ const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
+ const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y,
+ int* dev_sparse_pillar_map, int* dev_anchor_mask, const float min_x_range,
+ const float min_y_range, const float pillar_x_size,
+ const float pillar_y_size, const int grid_x_size, const int grid_y_size,
+ const int num_inds_for_scan) {
+ int tid = threadIdx.x + blockIdx.x * blockDim.x;
+ int anchor_coor[NUM_2D_BOX_CORNERS_MACRO] = {0};
+ const int grid_x_size_1 = grid_x_size - 1; // grid_x_size - 1
+ const int grid_y_size_1 = grid_y_size - 1; // grid_y_size - 1
+
+ anchor_coor[0] =
+ floor((dev_box_anchors_min_x[tid] - min_x_range) / pillar_x_size);
+ anchor_coor[1] =
+ floor((dev_box_anchors_min_y[tid] - min_y_range) / pillar_y_size);
+ anchor_coor[2] =
+ floor((dev_box_anchors_max_x[tid] - min_x_range) / pillar_x_size);
+ anchor_coor[3] =
+ floor((dev_box_anchors_max_y[tid] - min_y_range) / pillar_y_size);
+ anchor_coor[0] = max(anchor_coor[0], 0);
+ anchor_coor[1] = max(anchor_coor[1], 0);
+ anchor_coor[2] = min(anchor_coor[2], grid_x_size_1);
+ anchor_coor[3] = min(anchor_coor[3], grid_y_size_1);
+
+ int right_top = dev_sparse_pillar_map[anchor_coor[3] * num_inds_for_scan +
+ anchor_coor[2]];
+ int left_bottom = dev_sparse_pillar_map[anchor_coor[1] * num_inds_for_scan +
+ anchor_coor[0]];
+ int left_top = dev_sparse_pillar_map[anchor_coor[3] * num_inds_for_scan +
+ anchor_coor[0]];
+ int right_bottom = dev_sparse_pillar_map[anchor_coor[1] * num_inds_for_scan +
+ anchor_coor[2]];
+
+ int area = right_top - left_top - right_bottom + left_bottom;
+ if (area > 1) {
+ dev_anchor_mask[tid] = 1;
+ } else {
+ dev_anchor_mask[tid] = 0;
+ }
+}
+
+AnchorMaskCuda::AnchorMaskCuda(
+ const int num_threads, const int num_inds_for_scan, const int num_anchor,
+ const float min_x_range, const float min_y_range, const float pillar_x_size,
+ const float pillar_y_size, const int grid_x_size, const int grid_y_size)
+ : num_threads_(num_threads),
+ num_inds_for_scan_(num_inds_for_scan),
+ num_anchor_(num_anchor),
+ min_x_range_(min_x_range),
+ min_y_range_(min_y_range),
+ pillar_x_size_(pillar_x_size),
+ pillar_y_size_(pillar_y_size),
+ grid_x_size_(grid_x_size),
+ grid_y_size_(grid_y_size) {}
+
+void AnchorMaskCuda::DoAnchorMaskCuda(
+ int* dev_sparse_pillar_map, int* dev_cumsum_along_x,
+ int* dev_cumsum_along_y, const float* dev_box_anchors_min_x,
+ const float* dev_box_anchors_min_y, const float* dev_box_anchors_max_x,
+ const float* dev_box_anchors_max_y, int* dev_anchor_mask) {
+  scan_x<<<num_inds_for_scan_, num_inds_for_scan_ / 2, num_inds_for_scan_ * sizeof(int)>>>(
+      dev_cumsum_along_x, dev_sparse_pillar_map, num_inds_for_scan_);
+  scan_y<<<num_inds_for_scan_, num_inds_for_scan_ / 2, num_inds_for_scan_ * sizeof(int)>>>(
+      dev_cumsum_along_y, dev_cumsum_along_x, num_inds_for_scan_);
+ GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map, dev_cumsum_along_y,
+ num_inds_for_scan_ * num_inds_for_scan_ * sizeof(int),
+ cudaMemcpyDeviceToDevice));
+
+ int num_blocks = DIVUP(num_anchor_, num_threads_);
+  make_anchor_mask_kernel<<<num_blocks, num_threads_>>>(
+ dev_box_anchors_min_x, dev_box_anchors_min_y, dev_box_anchors_max_x,
+ dev_box_anchors_max_y, dev_sparse_pillar_map, dev_anchor_mask,
+ min_x_range_, min_y_range_, pillar_x_size_, pillar_y_size_, grid_x_size_,
+ grid_y_size_, num_inds_for_scan_);
+}
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/anchor_mask_cuda.h b/modules/perception/lidar_point_pillars/anchor_mask_cuda.h
new file mode 100755
index 0000000..b90108f
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/anchor_mask_cuda.h
@@ -0,0 +1,72 @@
+#pragma once
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+class AnchorMaskCuda {
+ private:
+ const int num_threads_;
+ const int num_inds_for_scan_;
+ const int num_anchor_;
+ const float min_x_range_;
+ const float min_y_range_;
+ const float pillar_x_size_;
+ const float pillar_y_size_;
+ const int grid_x_size_;
+ const int grid_y_size_;
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] num_threads Number of threads per block
+ * @param[in] num_inds_for_scan Number of indexes for scan(cumsum)
+ * @param[in] num_anchor Number of anchors in total
+ * @param[in] min_x_range Minimum x value for point cloud
+ * @param[in] min_y_range Minimum y value for point cloud
+ * @param[in] pillar_x_size Size of x-dimension for a pillar
+ * @param[in] pillar_y_size Size of y-dimension for a pillar
+ * @param[in] grid_x_size Number of pillars in x-coordinate
+ * @param[in] grid_y_size Number of pillars in y-coordinate
+ * @details Captital variables never change after the compile
+ */
+ AnchorMaskCuda(const int num_threads, const int num_inds_for_scan,
+ const int num_anchor, const float min_x_range,
+ const float min_y_range, const float pillar_x_size,
+ const float pillar_y_size, const int grid_x_size,
+ const int grid_y_size);
+
+ /**
+ * @brief call cuda code for making anchor mask
+ * @param[in] dev_sparse_pillar_map
+ * Grid map representation for pillar occupancy
+ * @param[in] dev_cumsum_along_x
+ * Array for storing cumsum-ed dev_sparse_pillar_map values
+ * @param[in] dev_cumsum_along_y
+ * Array for storing cumsum-ed dev_cumsum_along_y values
+ * @param[in] dev_box_anchors_min_x
+ * Array for storing min x value for each anchor
+ * @param[in] dev_box_anchors_min_y
+ * Array for storing min y value for each anchor
+ * @param[in] dev_box_anchors_max_x
+ * Array for storing max x value for each anchor
+ * @param[in] dev_box_anchors_max_y
+ * Array for storing max y value for each anchor
+ * @param[in] dev_box_anchors_max_y
+ * Array for storing max y value for each anchor
+ * @param[out] dev_anchor_mask Anchor mask for filtering the network output
+ * @details dev_* means device memory. Make a mask for filtering pillar
+ * occupancy area
+ */
+ void DoAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x,
+ int* dev_cumsum_along_y,
+ const float* dev_box_anchors_min_x,
+ const float* dev_box_anchors_min_y,
+ const float* dev_box_anchors_max_x,
+ const float* dev_box_anchors_max_y,
+ int* dev_anchor_mask);
+};
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/common.h b/modules/perception/lidar_point_pillars/common.h
new file mode 100755
index 0000000..f452f07
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/common.h
@@ -0,0 +1,33 @@
+#pragma once
+
+// headers in STL
+#include <stdio.h>
+
+// headers in CUDA
+#include <cuda_runtime_api.h>
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+// using MACRO to allocate memory inside CUDA kernel
+#define NUM_3D_BOX_CORNERS_MACRO 8
+#define NUM_2D_BOX_CORNERS_MACRO 4
+#define NUM_THREADS_MACRO 64 // need to be changed when num_threads_ is changed
+
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
+
+#define GPU_CHECK(ans) \
+ { GPUAssert((ans), __FILE__, __LINE__); }
+inline void GPUAssert(cudaError_t code, const char* file, int line,
+ bool abort = true) {
+ if (code != cudaSuccess) {
+ fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
+ line);
+ if (abort) exit(code);
+ }
+}
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/nms_cuda.cu b/modules/perception/lidar_point_pillars/nms_cuda.cu
new file mode 100755
index 0000000..369f7b8
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/nms_cuda.cu
@@ -0,0 +1,118 @@
+#include <vector>
+
+// headers in local files
+#include "modules/perception/lidar_point_pillars/nms_cuda.h"
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+__device__ inline float devIoU(float const *const a, float const *const b) {
+ float left = max(a[0], b[0]), right = min(a[2], b[2]);
+ float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+ float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+ float interS = width * height;
+ float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+ float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+ return interS / (Sa + Sb - interS);
+}
+
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+ const float *dev_boxes, uint64_t *dev_mask,
+ const int num_box_corners) {
+ const int row_start = blockIdx.y;
+ const int col_start = blockIdx.x;
+
+ const int block_threads = blockDim.x;
+
+ const int row_size = min(n_boxes - row_start * block_threads, block_threads);
+ const int col_size = min(n_boxes - col_start * block_threads, block_threads);
+
+ __shared__ float block_boxes[NUM_THREADS_MACRO * NUM_2D_BOX_CORNERS_MACRO];
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * num_box_corners + 0] =
+ dev_boxes[(block_threads * col_start + threadIdx.x) * num_box_corners +
+ 0];
+ block_boxes[threadIdx.x * num_box_corners + 1] =
+ dev_boxes[(block_threads * col_start + threadIdx.x) * num_box_corners +
+ 1];
+ block_boxes[threadIdx.x * num_box_corners + 2] =
+ dev_boxes[(block_threads * col_start + threadIdx.x) * num_box_corners +
+ 2];
+ block_boxes[threadIdx.x * num_box_corners + 3] =
+ dev_boxes[(block_threads * col_start + threadIdx.x) * num_box_corners +
+ 3];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = block_threads * row_start + threadIdx.x;
+ const float cur_box[NUM_2D_BOX_CORNERS_MACRO] = {
+ dev_boxes[cur_box_idx * num_box_corners + 0],
+ dev_boxes[cur_box_idx * num_box_corners + 1],
+ dev_boxes[cur_box_idx * num_box_corners + 2],
+ dev_boxes[cur_box_idx * num_box_corners + 3]};
+ uint64_t t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (int i = start; i < col_size; ++i) {
+ if (devIoU(cur_box, block_boxes + i * num_box_corners) >
+ nms_overlap_thresh) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks = DIVUP(n_boxes, block_threads);
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+}
+
+NmsCuda::NmsCuda(const int num_threads, const int num_box_corners,
+ const float nms_overlap_threshold)
+ : num_threads_(num_threads),
+ num_box_corners_(num_box_corners),
+ nms_overlap_threshold_(nms_overlap_threshold) {}
+
+void NmsCuda::DoNmsCuda(const int host_filter_count,
+ float *dev_sorted_box_for_nms, int *out_keep_inds,
+ int *out_num_to_keep) {
+ const int col_blocks = DIVUP(host_filter_count, num_threads_);
+ dim3 blocks(DIVUP(host_filter_count, num_threads_),
+ DIVUP(host_filter_count, num_threads_));
+ dim3 threads(num_threads_);
+
+ uint64_t *dev_mask = NULL;
+ GPU_CHECK(
+ cudaMalloc(&dev_mask, host_filter_count * col_blocks * sizeof(uint64_t)));
+
+  nms_kernel<<<blocks, threads>>>(host_filter_count, nms_overlap_threshold_,
+                                  dev_sorted_box_for_nms, dev_mask,
+                                  num_box_corners_);
+
+ // postprocess for nms output
+  std::vector<uint64_t> host_mask(host_filter_count * col_blocks);
+ GPU_CHECK(cudaMemcpy(&host_mask[0], dev_mask,
+ sizeof(uint64_t) * host_filter_count * col_blocks,
+ cudaMemcpyDeviceToHost));
+  std::vector<uint64_t> remv(col_blocks);
+ memset(&remv[0], 0, sizeof(uint64_t) * col_blocks);
+
+ for (int i = 0; i < host_filter_count; ++i) {
+ int nblock = i / num_threads_;
+ int inblock = i % num_threads_;
+
+ if (!(remv[nblock] & (1ULL << inblock))) {
+ out_keep_inds[(*out_num_to_keep)++] = i;
+ uint64_t *p = &host_mask[0] + i * col_blocks;
+ for (int j = nblock; j < col_blocks; ++j) {
+ remv[j] |= p[j];
+ }
+ }
+ }
+ GPU_CHECK(cudaFree(dev_mask));
+}
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/nms_cuda.h b/modules/perception/lidar_point_pillars/nms_cuda.h
new file mode 100755
index 0000000..a228aef
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/nms_cuda.h
@@ -0,0 +1,46 @@
+#pragma once
+
+// headers in STL
+#include <memory>
+#include <vector>
+
+// headers in local files
+#include "modules/perception/lidar_point_pillars/common.h"
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+class NmsCuda {
+ private:
+ const int num_threads_;
+ const int num_box_corners_;
+ const float nms_overlap_threshold_;
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] num_threads Number of threads when launching cuda kernel
+ * @param[in] num_box_corners Number of corners for 2D box
+ * @param[in] nms_overlap_threshold IOU threshold for NMS
+ * @details Captital variables never change after the compile, Non-captital
+ * variables could be chaned through rosparam
+ */
+ NmsCuda(const int num_threads, const int num_box_corners,
+ const float nms_overlap_threshold);
+
+ /**
+ * @brief GPU Non-Maximum Suppresion for network output
+ * @param[in] host_filter_count Number of filtered output
+ * @param[in] dev_sorted_box_for_nms Bounding box output sorted by score
+ * @param[out] out_keep_inds Indexes of selected bounding box
+ * @param[out] out_num_to_keep Number of kept bounding boxes
+ * @details NMS in GPU and postprocessing for selecting box in CPU
+ */
+ void DoNmsCuda(const int host_filter_count, float* dev_sorted_box_for_nms,
+ int* out_keep_inds, int* out_num_to_keep);
+};
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/params.h b/modules/perception/lidar_point_pillars/params.h
new file mode 100755
index 0000000..6b16847
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/params.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <vector>
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+class Params {
+ public:
+ static constexpr float kPillarXSize = 0.25f;
+ static constexpr float kPillarYSize = 0.25f;
+ static constexpr float kPillarZSize = 8.0f;
+ static constexpr float kMinXRange = -100.0f;
+ static constexpr float kMinYRange = -70.0f;
+ static constexpr float kMinZRange = -5.0f;
+ static constexpr float kMaxXRange = 100.0f;
+ static constexpr float kMaxYRange = 70.0f;
+ static constexpr float kMaxZRange = 3.0f;
+ static constexpr int kNumClass = 9;
+ static constexpr int kMaxNumPillars = 30000;
+ static constexpr int kMaxNumPointsPerPillar = 60;
+ static constexpr int kNumPointFeature = 5; // x, y, z, i, delta of time
+ static constexpr int kNumAnchor = 200 * 140 * 8 + 220 * 140 * 8;
+ static constexpr int kNumOutputBoxFeature = 7;
+ static constexpr int kBatchSize = 1;
+ static constexpr int kNumIndsForScan = 1024;
+ static constexpr int kNumThreads = 64;
+ static constexpr int kNumBoxCorners = 4;
+
+  static std::vector<int> AnchorStrides() { return std::vector<int>{4, 2}; }
+
+  static std::vector<int> NumAnchorSets() { return std::vector<int>{8, 8}; }
+
+  static std::vector<std::vector<float>> AnchorDxSizes() {
+    return std::vector<std::vector<float>>{
+        std::vector<float>{2.94046906f, 1.95017717f, 2.73050468f, 2.4560939f},
+        std::vector<float>{2.49008838f, 0.60058911f, 0.76279481f, 0.66344886f,
+                           0.39694519f}};
+  }
+
+  static std::vector<std::vector<float>> AnchorDySizes() {
+    return std::vector<std::vector<float>>{
+        std::vector<float>{11.1885991, 4.60718145f, 6.38352896f, 6.73778078f},
+        std::vector<float>{0.48578221f, 1.68452161f, 2.09973778f, 0.7256437f,
+                           0.40359262f}};
+  }
+
+  static std::vector<std::vector<float>> AnchorDzSizes() {
+    return std::vector<std::vector<float>>{
+        std::vector<float>{3.47030982f, 1.72270761f, 3.13312415f, 2.73004906f},
+        std::vector<float>{0.98297065f, 1.27192197f, 1.44403034f, 1.75748069f,
+                           1.06232151f}};
+  }
+
+  static std::vector<std::vector<float>> AnchorZCoors() {
+    return std::vector<std::vector<float>>{
+        std::vector<float>{-0.0715754f, -0.93897414f, -0.08168083f,
+                           -0.37937912f},
+        std::vector<float>{-1.27247968f, -1.03743013f, -0.99194854f,
+                           -0.73911038f, -1.27868911f}};
+  }
+
+  static std::vector<std::vector<int>> NumAnchorRo() {
+    return std::vector<std::vector<int>>{std::vector<int>{2, 2, 2, 2},
+                                         std::vector<int>{2, 2, 2, 1, 1}};
+  }
+
+  static std::vector<std::vector<float>> AnchorRo() {
+    return std::vector<std::vector<float>>{
+        std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2},
+        std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, 0}};
+  }
+
+ private:
+ Params() = default;
+ ~Params() = default;
+}; // class Params
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/point_pillars.cc b/modules/perception/lidar_point_pillars/point_pillars.cc
new file mode 100755
index 0000000..6d75654
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/point_pillars.cc
@@ -0,0 +1,656 @@
+// headers in STL
+// NOTE(review): the header names after these #include directives appear to
+// have been stripped by extraction (likely <limits>, <iostream> or similar);
+// restore them before compiling.
+#include
+#include
+
+#include "cyber/common/log.h"
+
+// headers in local files
+#include "modules/perception/lidar_point_pillars/point_pillars.h"
+// (file-scope "using namespace std;" removed: every std name in this
+// translation unit is already fully qualified, and a file-scope
+// using-directive is disallowed by the style guide.)
+
+namespace apollo {
+namespace perception {
+namespace lidar {
+
+// Out-of-class definitions for the static constant members declared in
+// point_pillars.h (needed for ODR-use prior to C++17 inline variables).
+// All values are sourced from Params so the two stay consistent.
+// NOTE(review): template argument lists appear stripped below (e.g.
+// "static_cast(...)" and "std::vector" without <...>); restore them against
+// the original source before compiling.
+const float PointPillars::kPillarXSize = Params::kPillarXSize;
+const float PointPillars::kPillarYSize = Params::kPillarYSize;
+const float PointPillars::kPillarZSize = Params::kPillarZSize;
+const float PointPillars::kMinXRange = Params::kMinXRange;
+const float PointPillars::kMinYRange = Params::kMinYRange;
+const float PointPillars::kMinZRange = Params::kMinZRange;
+const float PointPillars::kMaxXRange = Params::kMaxXRange;
+const float PointPillars::kMaxYRange = Params::kMaxYRange;
+const float PointPillars::kMaxZRange = Params::kMaxZRange;
+const int PointPillars::kNumClass = Params::kNumClass;
+const int PointPillars::kMaxNumPillars = Params::kMaxNumPillars;
+const int PointPillars::kMaxNumPointsPerPillar = Params::kMaxNumPointsPerPillar;
+const int PointPillars::kNumPointFeature = Params::kNumPointFeature;
+// Grid dimensions derived from the detection range and pillar size
+// (truncating division -- range is assumed to be a multiple of pillar size).
+const int PointPillars::kGridXSize =
+ static_cast((kMaxXRange - kMinXRange) / kPillarXSize);
+const int PointPillars::kGridYSize =
+ static_cast((kMaxYRange - kMinYRange) / kPillarYSize);
+const int PointPillars::kGridZSize =
+ static_cast((kMaxZRange - kMinZRange) / kPillarZSize);
+// 64 is presumably the PFE output channel count -- TODO confirm against the
+// PFE model; kNumThreads happens to share the value (see DeviceMemoryMalloc).
+const int PointPillars::kRpnInputSize = 64 * kGridXSize * kGridYSize;
+const int PointPillars::kNumAnchor = Params::kNumAnchor;
+const int PointPillars::kNumOutputBoxFeature = Params::kNumOutputBoxFeature;
+const int PointPillars::kRpnBoxOutputSize = kNumAnchor * kNumOutputBoxFeature;
+const int PointPillars::kRpnClsOutputSize = kNumAnchor * kNumClass;
+// 2 direction bins (front/back) per anchor.
+const int PointPillars::kRpnDirOutputSize = kNumAnchor * 2;
+const int PointPillars::kBatchSize = Params::kBatchSize;
+const int PointPillars::kNumIndsForScan = Params::kNumIndsForScan;
+const int PointPillars::kNumThreads = Params::kNumThreads;
+// if you change kNumThreads, need to modify NUM_THREADS_MACRO in
+// common.h
+const int PointPillars::kNumBoxCorners = Params::kNumBoxCorners;
+const std::vector PointPillars::kAnchorStrides = Params::AnchorStrides();
+// Per-head anchor generation ranges in grid-index units, 4 entries per head
+// (x_start, x_end, y_start, y_end): head 0 spans the full grid, head 1 the
+// central 50% of each axis.
+const std::vector PointPillars::kAnchorRanges{
+ 0,
+ kGridXSize,
+ 0,
+ kGridYSize,
+ static_cast(kGridXSize * 0.25),
+ static_cast(kGridXSize * 0.75),
+ static_cast(kGridYSize * 0.25),
+ static_cast(kGridYSize * 0.75)};
+const std::vector PointPillars::kNumAnchorSets = Params::NumAnchorSets();
+const std::vector> PointPillars::kAnchorDxSizes =
+ Params::AnchorDxSizes();
+const std::vector> PointPillars::kAnchorDySizes =
+ Params::AnchorDySizes();
+const std::vector> PointPillars::kAnchorDzSizes =
+ Params::AnchorDzSizes();
+const std::vector> PointPillars::kAnchorZCoors =
+ Params::AnchorZCoors();
+const std::vector> PointPillars::kNumAnchorRo =
+ Params::NumAnchorRo();
+const std::vector> PointPillars::kAnchorRo =
+ Params::AnchorRo();
+
+// Constructs the full PointPillars pipeline.
+// @param reproduce_result_mode when true, preprocessing runs on the CPU
+//        (bit-reproducible path); otherwise the CUDA preprocessor is used.
+// @param score_threshold minimum detection confidence kept by postprocessing.
+// @param nms_overlap_threshold IoU threshold for non-maximum suppression.
+// @param pfe_torch_file path to the TorchScript pillar-feature-extractor.
+// @param rpn_onnx_file path to the RPN ONNX model (built into a TRT engine).
+// Allocates device memory, loads both networks, and precomputes anchors;
+// all resources are released in the destructor.
+PointPillars::PointPillars(const bool reproduce_result_mode,
+                           const float score_threshold,
+                           const float nms_overlap_threshold,
+                           const std::string pfe_torch_file,
+                           const std::string rpn_onnx_file)
+    : reproduce_result_mode_(reproduce_result_mode),
+      score_threshold_(score_threshold),
+      nms_overlap_threshold_(nms_overlap_threshold),
+      pfe_torch_file_(pfe_torch_file),
+      rpn_onnx_file_(rpn_onnx_file) {
+  // Only one of the two preprocessors is ever constructed.
+  if (reproduce_result_mode_) {
+    preprocess_points_ptr_.reset(new PreprocessPoints(
+        kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature, kGridXSize,
+        kGridYSize, kGridZSize, kPillarXSize, kPillarYSize, kPillarZSize,
+        kMinXRange, kMinYRange, kMinZRange, kNumIndsForScan));
+  } else {
+    preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda(
+        kNumThreads, kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature,
+        kNumIndsForScan, kGridXSize, kGridYSize, kGridZSize, kPillarXSize,
+        kPillarYSize, kPillarZSize, kMinXRange, kMinYRange, kMinZRange));
+  }
+
+  anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(
+      kNumThreads, kNumIndsForScan, kNumAnchor, kMinXRange, kMinYRange,
+      kPillarXSize, kPillarYSize, kGridXSize, kGridYSize));
+
+  scatter_cuda_ptr_.reset(
+      new ScatterCuda(kNumThreads, kMaxNumPillars, kGridXSize, kGridYSize));
+
+  const float float_min = std::numeric_limits::lowest();
+  const float float_max = std::numeric_limits::max();
+  postprocess_cuda_ptr_.reset(
+      new PostprocessCuda(float_min, float_max, kNumAnchor, kNumClass,
+                          score_threshold_, kNumThreads, nms_overlap_threshold_,
+                          kNumBoxCorners, kNumOutputBoxFeature));
+
+  // Order matters: device buffers must exist before anchors are copied to
+  // them (InitAnchors -> PutAnchorsInDeviceMemory).
+  DeviceMemoryMalloc();
+  InitTorch();
+  InitTRT();
+  InitAnchors();
+}
+
+// Releases every host array allocated in InitAnchors, every device buffer
+// allocated in DeviceMemoryMalloc, and the TensorRT objects from InitTRT.
+// NOTE(review): if the constructor throws partway through, these pointers
+// may be null/uninitialized -- TODO confirm the members are default-
+// initialized in the header.
+PointPillars::~PointPillars() {
+  // Host-side anchor arrays (new[] in InitAnchors).
+  delete[] anchors_px_;
+  delete[] anchors_py_;
+  delete[] anchors_pz_;
+  delete[] anchors_dx_;
+  delete[] anchors_dy_;
+  delete[] anchors_dz_;
+  delete[] anchors_ro_;
+  delete[] box_anchors_min_x_;
+  delete[] box_anchors_min_y_;
+  delete[] box_anchors_max_x_;
+  delete[] box_anchors_max_y_;
+
+  // Device buffers (cudaMalloc in DeviceMemoryMalloc), freed in the same
+  // grouping they were allocated.
+  GPU_CHECK(cudaFree(dev_x_coors_));
+  GPU_CHECK(cudaFree(dev_y_coors_));
+  GPU_CHECK(cudaFree(dev_num_points_per_pillar_));
+  GPU_CHECK(cudaFree(dev_sparse_pillar_map_));
+  GPU_CHECK(cudaFree(dev_pillar_point_feature_));
+  GPU_CHECK(cudaFree(dev_pillar_coors_));
+
+  GPU_CHECK(cudaFree(dev_cumsum_along_x_));
+  GPU_CHECK(cudaFree(dev_cumsum_along_y_));
+
+  GPU_CHECK(cudaFree(dev_box_anchors_min_x_));
+  GPU_CHECK(cudaFree(dev_box_anchors_min_y_));
+  GPU_CHECK(cudaFree(dev_box_anchors_max_x_));
+  GPU_CHECK(cudaFree(dev_box_anchors_max_y_));
+  GPU_CHECK(cudaFree(dev_anchor_mask_));
+
+  GPU_CHECK(cudaFree(pfe_buffers_[0]));
+  GPU_CHECK(cudaFree(pfe_buffers_[1]));
+  GPU_CHECK(cudaFree(pfe_buffers_[2]));
+
+  GPU_CHECK(cudaFree(rpn_buffers_[0]));
+  GPU_CHECK(cudaFree(rpn_buffers_[1]));
+  GPU_CHECK(cudaFree(rpn_buffers_[2]));
+  GPU_CHECK(cudaFree(rpn_buffers_[3]));
+
+  GPU_CHECK(cudaFree(dev_scattered_feature_));
+
+  GPU_CHECK(cudaFree(dev_anchors_px_));
+  GPU_CHECK(cudaFree(dev_anchors_py_));
+  GPU_CHECK(cudaFree(dev_anchors_pz_));
+  GPU_CHECK(cudaFree(dev_anchors_dx_));
+  GPU_CHECK(cudaFree(dev_anchors_dy_));
+  GPU_CHECK(cudaFree(dev_anchors_dz_));
+  GPU_CHECK(cudaFree(dev_anchors_ro_));
+  GPU_CHECK(cudaFree(dev_filtered_box_));
+  GPU_CHECK(cudaFree(dev_filtered_score_));
+  GPU_CHECK(cudaFree(dev_filtered_label_));
+  GPU_CHECK(cudaFree(dev_filtered_dir_));
+  GPU_CHECK(cudaFree(dev_box_for_nms_));
+  GPU_CHECK(cudaFree(dev_filter_count_));
+
+  // TensorRT teardown in reverse creation order.
+  // NOTE(review): destroy() is deprecated in TensorRT >= 8 in favor of
+  // `delete` -- confirm the TRT version this targets.
+  rpn_context_->destroy();
+  rpn_runtime_->destroy();
+  rpn_engine_->destroy();
+}
+
+// Allocates every device buffer used by the pipeline; each cudaMalloc is
+// paired with a cudaFree in the destructor.
+// NOTE(review): template argument lists on reinterpret_cast appear stripped
+// (should be reinterpret_cast<void**>); restore before compiling.
+void PointPillars::DeviceMemoryMalloc() {
+  // Per-pillar coordinates and point counts produced by preprocessing.
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_x_coors_),
+                       kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_y_coors_),
+                       kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_num_points_per_pillar_),
+                       kMaxNumPillars * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_sparse_pillar_map_),
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_pillar_point_feature_),
+                       kMaxNumPillars * kMaxNumPointsPerPillar *
+                           kNumPointFeature * sizeof(float)));
+  // 4 floats per pillar coordinate entry (matches pfe_buffers_[2] below).
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_pillar_coors_),
+                       kMaxNumPillars * 4 * sizeof(float)));
+
+  // Prefix-sum workspaces for the anchor-mask scan.
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_cumsum_along_x_),
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_cumsum_along_y_),
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+
+  // for make anchor mask kernel
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_box_anchors_min_x_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_box_anchors_min_y_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_box_anchors_max_x_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_box_anchors_max_y_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchor_mask_),
+                       kNumAnchor * sizeof(int)));
+
+  // for trt inference
+  // create GPU buffers and a stream
+  GPU_CHECK(
+      cudaMalloc(&pfe_buffers_[0], kMaxNumPillars * kMaxNumPointsPerPillar *
+                                       kNumPointFeature * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[1], kMaxNumPillars * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[2], kMaxNumPillars * 4 * sizeof(float)));
+
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[0], kRpnInputSize * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[1], kRpnBoxOutputSize * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[2], kRpnClsOutputSize * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[3], kRpnDirOutputSize * sizeof(float)));
+
+  // for scatter kernel
+  // NOTE(review): sized kNumThreads * grid, which only equals kRpnInputSize
+  // because kNumThreads (64) coincides with the PFE channel count -- a
+  // fragile coupling if either constant changes.
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_scattered_feature_),
+                       kNumThreads * kGridYSize * kGridXSize * sizeof(float)));
+
+  // for filter
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_px_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_py_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_pz_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_dx_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_dy_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_dz_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_anchors_ro_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_filtered_box_),
+                       kNumAnchor * kNumOutputBoxFeature * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_filtered_score_),
+                       kNumAnchor * sizeof(float)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_filtered_label_),
+                       kNumAnchor * sizeof(int)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_filtered_dir_),
+                       kNumAnchor * sizeof(int)));
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_box_for_nms_),
+                       kNumAnchor * kNumBoxCorners * sizeof(float)));
+  GPU_CHECK(
+      cudaMalloc(reinterpret_cast(&dev_filter_count_), sizeof(int)));
+}
+
+// Allocates host anchor arrays, generates all anchors, converts them to
+// axis-aligned boxes for the anchor mask, and uploads everything to the GPU.
+// NOTE(review): raw new[]/delete[] ownership (freed in the destructor) --
+// std::vector members would be the idiomatic replacement.
+void PointPillars::InitAnchors() {
+  // allocate memory for anchors
+  // "()" value-initializes, so every element starts at 0.
+  anchors_px_ = new float[kNumAnchor]();
+  anchors_py_ = new float[kNumAnchor]();
+  anchors_pz_ = new float[kNumAnchor]();
+  anchors_dx_ = new float[kNumAnchor]();
+  anchors_dy_ = new float[kNumAnchor]();
+  anchors_dz_ = new float[kNumAnchor]();
+  anchors_ro_ = new float[kNumAnchor]();
+  box_anchors_min_x_ = new float[kNumAnchor]();
+  box_anchors_min_y_ = new float[kNumAnchor]();
+  box_anchors_max_x_ = new float[kNumAnchor]();
+  box_anchors_max_y_ = new float[kNumAnchor]();
+  // deallocate these memories in destructor
+
+  GenerateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_,
+                  anchors_dy_, anchors_dz_, anchors_ro_);
+
+  ConvertAnchors2BoxAnchors(anchors_px_, anchors_py_, box_anchors_min_x_,
+                            box_anchors_min_y_, box_anchors_max_x_,
+                            box_anchors_max_y_);
+
+  PutAnchorsInDeviceMemory();
+}
+
+// Fills the seven anchor attribute arrays for both detection heads.
+// For each head: iterate the (strided) grid cells in its kAnchorRanges
+// window and, per cell, emit one anchor per (size class, rotation) pair.
+// NOTE(review): the parameter names (anchors_px_, ...) shadow the members
+// of the same names, and the zeroing loop below also writes the
+// box_anchors_* MEMBERS directly even though they are not parameters --
+// confusing but harmless since InitAnchors passes the members in.
+void PointPillars::GenerateAnchors(float* anchors_px_, float* anchors_py_,
+                                   float* anchors_pz_, float* anchors_dx_,
+                                   float* anchors_dy_, float* anchors_dz_,
+                                   float* anchors_ro_) {
+  // Zero everything (redundant with the value-initializing new[]() in
+  // InitAnchors, but keeps this function self-contained).
+  for (int i = 0; i < kNumAnchor; ++i) {
+    anchors_px_[i] = 0;
+    anchors_py_[i] = 0;
+    anchors_pz_[i] = 0;
+    anchors_dx_[i] = 0;
+    anchors_dy_[i] = 0;
+    anchors_dz_[i] = 0;
+    anchors_ro_[i] = 0;
+    box_anchors_min_x_[i] = 0;
+    box_anchors_min_y_[i] = 0;
+    box_anchors_max_x_[i] = 0;
+    box_anchors_max_y_[i] = 0;
+  }
+
+  int ind = 0;
+  for (size_t head = 0; head < kNumAnchorSets.size(); ++head) {
+    // Anchor spacing in meters for this head.
+    float x_stride = kPillarXSize * kAnchorStrides[head];
+    float y_stride = kPillarYSize * kAnchorStrides[head];
+    // Grid-index window this head covers (see kAnchorRanges).
+    int x_ind_start = kAnchorRanges[head * 4 + 0] / kAnchorStrides[head];
+    int x_ind_end = kAnchorRanges[head * 4 + 1] / kAnchorStrides[head];
+    int y_ind_start = kAnchorRanges[head * 4 + 2] / kAnchorStrides[head];
+    int y_ind_end = kAnchorRanges[head * 4 + 3] / kAnchorStrides[head];
+    // coors of first anchor's center
+    float x_offset = kMinXRange + x_stride / 2.0;
+    float y_offset = kMinYRange + y_stride / 2.0;
+
+    // Precompute the center coordinate of every column / row once.
+    std::vector anchor_x_count, anchor_y_count;
+    for (int i = x_ind_start; i < x_ind_end; ++i) {
+      float anchor_coor_x = static_cast(i) * x_stride + x_offset;
+      anchor_x_count.push_back(anchor_coor_x);
+    }
+    for (int i = y_ind_start; i < y_ind_end; ++i) {
+      float anchor_coor_y = static_cast(i) * y_stride + y_offset;
+      anchor_y_count.push_back(anchor_coor_y);
+    }
+
+    // Emit anchors: y-major over cells, then size class c, then each of
+    // that class's rotation variants.
+    for (int y = 0; y < y_ind_end - y_ind_start; ++y) {
+      for (int x = 0; x < x_ind_end - x_ind_start; ++x) {
+        int ro_count = 0;
+        for (size_t c = 0; c < kNumAnchorRo[head].size(); ++c) {
+          for (int i = 0; i < kNumAnchorRo[head][c]; ++i) {
+            anchors_px_[ind] = anchor_x_count[x];
+            anchors_py_[ind] = anchor_y_count[y];
+            anchors_ro_[ind] = kAnchorRo[head][ro_count];
+            anchors_pz_[ind] = kAnchorZCoors[head][c];
+            anchors_dx_[ind] = kAnchorDxSizes[head][c];
+            anchors_dy_[ind] = kAnchorDySizes[head][c];
+            anchors_dz_[ind] = kAnchorDzSizes[head][c];
+            ro_count++;
+            ind++;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Uploads the precomputed host anchor arrays (box corners for the anchor
+// mask plus the seven anchor attribute arrays) to their device buffers.
+// Called once from InitAnchors after generation/conversion.
+void PointPillars::PutAnchorsInDeviceMemory() {
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_,
+                       kNumAnchor * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_,
+                       kNumAnchor * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_,
+                       kNumAnchor * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_,
+                       kNumAnchor * sizeof(float), cudaMemcpyHostToDevice));
+
+  GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, kNumAnchor * sizeof(float),
+                       cudaMemcpyHostToDevice));
+}
+
+// Converts each oriented anchor into an axis-aligned box (min/max corners)
+// consumed by the anchor-mask kernel.
+// @param anchors_px / anchors_py anchor center coordinates (size kNumAnchor).
+// @param box_anchors_min_x_ .. box_anchors_max_y_ output corner arrays
+//        (size kNumAnchor); these trailing-underscore parameter names shadow
+//        the identically named members they are called with from InitAnchors.
+void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px,
+                                             float* anchors_py,
+                                             float* box_anchors_min_x_,
+                                             float* box_anchors_min_y_,
+                                             float* box_anchors_max_x_,
+                                             float* box_anchors_max_y_) {
+  // flipping box's dimension
+  // Anchors rotated ~90 deg (ro > 0.78 rad) swap dx/dy so the axis-aligned
+  // box matches the rotated footprint. kNumAnchor is ~470k, so the previous
+  // float[kNumAnchor] stack arrays (~1.8 MB each, ~3.7 MB total) risked a
+  // stack overflow; allocate them on the heap instead.
+  std::vector<float> flipped_anchors_dx(kNumAnchor, 0.0f);
+  std::vector<float> flipped_anchors_dy(kNumAnchor, 0.0f);
+  int ind = 0;
+  for (size_t head = 0; head < kNumAnchorSets.size(); ++head) {
+    int num_x_inds =
+        (kAnchorRanges[head * 4 + 1] - kAnchorRanges[head * 4 + 0]) /
+        kAnchorStrides[head];
+    int num_y_inds =
+        (kAnchorRanges[head * 4 + 3] - kAnchorRanges[head * 4 + 2]) /
+        kAnchorStrides[head];
+    int base_ind = ind;
+    int ro_count = 0;
+    // First pass: record the (possibly swapped) dx/dy of every anchor in
+    // this head.
+    for (int x = 0; x < num_x_inds; ++x) {
+      for (int y = 0; y < num_y_inds; ++y) {
+        ro_count = 0;
+        for (size_t c = 0; c < kNumAnchorRo[head].size(); ++c) {
+          for (int i = 0; i < kNumAnchorRo[head][c]; ++i) {
+            if (kAnchorRo[head][ro_count] <= 0.78) {
+              flipped_anchors_dx[base_ind] = kAnchorDxSizes[head][c];
+              flipped_anchors_dy[base_ind] = kAnchorDySizes[head][c];
+            } else {
+              flipped_anchors_dx[base_ind] = kAnchorDySizes[head][c];
+              flipped_anchors_dy[base_ind] = kAnchorDxSizes[head][c];
+            }
+            ro_count++;
+            base_ind++;
+          }
+        }
+      }
+    }
+    // Second pass: expand centers +/- half extents into the corner arrays.
+    for (int x = 0; x < num_x_inds; ++x) {
+      for (int y = 0; y < num_y_inds; ++y) {
+        for (size_t i = 0; i < kAnchorRo[head].size(); ++i) {
+          box_anchors_min_x_[ind] =
+              anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
+          box_anchors_min_y_[ind] =
+              anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
+          box_anchors_max_x_[ind] =
+              anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
+          box_anchors_max_y_[ind] =
+              anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
+          ind++;
+        }
+      }
+    }
+  }
+}
+
+// Loads the TorchScript PFE model onto the GPU selected by gpu_id_ (set
+// elsewhere; not visible in this file -- TODO confirm its default).
+// NOTE(review): on the CPU branch device_id_ is never assigned here, yet it
+// is passed to torch::Device below and checked in DoInference -- confirm it
+// has a sane in-class default.
+void PointPillars::InitTorch() {
+  if (gpu_id_ >= 0) {
+    device_type_ = torch::kCUDA;
+    device_id_ = gpu_id_;
+  } else {
+    device_type_ = torch::kCPU;
+  }
+
+  // Init torch net
+  torch::Device device(device_type_, device_id_);
+  pfe_net_ = torch::jit::load(pfe_torch_file_, device);
+}
+
+// Builds a TensorRT engine from the RPN ONNX model and creates the
+// execution context used by DoInference.
+// NOTE(review): every failure branch below only logs to std::cerr and falls
+// through, so a failed load dereferences a null pointer on the next line --
+// consider early-exit or throwing (and AERROR for consistency with the rest
+// of the module, since cyber/common/log.h is already included).
+void PointPillars::InitTRT() {
+  // create a TensorRT model from the onnx model and serialize it to a stream
+  nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr};
+  OnnxToTRTModel(rpn_onnx_file_, &rpn_trt_model_stream);
+  if (rpn_trt_model_stream == nullptr) {
+    std::cerr << "Failed to load ONNX file " << std::endl;
+  }
+
+  // deserialize the engine
+  rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
+  if (rpn_runtime_ == nullptr) {
+    std::cerr << "Failed to create TensorRT Runtime object." << std::endl;
+  }
+  rpn_engine_ = rpn_runtime_->deserializeCudaEngine(
+      rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
+  if (rpn_engine_ == nullptr) {
+    std::cerr << "Failed to create TensorRT Engine." << std::endl;
+  }
+  // The serialized stream is no longer needed once deserialized.
+  rpn_trt_model_stream->destroy();
+  rpn_context_ = rpn_engine_->createExecutionContext();
+  if (rpn_context_ == nullptr) {
+    std::cerr << "Failed to create TensorRT Execution Context." << std::endl;
+  }
+}
+
+// Parses an ONNX model and builds + serializes a TensorRT engine.
+// @param model_file path to the ONNX file.
+// @param trt_model_stream receives the serialized engine; caller owns it
+//        and must destroy() it (InitTRT does).
+// NOTE(review): exit(EXIT_FAILURE) on parse error terminates the whole
+// process without destroying builder/network -- acceptable only if a
+// missing model is considered fatal at startup.
+void PointPillars::OnnxToTRTModel(
+    const std::string& model_file,  // name of the onnx model
+    nvinfer1::IHostMemory**
+        trt_model_stream) {  // output buffer for the TensorRT model
+  int verbosity = static_cast(nvinfer1::ILogger::Severity::kWARNING);
+
+  // create the builder
+  // Explicit-batch flag is required for ONNX-parsed networks.
+  const auto explicit_batch =
+      static_cast(kBatchSize) << static_cast(
+          nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_);
+  nvinfer1::INetworkDefinition* network =
+      builder->createNetworkV2(explicit_batch);
+
+  auto parser = nvonnxparser::createParser(*network, g_logger_);
+  if (!parser->parseFromFile(model_file.c_str(), verbosity)) {
+    std::string msg("failed to parse onnx file");
+    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    exit(EXIT_FAILURE);
+  }
+
+  // Build the engine
+  builder->setMaxBatchSize(kBatchSize);
+  nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
+  // 1 MiB workspace -- presumably enough for this RPN; raise if TRT reports
+  // insufficient workspace during tactic selection.
+  config->setMaxWorkspaceSize(1 << 20);
+  nvinfer1::ICudaEngine* engine =
+      builder->buildEngineWithConfig(*network, *config);
+
+  parser->destroy();
+
+  // serialize the engine, then close everything down
+  *trt_model_stream = engine->serialize();
+  engine->destroy();
+  network->destroy();
+  config->destroy();
+  builder->destroy();
+}
+
+// Reproducible CPU preprocessing path: pillarizes the point cloud on the
+// host, then zeroes and refills the corresponding device buffers.
+// @param in_points_array flat host array, kNumPointFeature floats per point.
+// @param in_num_points number of points in in_points_array.
+// NOTE(review): the host scratch buffers are heap-allocated and freed on
+// every call -- member buffers or std::vector would avoid the churn; raw
+// new[]/delete[] also leaks if Preprocess() throws in between.
+void PointPillars::PreprocessCPU(const float* in_points_array,
+                                 const int in_num_points) {
+  int x_coors[kMaxNumPillars] = {};
+  int y_coors[kMaxNumPillars] = {};
+  float num_points_per_pillar[kMaxNumPillars] = {};
+
+  float* pillar_point_feature =
+      new float[kMaxNumPillars * kMaxNumPointsPerPillar * kNumPointFeature];
+  float* pillar_coors = new float[kMaxNumPillars * 4];
+  float* sparse_pillar_map = new float[kNumIndsForScan * kNumIndsForScan];
+
+  preprocess_points_ptr_->Preprocess(in_points_array, in_num_points, x_coors,
+                                     y_coors, num_points_per_pillar,
+                                     pillar_point_feature, pillar_coors,
+                                     sparse_pillar_map, host_pillar_count_);
+
+  // Clear device buffers before upload (only host_pillar_count_ pillars are
+  // valid, but the full buffers are copied below).
+  GPU_CHECK(cudaMemset(dev_x_coors_, 0, kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_y_coors_, 0, kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
+                       kMaxNumPillars * kMaxNumPointsPerPillar *
+                           kNumPointFeature * sizeof(float)));
+  GPU_CHECK(
+      cudaMemset(dev_pillar_coors_, 0, kMaxNumPillars * 4 * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0,
+                       kMaxNumPillars * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+
+  GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, kMaxNumPillars * sizeof(int),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, kMaxNumPillars * sizeof(int),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_point_feature_, pillar_point_feature,
+                       kMaxNumPillars * kMaxNumPointsPerPillar *
+                           kNumPointFeature * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_coors_, pillar_coors,
+                       kMaxNumPillars * 4 * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar,
+                       kMaxNumPillars * sizeof(float), cudaMemcpyHostToDevice));
+  // NOTE(review): the host map is float[] but the device buffer was
+  // allocated/memset with sizeof(int); the byte counts match only because
+  // sizeof(int) == sizeof(float) on supported targets -- the element types
+  // should agree. TODO confirm the intended element type of
+  // dev_sparse_pillar_map_.
+  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map,
+                       kNumIndsForScan * kNumIndsForScan * sizeof(float),
+                       cudaMemcpyHostToDevice));
+
+  delete[] pillar_point_feature;
+  delete[] pillar_coors;
+  delete[] sparse_pillar_map;
+}
+
+// GPU preprocessing path: uploads the raw point cloud, clears all
+// per-frame device buffers, and runs the CUDA pillarization kernels.
+// @param in_points_array flat host array, kNumPointFeature floats per point.
+// @param in_num_points number of points in in_points_array.
+// NOTE(review): dev_points is cudaMalloc'd/cudaFree'd every frame --
+// a persistent (grow-only) buffer would avoid per-frame allocation cost.
+void PointPillars::PreprocessGPU(const float* in_points_array,
+                                 const int in_num_points) {
+  float* dev_points;
+  GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_points),
+                       in_num_points * kNumPointFeature * sizeof(float)));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array,
+                       in_num_points * kNumPointFeature * sizeof(float),
+                       cudaMemcpyHostToDevice));
+
+  // Reset every per-frame buffer the kernels accumulate into.
+  GPU_CHECK(cudaMemset(dev_x_coors_, 0, kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_y_coors_, 0, kMaxNumPillars * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0,
+                       kMaxNumPillars * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
+                       kMaxNumPillars * kMaxNumPointsPerPillar *
+                           kNumPointFeature * sizeof(float)));
+  GPU_CHECK(
+      cudaMemset(dev_pillar_coors_, 0, kMaxNumPillars * 4 * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+  host_pillar_count_[0] = 0;
+
+  GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, kNumAnchor * sizeof(int)));
+
+  preprocess_points_cuda_ptr_->DoPreprocessPointsCuda(
+      dev_points, in_num_points, dev_x_coors_, dev_y_coors_,
+      dev_num_points_per_pillar_, dev_pillar_point_feature_, dev_pillar_coors_,
+      dev_sparse_pillar_map_, host_pillar_count_);
+
+  GPU_CHECK(cudaFree(dev_points));
+}
+
+// Dispatches to the CPU (bit-reproducible) or CUDA preprocessing path
+// according to the mode chosen at construction.
+void PointPillars::Preprocess(const float* in_points_array,
+                              const int in_num_points) {
+  if (reproduce_result_mode_) {
+    PreprocessCPU(in_points_array, in_num_points);
+  } else {
+    PreprocessGPU(in_points_array, in_num_points);
+  }
+}
+
+// Runs the full pipeline on one point cloud:
+// preprocess -> anchor mask -> PFE (Torch) -> scatter -> RPN (TensorRT)
+// -> postprocess/NMS.
+// @param in_points_array flat host array, kNumPointFeature floats per point.
+// @param in_num_points number of points.
+// @param out_detections appended with kNumOutputBoxFeature floats per box.
+// @param out_labels appended with one class label per box.
+void PointPillars::DoInference(const float* in_points_array,
+                               const int in_num_points,
+                               std::vector* out_detections,
+                               std::vector* out_labels) {
+  if (device_id_ < 0) {
+    AERROR << "Torch is not using GPU!";
+    return;
+  }
+
+  Preprocess(in_points_array, in_num_points);
+  anchor_mask_cuda_ptr_->DoAnchorMaskCuda(
+      dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_,
+      dev_box_anchors_min_x_, dev_box_anchors_min_y_, dev_box_anchors_max_x_,
+      dev_box_anchors_max_y_, dev_anchor_mask_);
+
+  cudaStream_t stream;
+  GPU_CHECK(cudaStreamCreate(&stream));
+  // Stage PFE inputs into the TRT-style device buffers (device-to-device).
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_point_feature_,
+                            kMaxNumPillars * kMaxNumPointsPerPillar *
+                                kNumPointFeature * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_num_points_per_pillar_,
+                            kMaxNumPillars * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_coors_,
+                            kMaxNumPillars * 4 * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  // Wrap the device buffers as CUDA tensors without copying.
+  torch::Tensor tensor_pillar_point_feature = torch::from_blob(pfe_buffers_[0],
+      {kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature},
+      torch::kCUDA);
+  torch::Tensor tensor_num_points_per_pillar = torch::from_blob(
+      pfe_buffers_[1], {kMaxNumPillars}, torch::kCUDA);
+  torch::Tensor tensor_pillar_coors = torch::from_blob(pfe_buffers_[2],
+      {kMaxNumPillars, 4}, torch::kCUDA);
+  torch::Device device(device_type_, device_id_);
+  // Tensor::to() returns a new tensor; the previous code discarded the
+  // result, making these calls no-ops. Assign the results back.
+  tensor_pillar_point_feature = tensor_pillar_point_feature.to(device);
+  tensor_num_points_per_pillar = tensor_num_points_per_pillar.to(device);
+  tensor_pillar_coors = tensor_pillar_coors.to(device);
+
+//***********************************************************************
+//bug:operation failed in the TorchScript interpreter
+  auto pfe_output = pfe_net_.forward({tensor_pillar_point_feature,
+                                      tensor_num_points_per_pillar,
+                                      tensor_pillar_coors}).toTensor();
+
+  // Scatter per-pillar features back onto the dense BEV grid for the RPN.
+  GPU_CHECK(
+      cudaMemset(dev_scattered_feature_, 0, kRpnInputSize * sizeof(float)));
+  scatter_cuda_ptr_->DoScatterCuda(
+      host_pillar_count_[0], dev_x_coors_, dev_y_coors_,
+      pfe_output.data_ptr(), dev_scattered_feature_);
+  GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_,
+                            kBatchSize * kRpnInputSize * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  rpn_context_->enqueueV2(rpn_buffers_, stream, nullptr);
+  GPU_CHECK(cudaMemset(dev_filter_count_, 0, sizeof(int)));
+//bug:radix_sort: failed on 2nd step: cudaErrorInvalidValue: invalid argument
+  postprocess_cuda_ptr_->DoPostprocessCuda(
+      reinterpret_cast(rpn_buffers_[1]),
+      reinterpret_cast(rpn_buffers_[2]),
+      reinterpret_cast(rpn_buffers_[3]), dev_anchor_mask_,
+      dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_,
+      dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, dev_filtered_box_,
+      dev_filtered_score_, dev_filtered_label_, dev_filtered_dir_,
+      dev_box_for_nms_, dev_filter_count_, out_detections, out_labels);
+//***************************************************************************
+
+  // release the stream and the buffers
+  // (now error-checked like every other CUDA call in this file)
+  GPU_CHECK(cudaStreamDestroy(stream));
+}
+
+} // namespace lidar
+} // namespace perception
+} // namespace apollo
diff --git a/modules/perception/lidar_point_pillars/point_pillars.h b/modules/perception/lidar_point_pillars/point_pillars.h
new file mode 100755
index 0000000..728e4f9
--- /dev/null
+++ b/modules/perception/lidar_point_pillars/point_pillars.h
@@ -0,0 +1,321 @@
+#pragma once
+
+// headers in STL
+#include
+#include
+#include
+#include
+#include