diff --git a/apps/lidar_odometry_step_1/lidar_odometry_utils_optimizers.cpp b/apps/lidar_odometry_step_1/lidar_odometry_utils_optimizers.cpp index d13e97ae..fef3b255 100644 --- a/apps/lidar_odometry_step_1/lidar_odometry_utils_optimizers.cpp +++ b/apps/lidar_odometry_step_1/lidar_odometry_utils_optimizers.cpp @@ -3,9 +3,8 @@ #include #include #include -#include -#include #include +#include #include @@ -1452,12 +1451,11 @@ static void add_indoor_hessian_contribution( const Eigen::Vector3d& point_source, const TaitBryanPoseWithTrig& pose_trig, const Eigen::Vector3d& viewport, - const int matrix_offset, const bool ablation_study_use_view_point_and_normal_vectors, const bool ablation_study_use_planarity, const bool ablation_study_use_norm, - Eigen::MatrixXd& AtPAndt, - Eigen::MatrixXd& AtPBndt) + Eigen::Matrix& out_AtPA, + Eigen::Matrix& out_AtPB) { // Use precomputed cov_inverse from update_rgd const Eigen::Matrix3d& infm = indoor_bucket.cov_inverse; @@ -1551,8 +1549,8 @@ static void add_indoor_hessian_contribution( AtPB *= w; } - AtPAndt.block<6, 6>(matrix_offset, matrix_offset) += AtPA; - AtPBndt.block<6, 1>(matrix_offset, 0) -= AtPB; + out_AtPA += AtPA; + out_AtPB += AtPB; } // Helper to process outdoor bucket contribution @@ -1561,11 +1559,10 @@ static void add_outdoor_hessian_contribution( const Eigen::Vector3d& point_source, const TaitBryanPoseWithTrig& pose_trig, const Eigen::Vector3d& viewport, - const int matrix_offset, const bool ablation_study_use_view_point_and_normal_vectors, const bool ablation_study_use_planarity, - Eigen::MatrixXd& AtPAndt, - Eigen::MatrixXd& AtPBndt) + Eigen::Matrix& out_AtPA, + Eigen::Matrix& out_AtPB) { // Use precomputed cov_inverse from update_rgd const Eigen::Matrix3d& infm = outdoor_bucket.cov_inverse; @@ -1652,12 +1649,13 @@ static void add_outdoor_hessian_contribution( AtPB *= planarity; } - AtPAndt.block<6, 6>(matrix_offset, matrix_offset) += AtPA; - AtPBndt.block<6, 1>(matrix_offset, 0) -= AtPB; + out_AtPA += AtPA; + out_AtPB += AtPB; } // Unified hessian function that handles indoor and optionally outdoor contributions static void compute_hessian( + size_t point_index, const Point3Di& point, const std::vector& trajectory, const std::vector& tait_bryan_poses, @@ -1670,9 +1668,9 @@ static void compute_hessian( const bool ablation_study_use_view_point_and_normal_vectors, const bool ablation_study_use_planarity, const bool ablation_study_use_norm, - Eigen::MatrixXd& AtPAndt, - Eigen::MatrixXd& AtPBndt, - LookupStats& stats, + std::vector>& point_AtPA, + std::vector>& point_AtPB, + std::vector& per_point_stats, const bool& ablation_study_use_threshold_outer_rgd, const double& convergence_result, const double& convergence_delta_threshold_outer_rgd) @@ -1689,29 +1687,25 @@ static void compute_hessian( const Eigen::Vector3d point_global = trajectory[point.index_pose] * point.point; const Eigen::Vector3d& point_source = point.point; const TaitBryanPoseWithTrig& pose_trig = tait_bryan_poses[point.index_pose]; - const int matrix_offset = point.index_pose * 6; const Eigen::Vector3d viewport = trajectory[point.index_pose].translation(); // Indoor contribution (independent) const auto indoor_key = get_rgd_index_3d(point_global, bucket_size_indoor); const auto indoor_it = buckets_indoor.find(indoor_key); - ++stats.indoor_lookups; + ++per_point_stats[point_index].indoor_lookups; - const NDT::Bucket* indoor_bucket = nullptr; if (indoor_it != buckets_indoor.end()) { - indoor_bucket = &indoor_it->second; add_indoor_hessian_contribution( - *indoor_bucket, + indoor_it->second, point_source, pose_trig, viewport, - matrix_offset, ablation_study_use_view_point_and_normal_vectors, ablation_study_use_planarity, ablation_study_use_norm, - AtPAndt, - AtPBndt); + point_AtPA[point_index], + point_AtPB[point_index]); } // Outdoor contribution (independent, only when hierarchical mode and range >= 5.0) @@ -1736,7 +1730,7 @@ static void compute_hessian( { const auto outdoor_key = get_rgd_index_3d(point_global, bucket_size_outdoor); const auto outdoor_it = buckets_outdoor.find(outdoor_key); - ++stats.outdoor_lookups; + ++per_point_stats[point_index].outdoor_lookups; if (outdoor_it != buckets_outdoor.end()) { @@ -1745,11 +1739,10 @@ static void compute_hessian( point_source, pose_trig, viewport, - matrix_offset, ablation_study_use_view_point_and_normal_vectors, ablation_study_use_planarity, - AtPAndt, - AtPBndt); + point_AtPA[point_index], + point_AtPB[point_index]); } } } @@ -1824,85 +1817,52 @@ void optimize_lidar_odometry( UTL_PROFILER_END(pre_hessian); UTL_PROFILER_BEGIN(hessian_compute, "hessian_compute"); - if (multithread) - { - using MatrixPair = std::pair; - tbb::combinable thread_local_matrices( - [&intermediate_trajectory]() - { - return std::make_pair( - Eigen::MatrixXd::Zero(intermediate_trajectory.size() * 6, intermediate_trajectory.size() * 6), - Eigen::MatrixXd::Zero(intermediate_trajectory.size() * 6, 1)); - }); - tbb::combinable thread_local_stats; - - tbb::parallel_for( - tbb::blocked_range(0, intermediate_points.size()), - [&](const tbb::blocked_range& range) - { - auto& [local_AtPA, local_AtPB] = thread_local_matrices.local(); - auto& local_stats = thread_local_stats.local(); - for (size_t i = range.begin(); i != range.end(); ++i) - { - compute_hessian( - intermediate_points[i], - intermediate_trajectory, - tait_bryan_poses, - buckets_indoor, - bucket_size_indoor, - buckets_outdoor, - bucket_size_outdoor, - max_distance, - ablation_study_use_hierarchical_rgd, - ablation_study_use_view_point_and_normal_vectors, - ablation_study_use_planarity, - ablation_study_use_norm, - local_AtPA, - local_AtPB, - local_stats, - ablation_study_use_threshold_outer_rgd, - convergence_result, - convergence_delta_threshold_outer_rgd); - } - }); + const size_t num_points = intermediate_points.size(); + using Mat6x6 = Eigen::Matrix; + using Vec6x1 = Eigen::Matrix; + std::vector point_AtPA(num_points, Mat6x6::Zero()); + std::vector point_AtPB(num_points, Vec6x1::Zero()); + std::vector per_point_stats(num_points); + + auto compute_point = [&](size_t i) + { + compute_hessian( + i, + intermediate_points[i], + intermediate_trajectory, + tait_bryan_poses, + buckets_indoor, + bucket_size_indoor, + buckets_outdoor, + bucket_size_outdoor, + max_distance, + ablation_study_use_hierarchical_rgd, + ablation_study_use_view_point_and_normal_vectors, + ablation_study_use_planarity, + ablation_study_use_norm, + point_AtPA, + point_AtPB, + per_point_stats, + ablation_study_use_threshold_outer_rgd, + convergence_result, + convergence_delta_threshold_outer_rgd); + }; - thread_local_matrices.combine_each( - [&AtPAndt, &AtPBndt](const MatrixPair& local) - { - AtPAndt += local.first; - AtPBndt += local.second; - }); - thread_local_stats.combine_each( - [&lookup_stats](const LookupStats& local) - { - lookup_stats.indoor_lookups += local.indoor_lookups; - lookup_stats.outdoor_lookups += local.outdoor_lookups; - }); - } + if (multithread) + tbb::parallel_for(size_t(0), num_points, compute_point); else + for (size_t i = 0; i < num_points; ++i) + compute_point(i); + + // Sequential sum in point order -- guarantees identical accumulation order + // for both ST and MT paths, avoiding FP non-associativity nondeterminism. + for (size_t i = 0; i < num_points; ++i) { - for (const auto& pt : intermediate_points) - { - compute_hessian( - pt, - intermediate_trajectory, - tait_bryan_poses, - buckets_indoor, - bucket_size_indoor, - buckets_outdoor, - bucket_size_outdoor, - max_distance, - ablation_study_use_hierarchical_rgd, - ablation_study_use_view_point_and_normal_vectors, - ablation_study_use_planarity, - ablation_study_use_norm, - AtPAndt, - AtPBndt, - lookup_stats, - ablation_study_use_threshold_outer_rgd, - convergence_result, - convergence_delta_threshold_outer_rgd); - } + const int c = intermediate_points[i].index_pose * 6; + AtPAndt.block<6, 6>(c, c) += point_AtPA[i]; + AtPBndt.block<6, 1>(c, 0) -= point_AtPB[i]; + lookup_stats.indoor_lookups += per_point_stats[i].indoor_lookups; + lookup_stats.outdoor_lookups += per_point_stats[i].outdoor_lookups; } UTL_PROFILER_END(hessian_compute); @@ -2335,6 +2295,8 @@ bool process_worker_step_1( std::atomic& loProgress, double& ts_failure) { + UTL_PROFILER_SCOPE("process_worker_step_1"); + Eigen::Vector3d mean_shift(0.0, 0.0, 0.0); if (i > 1 && params.use_motion_from_previous_step) { @@ -2427,6 +2389,8 @@ bool process_worker_step_2( double& ts_failure, std::vector& intermediate_points) { + UTL_PROFILER_SCOPE("process_worker_step_2"); + bool add_pitch_roll_constraint = false; spdlog::stopwatch stopwatch_worker; @@ -2583,8 +2547,7 @@ bool process_worker_step_lidar_odometry_core( { spdlog::stopwatch stopwatch_realtime; - UTL_PROFILER_END(before_iter); - UTL_PROFILER_BEGIN(iter_loop, "iteration_loop"); + UTL_PROFILER_SCOPE("process_worker_step_lidar_odometry_core"); for (int iter = 0; iter < params.nr_iter; iter++) { @@ -2646,7 +2609,6 @@ bool process_worker_step_lidar_odometry_core( break; } } - UTL_PROFILER_END(iter_loop); return true; } @@ -2773,6 +2735,7 @@ bool compute_step_2( for (int i = 0; i < worker_data.size(); i++) { UTL_PROFILER_BEGIN(before_iter, "before_iterations"); + std::vector intermediate_points; // = worker_data[i].load_points(worker_data[i].intermediate_points_cache_file_name); load_vector_data(worker_data[i].intermediate_points_cache_file_name.string(), intermediate_points); @@ -2830,6 +2793,8 @@ bool compute_step_2( double delta = 100000.0; double lm_factor = 1.0; + UTL_PROFILER_END(before_iter); + process_worker_step_lidar_odometry_core( worker_data[i], i > 0 ? worker_data[i - 1] : worker_data[0],