diff --git a/include/kde1d/kde1d.hpp b/include/kde1d/kde1d.hpp index e46d970..663f792 100644 --- a/include/kde1d/kde1d.hpp +++ b/include/kde1d/kde1d.hpp @@ -270,8 +270,9 @@ Kde1d::fit(const Eigen::VectorXd& x, const Eigen::VectorXd& weights) Eigen::VectorXd w = weights; tools::remove_nans(xx, w); - if (w.size() > 0) + if (w.size() > 0) { w /= w.mean(); + } if (type_ == VarType::zero_inflated) { if (w.size() == 0) @@ -319,13 +320,24 @@ Kde1d::fit(const Eigen::VectorXd& x, const Eigen::VectorXd& weights) if (type_ == VarType::discrete) { xx = xx.array().round(); } - loglik_ = (this->pdf(xx, false).array().log()).sum(); + + if (w.size() == 0) { + w = Eigen::VectorXd::Ones(xx.size()); + } + + loglik_ = (this->pdf(xx, false).array().log().array() * w.array()).sum(); + if (prob0_ > 0) { + // For zero inflated data, all observations with value 0 have been removed, + // so their likelihood contribution is missing. There were n * prob0_ such + // observations, each with log-likelihood contribution log(prob0_). + loglik_ += static_cast(x.size()) * prob0_ * std::log(prob0_); + } // calculate effective degrees of freedom interp::InterpolationGrid infl_grid( - grid_points, fitted.col(1).cwiseMin(3.0).cwiseMax(0), 0); - Eigen::VectorXd influences = infl_grid.interpolate(xx).array() * (1 - prob0_); - edf_ = influences.sum() + (prob0_ > 0); + grid_points, fitted.col(1).cwiseMin(3.0).cwiseMax(0), 0); + Eigen::VectorXd influences = infl_grid.interpolate(xx).array(); + edf_ = influences.sum() + static_cast(prob0_ > 0); // store bandwidth in standardized format bandwidth_ = bandwidth_ / multiplier_; @@ -583,7 +595,10 @@ Kde1d::fit_lp(const Eigen::VectorXd& x, grid_points(m - 1), m - 1, Eigen::VectorXd::Ones(x.size())); - wbin = wcount.cwiseQuotient(count); + wbin = (count.array() == 0).select( + Eigen::VectorXd::Zero(count.size()), + wcount.cwiseQuotient(count) + ); } Eigen::MatrixXd res(f0.size(), 2);