diff --git a/stan/math/prim/fun/log_gamma_q_dgamma.hpp b/stan/math/prim/fun/log_gamma_q_dgamma.hpp deleted file mode 100644 index e264c5d50a5..00000000000 --- a/stan/math/prim/fun/log_gamma_q_dgamma.hpp +++ /dev/null @@ -1,156 +0,0 @@ -#ifndef STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP -#define STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace stan { -namespace math { - -/** - * Result structure containing log(Q(a,z)) and its gradient with respect to a. - * - * @tparam T return type - */ -template -struct log_gamma_q_result { - T log_q; ///< log(Q(a,z)) where Q is upper regularized incomplete gamma - T dlog_q_da; ///< d/da log(Q(a,z)) -}; - -namespace internal { - -/** - * Compute log(Q(a,z)) using continued fraction expansion for upper incomplete - * gamma function. - * - * @tparam T_a Type of shape parameter a (double or fvar types) - * @tparam T_z Type of value parameter z (double or fvar types) - * @param a Shape parameter - * @param z Value at which to evaluate - * @param precision Convergence threshold - * @param max_steps Maximum number of continued fraction iterations - * @return log(Q(a,z)) with the return type of T_a and T_z - */ -template -inline return_type_t log_q_gamma_cf(const T_a& a, const T_z& z, - double precision = 1e-16, - int max_steps = 250) { - using stan::math::lgamma; - using stan::math::log; - using stan::math::value_of; - using std::fabs; - using T_return = return_type_t; - - const T_return log_prefactor = a * log(z) - z - lgamma(a); - - T_return b = z + 1.0 - a; - T_return C = (fabs(value_of(b)) >= EPSILON) ? b : T_return(EPSILON); - T_return D = 0.0; - T_return f = C; - - for (int i = 1; i <= max_steps; ++i) { - T_a an = -i * (i - a); - b += 2.0; - - D = b + an * D; - if (fabs(D) < EPSILON) { - D = EPSILON; - } - C = b + an / C; - if (fabs(C) < EPSILON) { - C = EPSILON; - } - - D = inv(D); - T_return delta = C * D; - f *= delta; - - const double delta_m1 = value_of(fabs(value_of(delta) - 1.0)); - if (delta_m1 < precision) { - break; - } - } - - return log_prefactor - log(f); -} - -} // namespace internal - -/** - * Compute log(Q(a,z)) and its gradient with respect to a using continued - * fraction expansion, where Q(a,z) = Gamma(a,z) / Gamma(a) is the regularized - * upper incomplete gamma function. - * - * This uses a continued fraction representation for numerical stability when - * computing the upper incomplete gamma function in log space, along with - * analytical gradient computation. - * - * @tparam T_a type of the shape parameter - * @tparam T_z type of the value parameter - * @param a shape parameter (must be positive) - * @param z value parameter (must be non-negative) - * @param precision convergence threshold - * @param max_steps maximum iterations for continued fraction - * @return structure containing log(Q(a,z)) and d/da log(Q(a,z)) - */ -template -inline log_gamma_q_result> log_gamma_q_dgamma( - const T_a& a, const T_z& z, double precision = 1e-16, int max_steps = 250) { - using std::exp; - using std::log; - using T_return = return_type_t; - - const double a_dbl = value_of(a); - const double z_dbl = value_of(z); - - log_gamma_q_result result; - - // For z > a + 1, use continued fraction for better numerical stability - if (z_dbl > a_dbl + 1.0) { - result.log_q = internal::log_q_gamma_cf(a_dbl, z_dbl, precision, max_steps); - - // For gradient, use: d/da log(Q) = (1/Q) * dQ/da - // grad_reg_inc_gamma computes dQ/da - const double Q_val = exp(result.log_q); - const double dQ_da - = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl)); - result.dlog_q_da = dQ_da / Q_val; - - } else { - // For z <= a + 1, use log1m(P(a,z)) for better numerical accuracy - const double P_val = gamma_p(a_dbl, z_dbl); - result.log_q = log1m(P_val); - - // Gradient: d/da log(Q) = (1/Q) * dQ/da - // grad_reg_inc_gamma computes dQ/da - const double Q_val = exp(result.log_q); - if (Q_val > 0) { - const double dQ_da - = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl)); - result.dlog_q_da = dQ_da / Q_val; - } else { - // Fallback if Q rounds to zero - use asymptotic approximation - result.dlog_q_da = log(z_dbl) - digamma(a_dbl); - } - } - - return result; -} - -} // namespace math -} // namespace stan - -#endif diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp index eada09359e9..a670cefcecf 100644 --- a/stan/math/prim/prob/gamma_lccdf.hpp +++ b/stan/math/prim/prob/gamma_lccdf.hpp @@ -6,20 +6,15 @@ #include #include #include -#include -#include +#include #include -#include -#include #include -#include #include #include #include #include #include -#include -#include +#include #include #include @@ -29,9 +24,10 @@ namespace math { template inline return_type_t gamma_lccdf( const T_y& y, const T_shape& alpha, const T_inv_scale& beta) { + using T_partials_return = partials_return_t; using std::exp; using std::log; - using T_partials_return = partials_return_t; + using std::pow; using T_y_ref = ref_type_t; using T_alpha_ref = ref_type_t; using T_beta_ref = ref_type_t; @@ -55,127 +51,61 @@ inline return_type_t gamma_lccdf( scalar_seq_view y_vec(y_ref); scalar_seq_view alpha_vec(alpha_ref); scalar_seq_view beta_vec(beta_ref); - const size_t N = max_size(y, alpha, beta); - - constexpr bool any_fvar = is_fvar>::value - || is_fvar>::value - || is_fvar>::value; - constexpr bool partials_fvar = is_fvar::value; + size_t N = max_size(y, alpha, beta); + + // Explicit return for extreme values + // The gradients are technically ill-defined, but treated as zero + for (size_t i = 0; i < stan::math::size(y); i++) { + if (y_vec.val(i) == 0) { + // LCCDF(0) = log(P(Y > 0)) = log(1) = 0 + return ops_partials.build(0.0); + } + } for (size_t n = 0; n < N; n++) { // Explicit results for extreme values // The gradients are technically ill-defined, but treated as zero - const T_partials_return y_dbl = y_vec.val(n); - if (y_dbl == 0.0) { - continue; - } - if (y_dbl == INFTY) { + if (y_vec.val(n) == INFTY) { + // LCCDF(∞) = log(P(Y > ∞)) = log(0) = -∞ return ops_partials.build(negative_infinity()); } + const T_partials_return y_dbl = y_vec.val(n); const T_partials_return alpha_dbl = alpha_vec.val(n); const T_partials_return beta_dbl = beta_vec.val(n); + const T_partials_return beta_y_dbl = beta_dbl * y_dbl; - const T_partials_return beta_y = beta_dbl * y_dbl; - if (beta_y == INFTY) { - return ops_partials.build(negative_infinity()); - } + // Qn = 1 - Pn + const T_partials_return Qn = gamma_q(alpha_dbl, beta_y_dbl); + const T_partials_return log_Qn = log(Qn); - bool use_cf = beta_y > alpha_dbl + 1.0; - T_partials_return log_Qn; - [[maybe_unused]] T_partials_return dlogQ_dalpha = 0.0; - - // Branch by autodiff type first, then handle use_cf logic inside each path - if constexpr (!any_fvar && is_autodiff_v) { - // var-only path: use log_gamma_q_dgamma which computes both log_q - // and its gradient analytically with double inputs - const double beta_y_dbl = value_of_rec(beta_y); - const double alpha_dbl_val = value_of_rec(alpha_dbl); - - if (use_cf) { - auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl); - log_Qn = log_q_result.log_q; - dlogQ_dalpha = log_q_result.dlog_q_da; - } else { - const T_partials_return Pn = gamma_p(alpha_dbl, beta_y); - log_Qn = log1m(Pn); - const T_partials_return Qn = exp(log_Qn); - - // Check if we need to fallback to continued fraction - bool need_cf_fallback - = !std::isfinite(value_of_rec(log_Qn)) || Qn <= 0.0; - if (need_cf_fallback && beta_y > 0.0) { - auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl); - log_Qn = log_q_result.log_q; - dlogQ_dalpha = log_q_result.dlog_q_da; - } else { - dlogQ_dalpha = -grad_reg_lower_inc_gamma(alpha_dbl, beta_y) / Qn; - } - } - } else if constexpr (partials_fvar && is_autodiff_v) { - // fvar path: use unit derivative trick to compute gradients - T_partials_return alpha_unit = alpha_dbl; - alpha_unit.d_ = 1; - T_partials_return beta_unit = beta_y; - beta_unit.d_ = 0; - - if (use_cf) { - log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); - T_partials_return log_Qn_fvar - = internal::log_q_gamma_cf(alpha_unit, beta_unit); - dlogQ_dalpha = log_Qn_fvar.d_; - } else { - const T_partials_return Pn = gamma_p(alpha_dbl, beta_y); - log_Qn = log1m(Pn); - - if (!std::isfinite(value_of_rec(log_Qn)) && beta_y > 0.0) { - // Fallback to continued fraction - log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); - T_partials_return log_Qn_fvar - = internal::log_q_gamma_cf(alpha_unit, beta_unit); - dlogQ_dalpha = log_Qn_fvar.d_; - } else { - T_partials_return log_Qn_fvar = log1m(gamma_p(alpha_unit, beta_unit)); - dlogQ_dalpha = log_Qn_fvar.d_; - } - } - } else { - // No alpha derivative needed (alpha is constant or double-only) - if (use_cf) { - log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); - } else { - const T_partials_return Pn = gamma_p(alpha_dbl, beta_y); - log_Qn = log1m(Pn); - - if (!std::isfinite(value_of_rec(log_Qn)) && beta_y > 0.0) { - log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); - } - } - } - if (!std::isfinite(value_of_rec(log_Qn))) { - return ops_partials.build(negative_infinity()); - } P += log_Qn; - if constexpr (is_autodiff_v || is_autodiff_v) { - const T_partials_return log_y = log(y_dbl); - const T_partials_return alpha_minus_one = fma(alpha_dbl, log_y, -log_y); - - const T_partials_return log_pdf = alpha_dbl * log(beta_dbl) - - lgamma(alpha_dbl) + alpha_minus_one - - beta_y; - - const T_partials_return hazard = exp(log_pdf - log_Qn); // f/Q + if constexpr (is_any_autodiff_v) { + const T_partials_return log_y_dbl = log(y_dbl); + const T_partials_return log_beta_dbl = log(beta_dbl); + const T_partials_return log_pdf + = alpha_dbl * log_beta_dbl - lgamma(alpha_dbl) + + (alpha_dbl - 1.0) * log_y_dbl - beta_y_dbl; + const T_partials_return common_term = exp(log_pdf - log_Qn); if constexpr (is_autodiff_v) { - partials<0>(ops_partials)[n] -= hazard; + // d/dy log(1-F(y)) = -f(y)/(1-F(y)) + partials<0>(ops_partials)[n] -= common_term; } if constexpr (is_autodiff_v) { - partials<2>(ops_partials)[n] -= (y_dbl / beta_dbl) * hazard; + // d/dbeta log(1-F(y)) = -y*f(y)/(beta*(1-F(y))) + partials<2>(ops_partials)[n] -= y_dbl / beta_dbl * common_term; } } + if constexpr (is_autodiff_v) { - partials<1>(ops_partials)[n] += dlogQ_dalpha; + const T_partials_return digamma_val = digamma(alpha_dbl); + const T_partials_return gamma_val = tgamma(alpha_dbl); + // d/dalpha log(1-F(y)) = grad_upper_inc_gamma / (1-F(y)) + partials<1>(ops_partials)[n] + += grad_reg_inc_gamma(alpha_dbl, beta_y_dbl, gamma_val, digamma_val) + / Qn; } } return ops_partials.build(P); diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp index e0c84861e0c..2893f2f0166 100644 --- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp @@ -66,51 +66,6 @@ TEST(ProbGamma, lccdf_small_alpha_small_y) { EXPECT_LT(result, 0.0); } -TEST(ProbGamma, lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { - using stan::math::gamma_lccdf; - using stan::math::gamma_p; - using stan::math::gamma_q; - using stan::math::log1m; - - // For large alpha and very small y, the CCDF is extremely close to 1. - // The old implementation computed `log(gamma_q(alpha, beta * y))`, which can - // round to `log(1) == 0`. The updated implementation uses `log1m(gamma_p)`, - // which preserves the tiny negative value. - double y = 1e-8; - double alpha = 31.25; - double beta = 1.0; - - double new_val = gamma_lccdf(y, alpha, beta); - double expected = log1m(gamma_p(alpha, beta * y)); - - // Old code: log(gamma_q(alpha, beta * y)) - double old_val = std::log(gamma_q(alpha, beta * y)); - - EXPECT_EQ(old_val, 0.0); - EXPECT_LT(new_val, 0.0); - EXPECT_DOUBLE_EQ(new_val, expected); -} - -TEST(ProbGamma, lccdf_log1m_exp_lcdf_rounds_to_inf) { - using stan::math::gamma_lccdf; - using stan::math::gamma_lcdf; - using stan::math::log1m_exp; - using stan::math::negative_infinity; - - double y = 20000.0; - double alpha = 800.0; - double beta = 0.1; - - double lcdf = gamma_lcdf(y, alpha, beta); - double log1m_lcdf = log1m_exp(lcdf); - double lccdf = gamma_lccdf(y, alpha, beta); - - EXPECT_EQ(lcdf, 0.0); - EXPECT_EQ(log1m_lcdf, negative_infinity()); - EXPECT_TRUE(std::isfinite(lccdf)); - EXPECT_LT(lccdf, 0.0); -} - TEST(ProbGamma, lccdf_large_alpha_large_y) { using stan::math::gamma_lccdf; @@ -199,29 +154,6 @@ TEST(ProbGamma, lccdf_extreme_large_alpha) { EXPECT_TRUE(std::isfinite(result)); } -TEST(ProbGamma, lccdf_large_alpha_1000_beta_3) { - using stan::math::gamma_lccdf; - - // Large alpha = 1000, beta = 3 - double alpha = 1000.0; - double beta = 3.0; - - // Test various y values - std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; - - for (double y : y_values) { - double result = gamma_lccdf(y, alpha, beta); - - // Result should be finite - EXPECT_TRUE(std::isfinite(result)) - << "Failed for y=" << y << ", alpha=" << alpha << ", beta=" << beta; - - // Result should be <= 0 (log of probability) - EXPECT_LE(result, 0.0) << "Positive value for y=" << y - << ", alpha=" << alpha << ", beta=" << beta; - } -} - TEST(ProbGamma, lccdf_monotonic_in_y) { using stan::math::gamma_lccdf; diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp index aa1f9a1a942..1066773e060 100644 --- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/rev/prob/gamma_lccdf_test.cpp @@ -230,73 +230,6 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_small) { } } -TEST(ProbDistributionsGamma, - lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { - using stan::math::gamma_lccdf; - using stan::math::gamma_p; - using stan::math::gamma_q; - using stan::math::log1m; - using stan::math::var; - - // Same comparison as the prim test, but also exercises autodiff for - // alpha > 30. - double y_d = 1e-8; - double alpha_d = 31.25; - double beta_d = 1.0; - - var y_v = y_d; - var alpha_v = alpha_d; - var beta_v = beta_d; - - var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); - - // Old code: log(gamma_q(alpha, beta * y)) - double old_val = std::log(gamma_q(alpha_d, beta_d * y_d)); - double expected = log1m(gamma_p(alpha_d, beta_d * y_d)); - - EXPECT_EQ(old_val, 0.0); - EXPECT_LT(lccdf_var.val(), 0.0); - EXPECT_DOUBLE_EQ(lccdf_var.val(), expected); - - std::vector vars = {y_v, alpha_v, beta_v}; - std::vector grads; - lccdf_var.grad(vars, grads); - - for (size_t i = 0; i < grads.size(); ++i) { - EXPECT_FALSE(std::isnan(grads[i])) << "Gradient " << i << " is NaN"; - EXPECT_TRUE(std::isfinite(grads[i])) - << "Gradient " << i << " is not finite"; - } - - // d/dy log(CCDF) should be <= 0 (can underflow to -0) - EXPECT_LE(grads[0], 0.0); -} - -TEST(ProbDistributionsGamma, lccdf_log1m_exp_lcdf_rounds_to_inf) { - using stan::math::gamma_lccdf; - using stan::math::gamma_lcdf; - using stan::math::log1m_exp; - using stan::math::negative_infinity; - using stan::math::var; - - double y_d = 20000.0; - double alpha_d = 800.0; - double beta_d = 0.1; - - double lcdf = gamma_lcdf(y_d, alpha_d, beta_d); - double log1m_lcdf = log1m_exp(lcdf); - - var y_v = y_d; - var alpha_v = alpha_d; - var beta_v = beta_d; - var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); - - EXPECT_EQ(lcdf, 0.0); - EXPECT_EQ(log1m_lcdf, negative_infinity()); - EXPECT_TRUE(std::isfinite(lccdf_var.val())); - EXPECT_LT(lccdf_var.val(), 0.0); -} - TEST(ProbDistributionsGamma, lccdf_extreme_values_large) { using stan::math::gamma_lccdf; using stan::math::var; @@ -325,36 +258,6 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_large) { } } -TEST(ProbDistributionsGamma, lccdf_large_alpha_1000_beta_3) { - using stan::math::gamma_lccdf; - using stan::math::var; - - // Large alpha = 1000, beta = 3 - // Note: This test only checks values, not gradients, as large alpha values - // can cause numerical issues with gradient computation - double alpha_d = 1000.0; - double beta_d = 3.0; - - // Test various y values - std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; - - for (double y_d : y_values) { - var y_v = y_d; - var alpha_v = alpha_d; - var beta_v = beta_d; - - var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); - - // Value should be finite and <= 0 - EXPECT_TRUE(std::isfinite(lccdf_var.val())) - << "Failed for y=" << y_d << ", alpha=" << alpha_d - << ", beta=" << beta_d; - EXPECT_LE(lccdf_var.val(), 0.0) - << "Positive value for y=" << y_d << ", alpha=" << alpha_d - << ", beta=" << beta_d; - } -} - TEST(ProbDistributionsGamma, lccdf_alpha_one_derivatives) { using stan::math::gamma_lccdf; using stan::math::var;