diff --git a/scripts/gamma_table.py b/scripts/gamma_table.py index 3ce4fb3..a9db97b 100644 --- a/scripts/gamma_table.py +++ b/scripts/gamma_table.py @@ -29,7 +29,8 @@ # └──────────────────────────────────────────────────────────┘ # x values to test (excluding inputs that would result in complex outputs) gamma_x_values = [ - -1.5, -0.5, # Negative non-integer values that result in real outputs + -1.5, -0.5, -100.5, # Negative non-integer values that result in real outputs + -4503599627370495.5, # Large negative non-integer value that underflows to zero. 0.1, 0.2, 0.5, # Values between 0 and 1 1.0, 1.5, 2.0, 2.5, # Values between 1 and 3 3.0, 4.0, 5.0, # Small integers and values between them @@ -45,7 +46,7 @@ # Print in Rust code format print("const GAMMA_TABLE: [(f64, f64); {}] = [".format(len(table))) for x, y in table: - print(f" ({x:.14e}, {y:.14e}),") + print(f" ({x:.16e}, {y:.16e}),") print("];") # a values to test for gammp, gammq, and invgammp diff --git a/src/gamma.rs b/src/gamma.rs index 930e0cc..4f65982 100644 --- a/src/gamma.rs +++ b/src/gamma.rs @@ -7,7 +7,7 @@ //! - `gammq`: Calculates the regularized upper incomplete gamma function. //! - `invgammp`: Calculates the inverse of the regularized lower incomplete gamma function. -use crate::utils::factorial; +use crate::utils::{factorial, polynomial}; use crate::{EPS, FPMIN, W, Y}; use core::f64::consts::PI; const ASWITCH: usize = 100; @@ -54,19 +54,115 @@ pub fn ln_gamma(z: f64) -> f64 { /// # Returns /// /// The value of the gamma function at `z` -pub fn gamma(z: f64) -> f64 { - if z >= 1f64 { - let z_int = z as usize; - if (z - (z_int as f64)).abs() < EPS { - return factorial(z_int - 1); +// Based on the Fortran implementation by Toshio Fukushima: https://www.researchgate.net/publication/336578125_xgamtxt_Fortran_90_test_program_package_of_qgam_dgam_and_sgam_fast_computation_of_Gamma_function_with_quadruple_double_and_single_precision_accuracy_respectively +pub fn gamma(mut z: f64) -> f64 { + /// This input gives an output of [`f64::MAX`]. + /// Computed with WolframAlpha: https://www.wolframalpha.com/input?i=solve+gamma%28x%29+%3D+%281+%E2%88%92+2%5E%28%E2%88%9253%29%29*2%5E1024+for+x+%3E+1. + const MAX_INPUT: f64 = 171.624_376_956_302_7; + + // Special cases. + if z > MAX_INPUT { + // Output is too large to represent. + return f64::INFINITY; + } else if z == 0.0 { + // The gamma function diverges for an input of zero. + // It diverges to positive or negative infinity depending on which direction zero is approached from. + // According to entry F.9.5.4 of the ISO C 99 standard (https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf) + // gamma(+/- 0) = +/- infinity. + // This is the standard followed by `scipy.special.gamma` as well as the `tgamma` function in `libc`. + // Since the Gamma function in (nightly) Rust's standard library currently corresponds to `tgamma` + // and thus follows the standard we do the same here. + return f64::INFINITY.copysign(z); + } else if z.fract() == 0.0 { + // z is a non-zero integer less than or equal to 171. + + if z < 0.0 { + // The Gamma function also diverges for negative integers. + // There is however no clear way for the caller to signal whether the input approached the integer from above or below. + // According to the same standard as above the gamma function should return NaN for these inputs. + return f64::NAN; } + + // 0 < z <= 171, we can just use the factorial. + return factorial(z as usize - 1); + } else if z == f64::NEG_INFINITY { + // There is no well defined limit for z --> -inf. + return f64::NAN; + } else if z.is_nan() { + return f64::NAN; } - if z < 0.5 { - PI / ((PI * z).sin() * gamma(1f64 - z)) + let f = if z > 3.5 { + let mut f = 1.0; + // Decrement z by 1 until less than 3.5. + // This will not be a massively long loop since we checked for too large input above. + while z >= 3.5 { + z -= 1.0; + f *= z; + } + f + } else if z < 2.5 { + let mut f = 1.0; + // Increment z by 1 until larger than 2.5. + while z <= 2.5 { + f *= z; + if f.is_infinite() { + // There is no point in continuing the calculation, + // as `f` will remain infinite when multiplied by any finite number. + // (except zero, but that can only happen if z is an integer, which we have handled above.) + // The final result will thus be `g` / inf, + // which underflows to 0 since `g` (defined below) is always finite. + // This early return stops the loop from running too long for very large negative inputs. + return 0.0; + } + z += 1.0; + } + f.recip() } else { - ln_gamma(z).exp() - } + 1.0 + }; + + let g = if z > 3.0 { + polynomial( + z - 3.25, + [ + 0.000_016_738_398_919_923_317, + 0.000_095_940_184_093_793_52, + 0.000_455_766_746_785_133_16, + 0.002_149_849_572_114_427, + 0.008_882_603_286_354_344, + 0.034_350_198_197_417_38, + 0.115_260_482_799_219_55, + 0.346_268_553_360_568_7, + 0.858_853_822_880_646_4, + 1.776_919_804_709_978_3, + 2.592_571_165_129_980_8, + 2.549_256_966_718_529, + ], + ) + } else if z < 3.0 { + polynomial( + z - 2.75, + [ + 7.377_657_774_398_435e-7, + 0.000_048_420_884_483_658_204, + 0.000_132_763_577_755_739_43, + 0.000_914_252_863_201_387_9, + 0.003_228_437_222_320_849, + 0.014_576_803_693_379_53, + 0.046_735_160_424_814_17, + 0.155_955_856_853_634_36, + 0.384_779_120_148_185_27, + 0.891_167_948_026_476_6, + 1.317_087_179_192_858_7, + 1.608_359_421_985_545_5, + ], + ) + } else { + 2.0 + }; + + g * f } // ============================================================================= diff --git a/src/utils.rs b/src/utils.rs index a67fe34..b8b90c7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -103,3 +103,31 @@ pub fn sign(a: f64, b: f64) -> f64 { -(a.abs()) } } + +/// Evaluate a polynomial at x using [Horner's method](https://en.wikipedia.org/wiki/Horner%27s_method). +/// +/// The iterator is assumed to give the coefficients in decreasing order by the degree of the associated x-term. +/// +/// For example, to evaluate 4x^2 + 3x + 2, the iterator should give 4.0 then 3.0, and finally 2.0. +pub(crate) fn polynomial(x: f64, coefficients: I) -> f64 +where + I: IntoIterator, +{ + coefficients + .into_iter() + .fold(0.0, |acc, coeff| mul_add(acc, x, coeff)) +} + +/// Multiplies `x` by `mul` and adds `add`. +/// If the target CPU supports fused multiply-add instructions this function will use those. +fn mul_add(x: f64, mul: f64, add: f64) -> f64 { + #[cfg(not(target_feature = "fma"))] + { + x * mul + add + } + + #[cfg(target_feature = "fma")] + { + x.mul_add(mul, add) + } +} diff --git a/tests/gamma_test.rs b/tests/gamma_test.rs index e624689..d0aefef 100644 --- a/tests/gamma_test.rs +++ b/tests/gamma_test.rs @@ -1,6 +1,6 @@ use approx::assert_relative_eq; use proptest::prelude::*; -use puruspe::{gamma, gammp, gammq, invgammp, ln_gamma}; +use puruspe::{gamma, gammp, gammq, invgammp, ln_gamma, utils::factorial}; unsafe extern "C" { fn tgamma(x: f64) -> f64; @@ -22,9 +22,11 @@ fn test_gamma() { for (x, y) in GAMMA_TABLE { let result = gamma(x); let abs_eps = f64::EPSILON; - let rel_eps = 1e-10; + let rel_eps = 1e-14; assert_relative_eq!(result, y, epsilon = abs_eps, max_relative = rel_eps); } + + assert_eq!(gamma(-4503599627370495.5), 0.0); } #[test] @@ -64,44 +66,62 @@ fn test_invgammp() { #[test] fn test_gamma_edge_cases() { // Test gamma at special values + + // Can't do anything with NaNs. Just propagate it. + assert!(gamma(f64::NAN).is_nan()); + + // No limit for Γ(z) as z → -∞ + assert!(gamma(f64::NEG_INFINITY).is_nan()); + // Γ(+∞) = +∞ + assert_eq!(gamma(f64::INFINITY), f64::INFINITY); + + // Γ(±0) = ±∞ + assert_eq!(gamma(0.0), f64::INFINITY); + assert_eq!(gamma(-0.0), -f64::INFINITY); + // Γ(1) = 1 - assert_relative_eq!(gamma(1.0), 1.0, epsilon = 1e-15); + assert_eq!(gamma(1.0), 1.0); // Γ(2) = 1 - assert_relative_eq!(gamma(2.0), 1.0, epsilon = 1e-15); + assert_eq!(gamma(2.0), 1.0); // Γ(1/2) = √π - let sqrt_pi = std::f64::consts::PI.sqrt(); - assert_relative_eq!(gamma(0.5), sqrt_pi, epsilon = 1e-10); + let sqrt_pi = core::f64::consts::PI.sqrt(); + assert_relative_eq!(gamma(0.5), sqrt_pi); // Γ(3/2) = √π/2 - assert_relative_eq!(gamma(1.5), sqrt_pi / 2.0, epsilon = 1e-10); + assert_relative_eq!(gamma(1.5), sqrt_pi / 2.0); // Γ(5/2) = 3√π/4 - assert_relative_eq!(gamma(2.5), 3.0 * sqrt_pi / 4.0, epsilon = 1e-10); + assert_relative_eq!(gamma(2.5), 3.0 * sqrt_pi / 4.0); - // Test very small positive values - let result_small = gamma(1e-8); - assert!(result_small > 1e7 && result_small < 1e9); + // Test very small positive values. + // Computed with WolframAlpha: https://www.wolframalpha.com/input?i=Gamma%281e-8%29. + assert_relative_eq!( + gamma(1e-8), + 99999999.42278434, + max_relative = 1.5 * f64::EPSILON + ); + // Computed with WolframAlpha: https://www.wolframalpha.com/input?i=Gamma%281e-15%29 + assert_relative_eq!( + gamma(1e-15), + 999999999999999.4, + max_relative = 2.0 * f64::EPSILON + ); + // Computed with WolframAlpha: https://www.wolframalpha.com/input?i=Gamma%5B2.2250738585072014E-308%5D. + assert_relative_eq!(gamma(f64::MIN_POSITIVE), 4.4942328371557897e307); - // Test negative values (using reflection formula) + // Test negative values // Γ(-0.5) = -2√π - assert_relative_eq!(gamma(-0.5), -2.0 * sqrt_pi, epsilon = 1e-10); + assert_relative_eq!(gamma(-0.5), -2.0 * sqrt_pi); // Test near-integer negative values - let result_neg = gamma(-1.5); - assert!(result_neg > 2.0 && result_neg < 3.0); - - // Test value very close to 0 but positive - let result_tiny = gamma(1e-15); - assert!(result_tiny > 1e14); + assert_relative_eq!(gamma(-1.5), 4.0 * sqrt_pi / 3.0); // Test integer factorials - // Γ(6) = 5! = 120 - assert_relative_eq!(gamma(6.0), 120.0, epsilon = 1e-13); - - // Γ(11) = 10! = 3628800 - assert_relative_eq!(gamma(11.0), 3628800.0, epsilon = 1e-9); + for i in 1..=172 { + assert_eq!(gamma(i as f64), factorial(i - 1)); + } } #[test] @@ -324,7 +344,7 @@ proptest! { let result = gamma(x); let expected = unsafe { tgamma(x) }; let abs_eps = f64::EPSILON; - let rel_eps = 1e-9; + let rel_eps = 1e-14; assert_relative_eq!(result, expected, epsilon = abs_eps, max_relative = rel_eps); } @@ -363,28 +383,30 @@ const LN_GAMMA_TABLE: [(f64, f64); 19] = [ (1.00000000000000e+10, 2.20258509288811e+11), ]; -const GAMMA_TABLE: [(f64, f64); 21] = [ - (-1.50000000000000e+00, 2.36327180120735e+00), - (-5.00000000000000e-01, -3.54490770181103e+00), - (1.00000000000000e-01, 9.51350769866873e+00), - (2.00000000000000e-01, 4.59084371199880e+00), - (5.00000000000000e-01, 1.77245385090552e+00), - (1.00000000000000e+00, 1.00000000000000e+00), - (1.50000000000000e+00, 8.86226925452758e-01), - (2.00000000000000e+00, 1.00000000000000e+00), - (2.50000000000000e+00, 1.32934038817914e+00), - (3.00000000000000e+00, 2.00000000000000e+00), - (4.00000000000000e+00, 6.00000000000000e+00), - (5.00000000000000e+00, 2.40000000000000e+01), - (1.00000000000000e+01, 3.62880000000000e+05), - (2.00000000000000e+01, 1.21645100408832e+17), - (5.00000000000000e+01, 6.08281864034268e+62), - (2.50000000000000e-01, 3.62560990822191e+00), - (7.50000000000000e-01, 1.22541670246518e+00), - (1.00000000000000e-05, 9.99994227942255e+04), - (1.00000000000000e-10, 9.99999999942278e+09), - (1.00000000000000e+02, 9.33262154439442e+155), - (1.70000000000000e+02, 4.26906800900471e+304), +const GAMMA_TABLE: [(f64, f64); 23] = [ + (-1.5000000000000000e+00, 2.3632718012073548e+00), + (-5.0000000000000000e-01, -3.5449077018110318e+00), + (-1.0050000000000000e+02, -3.3536908198076757e-159), + (-4.5035996273704955e+15, -0.0000000000000000e+00), + (1.0000000000000001e-01, 9.5135076986687324e+00), + (2.0000000000000001e-01, 4.5908437119988035e+00), + (5.0000000000000000e-01, 1.7724538509055159e+00), + (1.0000000000000000e+00, 1.0000000000000000e+00), + (1.5000000000000000e+00, 8.8622692545275794e-01), + (2.0000000000000000e+00, 1.0000000000000000e+00), + (2.5000000000000000e+00, 1.3293403881791370e+00), + (3.0000000000000000e+00, 2.0000000000000000e+00), + (4.0000000000000000e+00, 6.0000000000000000e+00), + (5.0000000000000000e+00, 2.4000000000000000e+01), + (1.0000000000000000e+01, 3.6288000000000000e+05), + (2.0000000000000000e+01, 1.2164510040883200e+17), + (5.0000000000000000e+01, 6.0828186403426756e+62), + (2.5000000000000000e-01, 3.6256099082219082e+00), + (7.5000000000000000e-01, 1.2254167024651774e+00), + (1.0000000000000001e-05, 9.9999422794225538e+04), + (1.0000000000000000e-10, 9.9999999994227848e+09), + (1.0000000000000000e+02, 9.3326215443944153e+155), + (1.7000000000000000e+02, 4.2690680090047053e+304), ]; const GAMMP_TABLE: [(f64, f64, f64); 42] = [