vmanot · thatswiftguy · Mar 18, 2026
diff --git a/Sources/Compute/Intramodular/Statistics/CollectionStatistics.swift b/Sources/Compute/Intramodular/Statistics/CollectionStatistics.swift
@@ -0,0 +1,145 @@
+//
+// Copyright (c) Vatsal Manot
+//
+
+import Darwin
+import Swallow
+
+// MARK: - BinaryFloatingPoint Statistics
+
+extension Collection where Element: BinaryFloatingPoint {
+
+    /// The arithmetic mean of all elements in the collection.
+    ///
+    /// Returns `nil` if the collection is empty.
+    ///
+    /// - Complexity: O(n)
+    public var mean: Element? {
+        guard !isEmpty else { return nil }
+        return reduce(.zero, +) / Element(count)
+    }
+
+    /// The middle value when elements are sorted in ascending order.
+    ///
+    /// For even-count collections, returns the average of the two middle values.
+    /// Returns `nil` if the collection is empty.
+    ///
+    /// - Complexity: O(n log n)
+    public var median: Element? {
+        guard !isEmpty else { return nil }
+        let sorted = self.sorted()
+        let mid = sorted.count / 2
+        // Odd count: `mid` is the single central index.
+        guard sorted.count.isMultiple(of: 2) else { return sorted[mid] }
+        // Even count: average the two elements on either side of the centre.
+        return (sorted[mid - 1] + sorted[mid]) / 2
+    }
+
+    /// The population variance of the collection (sum of squared deviations divided by `count`).
+    ///
+    /// Returns `nil` if the collection is empty.
+    ///
+    /// - Complexity: O(n)
+    public var variance: Element? {
+        guard let m = mean else { return nil }
+        let sumOfSquaredDiffs = reduce(.zero) { $0 + ($1 - m) * ($1 - m) }
+        return sumOfSquaredDiffs / Element(count)
+    }
+
+    /// The population standard deviation of the collection (square root of `variance`).
+    ///
+    /// Returns `nil` if the collection is empty.
+    ///
+    /// - Complexity: O(n)
+    public var standardDeviation: Element? {
+        // `squareRoot()` works in `Element`'s own precision, so wide types are not
+        // narrowed through `Double` and no C math import is needed.
+        variance?.squareRoot()
+    }
+
+    /// Returns the value at the given percentile using linear interpolation between the
+    /// two nearest sorted elements.
+    ///
+    /// - Parameter p: A value in the range `0.0...1.0`. For example, `0.9` gives the 90th percentile.
+    /// - Returns: The interpolated value at that percentile, or `nil` if the collection is empty
+    ///   or `p` is outside `0.0...1.0` (a NaN `p` is also rejected by the range check).
+    ///
+    /// - Complexity: O(n log n)
+    public func percentile(_ p: Double) -> Element? {
+        guard !isEmpty, (0.0...1.0).contains(p) else { return nil }
+        let sorted = self.sorted()
+        guard sorted.count > 1 else { return sorted[0] }
+        // Fractional position of the percentile within the sorted values.
+        let index = p * Double(sorted.count - 1)
+        let lower = Int(index)
+        // Clamp so that `p == 1` does not step past the last element.
+        let upper = Swift.min(lower + 1, sorted.count - 1)
+        let fraction = Element(index - Double(lower))
+        return sorted[lower] + fraction * (sorted[upper] - sorted[lower])
+    }
+
+    /// Returns a new array where all values are linearly scaled to the range `[0, 1]`.
+    ///
+    /// Returns `nil` if the collection is empty or all values are identical (zero range).
+    ///
+    /// - Complexity: O(n)
+    public func normalized() -> [Element]? {
+        guard let minVal = self.min(), let maxVal = self.max() else { return nil }
+        let range = maxVal - minVal
+        guard range != .zero else { return nil }
+        return map { ($0 - minVal) / range }
+    }
+
+    /// Returns a new array of z-scores: each value is shifted by the mean and scaled by the
+    /// population standard deviation.
+    ///
+    /// Returns `nil` if the collection is empty or the standard deviation is zero.
+    ///
+    /// - Complexity: O(n)
+    public func standardized() -> [Element]? {
+        guard let m = mean, let sd = standardDeviation, sd != .zero else { return nil }
+        return map { ($0 - m) / sd }
+    }
+
+    /// Returns the Pearson correlation coefficient between this collection and another of equal length.
+    ///
+    /// A result of `1.0` indicates perfect positive correlation, `-1.0` perfect negative, and `0.0` no linear correlation.
+    ///
+    /// - Parameter other: The collection to correlate against, paired with this one element by element.
+    /// - Returns: The coefficient, or `nil` if either collection is empty, they differ in length,
+    ///   or either has zero spread around its mean.
+    ///
+    /// - Complexity: O(n)
+    public func pearsonCorrelation(with other: some Collection<Element>) -> Element? {
+        guard count == other.count, !isEmpty else { return nil }
+        guard let meanX = mean, let meanY = other.mean else { return nil }
+
+        // Accumulate the cross-deviation sum and both squared-deviation sums in one pass.
+        var crossSum: Element = .zero
+        var squaresX: Element = .zero
+        var squaresY: Element = .zero
+        for (x, y) in Swift.zip(self, other) {
+            let dx = x - meanX
+            let dy = y - meanY
+            crossSum += dx * dy
+            squaresX += dx * dx
+            squaresY += dy * dy
+        }
+
+        // `squaresX * squaresY` can overflow to infinity or underflow to zero even when each
+        // factor is representable (easy to hit with `Float`). Take the root of the product
+        // when it is a normal number; otherwise multiply the individual roots instead.
+        let product = squaresX * squaresY
+        let denominator = product.isNormal
+            ? product.squareRoot()
+            : squaresX.squareRoot() * squaresY.squareRoot()
+        guard denominator != .zero else { return nil }
+        return crossSum / denominator
+    }
+}
+
+// MARK: - Comparable & Hashable Statistics
+
+extension Collection where Element: Comparable & Hashable {
+
+    /// The most frequently occurring element in the collection.
+    ///
+    /// If several elements share the highest frequency, the smallest of them (by `<`) is
+    /// returned, so the result does not depend on dictionary iteration order.
+    /// Returns `nil` if the collection is empty.
+    ///
+    /// - Complexity: O(n) on average
+    public var mode: Element? {
+        let counts = reduce(into: [Element: Int]()) { tally, element in
+            tally[element, default: 0] += 1
+        }
+        // An empty collection produces an empty tally, for which `max(by:)` returns `nil`.
+        return counts.max { lhs, rhs in
+            // Order by frequency first; on a tie the smaller key ranks higher.
+            lhs.value != rhs.value ? lhs.value < rhs.value : lhs.key > rhs.key
+        }?.key
+    }
+
+    /// A closed range spanning from the minimum to the maximum element in the collection.
+    ///
+    /// Returns `nil` if the collection is empty, or if the computed bounds are not ordered
+    /// (`min() <= max()` is false), which can happen with values such as floating-point NaN.
+    ///
+    /// - Complexity: O(n)
+    public var valueRange: ClosedRange<Element>? {
+        guard let lo = self.min(), let hi = self.max() else { return nil }
+        // `lo...hi` traps unless `lo <= hi`; report "no range" rather than crash.
+        guard lo <= hi else { return nil }
+        return lo...hi
+    }
+}
diff --git a/Sources/Compute/Intramodular/Statistics/RunningStatistics.swift b/Sources/Compute/Intramodular/Statistics/RunningStatistics.swift
@@ -0,0 +1,191 @@
+//
+// Copyright (c) Vatsal Manot
+//
+
+import Darwin
+import Swallow
+
+/// A type that incrementally computes statistical measures using O(1) memory.
+///
+/// Uses Welford's online algorithm to compute the mean and variance as values
+/// are pushed one at a time, without storing the individual values.
+/// This makes it suitable for large data streams, file processing, or
+/// any context where holding all values in memory is not desirable.
+///
+/// ```swift
+/// var stats = RunningStatistics()
+///
+/// for temperature in sensorReadings {
+///     stats.push(temperature)
+/// }
+///
+/// print(stats.mean)               // Optional(21.32)
+/// print(stats.standardDeviation)  // Optional(1.36)
+/// print(stats.min)                // Optional(19.5)
+/// print(stats.max)                // Optional(23.1)
+/// ```
+public struct RunningStatistics: Sendable {
+
+    /// Number of values pushed so far.
+    private var _count: Int = 0
+    /// Running mean of the pushed values.
+    private var _mean: Double = 0.0
+    /// Running sum of squared deviations from the mean (Welford's `M2`).
+    private var _m2: Double = 0.0
+    /// Smallest value seen; starts at `+infinity` so any finite push replaces it.
+    private var _min: Double = .infinity
+    /// Largest value seen; starts at `-infinity` so any finite push replaces it.
+    private var _max: Double = -.infinity
+
+    /// Creates an empty instance with no accumulated values.
+    public init() {}
+
+    // MARK: - Observed Properties
+
+    /// The number of values pushed so far.
+    public var count: Int {
+        _count
+    }
+
+    /// Returns `true` if no values have been pushed yet.
+    public var isEmpty: Bool {
+        _count == 0
+    }
+
+    // MARK: - Statistical Properties
+
+    /// The running arithmetic mean, or `nil` if no values have been pushed.
+    public var mean: Double? {
+        isEmpty ? nil : _mean
+    }
+
+    /// The running population variance, or `nil` if no values have been pushed.
+    public var variance: Double? {
+        isEmpty ? nil : _m2 / Double(_count)
+    }
+
+    /// The running population standard deviation, or `nil` if no values have been pushed.
+    public var standardDeviation: Double? {
+        variance.map { $0.squareRoot() }
+    }
+
+    /// The smallest value pushed so far, or `nil` if no values have been pushed.
+    public var min: Double? {
+        isEmpty ? nil : _min
+    }
+
+    /// The largest value pushed so far, or `nil` if no values have been pushed.
+    public var max: Double? {
+        isEmpty ? nil : _max
+    }
+
+    /// A closed range from `min` to `max`.
+    ///
+    /// Returns `nil` if no values have been pushed, or if the tracked bounds are not ordered.
+    /// The latter happens when only NaN values were pushed: NaN never compares below or above
+    /// the initial `+infinity` / `-infinity` sentinels, so they are left in place.
+    public var valueRange: ClosedRange<Double>? {
+        guard let lo = min, let hi = max else { return nil }
+        // `lo...hi` traps unless `lo <= hi`; return `nil` instead of crashing.
+        guard lo <= hi else { return nil }
+        return lo...hi
+    }
+
+    // MARK: - Mutation
+
+    /// Incorporates a new `Double` value into the running statistics.
+    ///
+    /// - Parameter value: The sample to add.
+    /// - Complexity: O(1)
+    public mutating func push(_ value: Double) {
+        _count += 1
+        // Welford update: `delta` uses the mean before the update, `delta2` the mean after.
+        let delta = value - _mean
+        _mean += delta / Double(_count)
+        let delta2 = value - _mean
+        _m2 += delta * delta2
+        if value < _min { _min = value }
+        if value > _max { _max = value }
+    }
+
+    /// Incorporates a new `BinaryFloatingPoint` value into the running statistics.
+    ///
+    /// The value is converted to `Double` before being accumulated.
+    ///
+    /// - Parameter value: The sample to add.
+    /// - Complexity: O(1)
+    public mutating func push<T: BinaryFloatingPoint>(_ value: T) {
+        push(Double(value))
+    }
+
+    /// Incorporates every element from a sequence into the running statistics.
+    ///
+    /// - Parameter values: The samples to add, pushed in iteration order.
+    /// - Complexity: O(n)
+    public mutating func push<S: Sequence>(_ values: S) where S.Element: BinaryFloatingPoint {
+        for value in values {
+            push(value)
+        }
+    }
+
+    /// Resets all accumulated statistics back to the initial empty state.
+    public mutating func reset() {
+        self = RunningStatistics()
+    }
+}
+
+// MARK: - Merging
+
+extension RunningStatistics: MergeOperatable {
+
+    /// Folds the statistics accumulated by `other` into this instance.
+    ///
+    /// Afterwards this instance describes the union of both sample sets, as though every
+    /// value pushed into `other` had been pushed here as well. The two states are combined
+    /// with the pairwise (parallel) form of Welford's update.
+    ///
+    /// - Parameter other: The accumulator to absorb; it is not modified.
+    /// - Complexity: O(1)
+    public mutating func mergeInPlace(with other: RunningStatistics) {
+        // Absorbing an empty accumulator changes nothing.
+        if other.isEmpty { return }
+        // With nothing accumulated here, the result is simply `other`.
+        if isEmpty {
+            self = other
+            return
+        }
+
+        let ownWeight = Double(_count)
+        let otherWeight = Double(other._count)
+        let totalCount = _count + other._count
+        let totalWeight = Double(totalCount)
+        // Gap between the two means, taken before `_mean` is overwritten below.
+        let meanGap = other._mean - _mean
+
+        _mean = (_mean * ownWeight + other._mean * otherWeight) / totalWeight
+        _m2 = _m2 + other._m2 + meanGap * meanGap * ownWeight * otherWeight / totalWeight
+        _count = totalCount
+        _min = other._min < _min ? other._min : _min
+        _max = other._max > _max ? other._max : _max
+    }
+
+    /// Returns the combination of this instance and `other`, leaving both untouched.
+    ///
+    /// - Parameter other: The accumulator to combine with.
+    /// - Returns: A copy of `self` with `other` merged into it.
+    /// - Complexity: O(1)
+    public func merging(_ other: RunningStatistics) -> RunningStatistics {
+        var combined = self
+        combined.mergeInPlace(with: other)
+        return combined
+    }
+}
+
+// MARK: - Conformances
+
+extension RunningStatistics: CustomStringConvertible {
+    /// A one-line summary: a fixed placeholder when empty, otherwise the count, the mean and
+    /// standard deviation (four decimal places each), and the minimum and maximum.
+    public var description: String {
+        if isEmpty {
+            return "RunningStatistics(empty)"
+        }
+        let sdString: String
+        if let sd = standardDeviation {
+            sdString = String(format: "%.4f", sd)
+        } else {
+            sdString = "nil"
+        }
+        return "RunningStatistics(count: \(_count), mean: \(String(format: "%.4f", _mean)), stdDev: \(sdString), min: \(_min), max: \(_max))"
+    }
+}
+
+extension RunningStatistics: Equatable {
+    /// Two accumulators are equal when their count, mean, squared-deviation sum, minimum,
+    /// and maximum all compare equal.
+    public static func == (lhs: RunningStatistics, rhs: RunningStatistics) -> Bool {
+        let left = (lhs._count, lhs._mean, lhs._m2, lhs._min, lhs._max)
+        let right = (rhs._count, rhs._mean, rhs._m2, rhs._min, rhs._max)
+        return left == right
+    }
+}
+
+extension RunningStatistics: Hashable {
+    /// Feeds the same five fields compared by `==` into `hasher`, in declaration order.
+    public func hash(into hasher: inout Hasher) {
+        _count.hash(into: &hasher)
+        _mean.hash(into: &hasher)
+        _m2.hash(into: &hasher)
+        _min.hash(into: &hasher)
+        _max.hash(into: &hasher)
+    }
+}