30 changes: 21 additions & 9 deletions README.md
@@ -10,17 +10,29 @@ recommended when compiling from source.

very simple API:

```python
>>> import faststat
>>> stats = faststat.Stats()
>>> stats.add(point)
>>> stats.add(point)
...
```

For lower overhead you can use ``StatsLite``, which keeps only the core
moments:

```python
>>> lite = faststat.StatsLite()
>>> lite.add(point)
```


The following properties are accessible on a ``Stats`` object: ``n``, ``min``,
``max``, ``variance``, ``skewness``, and ``kurtosis``. It also tracks
percentiles and other optional features.

``StatsLite`` exposes the same basic properties without the percentile or
window-tracking overhead.
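
For example, once some points have been added, the summary values can be read
straight off either object (interactive output omitted):

```python
>>> stats.n, stats.min, stats.max
>>> stats.variance, stats.skewness, stats.kurtosis
>>> lite.n, lite.variance
```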

Performance is pretty good: 0.63 microseconds per point on my machine. (Provided the C module is available.)
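
A rough way to check the per-point cost on your own machine (an illustrative
timing loop; the exact number depends on the interpreter and on whether the C
module is loaded):

```python
>>> import time
>>> s = faststat.Stats()
>>> t0 = time.perf_counter()
>>> for i in range(1000000):
...     s.add(i)
...
>>> (time.perf_counter() - t0) / 1000000  # seconds per add() call
```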

144 changes: 93 additions & 51 deletions faststat-core/src/lib.rs
@@ -78,7 +78,7 @@ struct WindowCount {
}

#[derive(Clone)]
pub struct Stats {
pub struct StatsLite {
n: u64,
mean: f64,
m2: f64,
@@ -88,6 +88,74 @@ pub struct Stats {
max: f64,
sum_of_logs: f64,
sum_of_inv: f64,
}

impl StatsLite {
pub fn new() -> Self {
StatsLite {
n: 0,
mean: 0.0,
m2: 0.0,
m3: 0.0,
m4: 0.0,
min: f64::INFINITY,
max: f64::NEG_INFINITY,
sum_of_logs: 0.0,
sum_of_inv: 0.0,
}
}

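    // Single-pass update of the central-moment accumulators (mean, m2, m3, m4)
    // in the style of Welford/Pébay. Assumes `n` has already been incremented
    // to include the new sample `x`.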
fn update_moments(&mut self, x: f64) {
let n = self.n as f64;
let delta = x - self.mean;
let delta_n = delta / n;
let delta_m2 = delta * delta_n * (n - 1.0);
let delta_m3 = delta_m2 * delta_n * (n - 2.0);
let delta_m4 = delta_m2 * delta_n * delta_n * (n * (n - 3.0) + 3.0);
self.mean += delta_n;
self.m4 += delta_m4 + delta_n * (6.0 * delta_n * self.m2 - 4.0 * self.m3);
self.m3 += delta_m3 + delta_n * 3.0 * self.m2;
self.m2 += delta_m2;
}

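    // Shared per-sample bookkeeping: count, min/max, and the running sums of
    // ln(x) and 1/x. The central-moment update is left to the caller.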
pub(crate) fn add_internal(&mut self, x: f64) {
self.n += 1;
if self.n == 1 {
self.min = x;
self.max = x;
}
if x <= self.min {
self.min = x;
}
if x >= self.max {
self.max = x;
}
self.sum_of_logs += if x > 0.0 { x.ln() } else { f64::NAN };
self.sum_of_inv += if x > 0.0 { 1.0 / x } else { f64::NAN };
}

    pub fn add(&mut self, x: f64) {
        self.add_internal(x);
        // Keep the central moments in sync with the bookkeeping above.
        self.update_moments(x);
    }

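    // Unbiased sample variance (divides by n - 1); undefined until at least
    // two samples have been recorded.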
pub fn variance(&self) -> Option<f64> {
if self.n < 2 { None } else { Some(self.m2 / (self.n as f64 - 1.0)) }
}

pub fn n(&self) -> u64 { self.n }
pub fn mean(&self) -> f64 { self.mean }
pub fn m2(&self) -> f64 { self.m2 }
pub fn m3(&self) -> f64 { self.m3 }
pub fn m4(&self) -> f64 { self.m4 }
pub fn min(&self) -> f64 { self.min }
pub fn max(&self) -> f64 { self.max }
pub fn sum_of_logs(&self) -> f64 { self.sum_of_logs }
pub fn sum_of_inv(&self) -> f64 { self.sum_of_inv }
}

#[derive(Clone)]
pub struct Stats {
core: StatsLite,
mintime: u64,
maxtime: u64,
lasttime: u64,
@@ -108,20 +176,11 @@ impl Stats {
}

fn update_moments(&mut self, x: f64) {
let n = self.n as f64;
let delta = x - self.mean;
let delta_n = delta / n;
let delta_m2 = delta * delta_n * (n - 1.0);
let delta_m3 = delta_m2 * delta_n * (n - 2.0);
let delta_m4 = delta_m2 * delta_n * delta_n * (n * (n - 3.0) + 3.0);
self.mean += delta_n;
self.m4 += delta_m4 + delta_n * (6.0 * delta_n * self.m2 - 4.0 * self.m3);
self.m3 += delta_m3 + delta_n * 3.0 * self.m2;
self.m2 += delta_m2;
self.core.update_moments(x);
}

fn insert_percentile_sorted(&mut self, mut x: f64) {
let num = self.n.min(self.percentiles.len() as u64) as usize;
let num = self.core.n.min(self.percentiles.len() as u64) as usize;
if num == 0 { return; }
for i in 0..num - 1 {
if x < self.percentiles[i].val {
@@ -161,11 +220,11 @@ impl Stats {

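    // Streaming percentile tracking: while there are no more samples than
    // markers, keep the markers sorted; afterwards each new point nudges the
    // interior markers toward their target quantiles (P²-style), anchored by
    // the running min and max.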
fn update_percentiles(&mut self, x: f64) {
if self.percentiles.is_empty() { return; }
if self.n <= self.percentiles.len() as u64 {
if self.core.n <= self.percentiles.len() as u64 {
self.insert_percentile_sorted(x);
return;
}
let n = self.n as f64;
let n = self.core.n as f64;
let last = self.percentiles.len() - 1;
if x < self.percentiles[0].val {
self.percentiles[0].val = x;
@@ -182,7 +241,7 @@
}
}
}
let mut prev_v = self.min;
let mut prev_v = self.core.min;
let mut prev_n = 0.0;
for i in 0..last {
let (r_v, r_n) = {
@@ -195,7 +254,7 @@
prev_n = cur.n;
}
let cur = &mut self.percentiles[last];
Self::p2_update_point(prev_v, prev_n, cur, self.max, n, n);
Self::p2_update_point(prev_v, prev_n, cur, self.core.max, n, n);
}

fn update_buckets(&mut self, x: f64) {
@@ -216,7 +275,7 @@
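    // `lastn` is treated as a ring buffer whose length is assumed to be a
    // power of two, so the write slot is selected with a bitmask rather than a
    // modulo; `window_avg` is adjusted incrementally as old samples rotate out.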
fn update_lastn(&mut self, x: f64) {
if self.lastn.is_empty() { return; }
let len = self.lastn.len();
let idx = ((self.n - 1) as usize) & (len - 1);
let idx = ((self.core.n() - 1) as usize) & (len - 1);
self.window_avg -= self.lastn[idx].value / len as f64;
self.window_avg += x / len as f64;
self.lastn[idx].value = x;
@@ -297,15 +356,7 @@ impl Stats {
let lastn = vec![DataPoint::default(); num_prev];
let topn: Vec<DataPoint> = Vec::with_capacity(num_top);
Stats {
n: 0,
mean: 0.0,
m2: 0.0,
m3: 0.0,
m4: 0.0,
min: f64::INFINITY,
max: f64::NEG_INFINITY,
sum_of_logs: 0.0,
sum_of_inv: 0.0,
core: StatsLite::new(),
mintime: 0,
maxtime: 0,
lasttime: 0,
@@ -326,24 +377,17 @@

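    // Record one sample: the shared core bookkeeping runs before
    // `update_moments` so the moment update sees the already-incremented count;
    // min/max timestamps and the optional trackers (percentiles, buckets,
    // exponential averages, last-N window) are updated afterwards.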
fn _add(&mut self, x: f64, t: u64) {
self.lasttime = t;
self.n += 1;
if self.n == 1 {
self.min = x;
self.max = x;
self.core.add_internal(x);
if self.core.n == 1 {
self.mintime = t;
self.maxtime = t;
}
if x <= self.min {
if x <= self.core.min {
self.mintime = t;
self.min = x;
}
if x >= self.max {
if x >= self.core.max {
self.maxtime = t;
self.max = x;
}
self.sum_of_logs += if x > 0.0 { x.ln() } else { f64::NAN };
self.sum_of_inv += if x > 0.0 { 1.0 / x } else { f64::NAN };
self.update_moments(x);
self.update_percentiles(x);
self.update_buckets(x);
self.update_expo_avgs(x);
@@ -395,7 +439,7 @@ impl Stats {
}

pub fn get_buckets(&self) -> Vec<(f64, u64)> {
let mut leftover = self.n;
let mut leftover = self.core.n();
let mut out = Vec::new();
for b in &self.buckets {
leftover -= b.count;
@@ -412,7 +456,7 @@
pub fn get_prev(&self, offset: usize) -> Option<(u64, f64)> {
if self.lastn.is_empty() { return None; }
let len = self.num_prev();
let idx = (((self.n - 1) as usize + (len - offset)) & (len - 1)) as usize;
let idx = (((self.core.n() - 1) as usize + (len - offset)) & (len - 1)) as usize;
let dp = &self.lastn[idx];
Some((dp.nanotime, dp.value))
}
@@ -440,18 +484,16 @@ impl Stats {
.collect()
}

pub fn n(&self) -> u64 { self.n }
pub fn mean(&self) -> f64 { self.mean }
pub fn m2(&self) -> f64 { self.m2 }
pub fn m3(&self) -> f64 { self.m3 }
pub fn m4(&self) -> f64 { self.m4 }
pub fn variance(&self) -> Option<f64> {
if self.n < 2 { None } else { Some(self.m2 / (self.n as f64 - 1.0)) }
}
pub fn min(&self) -> f64 { self.min }
pub fn max(&self) -> f64 { self.max }
pub fn sum_of_logs(&self) -> f64 { self.sum_of_logs }
pub fn sum_of_inv(&self) -> f64 { self.sum_of_inv }
pub fn n(&self) -> u64 { self.core.n() }
pub fn mean(&self) -> f64 { self.core.mean() }
pub fn m2(&self) -> f64 { self.core.m2() }
pub fn m3(&self) -> f64 { self.core.m3() }
pub fn m4(&self) -> f64 { self.core.m4() }
pub fn variance(&self) -> Option<f64> { self.core.variance() }
pub fn min(&self) -> f64 { self.core.min() }
pub fn max(&self) -> f64 { self.core.max() }
pub fn sum_of_logs(&self) -> f64 { self.core.sum_of_logs() }
pub fn sum_of_inv(&self) -> f64 { self.core.sum_of_inv() }
pub fn lasttime(&self) -> u64 { self.lasttime }
}
