Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified benchmark_results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified embeddings_comparison.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified metrics_heatmap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 9 additions & 9 deletions metrics_results.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Algorithm,Trust. (k=5),Trust. (k=15),Trust. (k=30),Continuity,Co-ranking,Spearman,Global Struct.,Density Pres.,Reconstr. R²,Silhouette,Adj. Rand Idx,Norm. MI,Classif. Acc.,Time (s)
UMAP,0.31396772398441847,0.507698015210536,0.5853644963828604,0.507698015210536,0.5076238174735671,0.35555412658930063,0.6161788537501326,0.10050063667731875,0.13542082599932848,0.7799601130001561,0.8462930754820145,0.8850632494551207,0.9833054781801301,9.233492125000339
PCA,0.07813021702838063,0.15121498794286775,0.22647004266369875,0.15121498794286775,0.15125208681135224,0.58237138961399,0.8194799752474329,0.0025296364868204945,0.285093648236993,0.3924475212548915,0.38798476520164954,0.5270341393788788,0.6104812751470133,0.004134916991461068
t-SNE,0.5971062882582081,0.5907994806158412,0.6150621406047115,0.5907994806158412,0.5907252828788722,0.4080166460361109,0.6335746486347822,0.6616467149629146,0.13703070177725074,0.6526165322051649,0.881390391570236,0.9063086626448817,0.9721835345094398,24.230485957989004
MDS,0.10651085141903172,0.19588202559821927,0.28135781858653314,0.19588202559821927,0.19591912446670376,0.7336948793497162,0.8305211414926951,0.5469156055065336,0.2502411375797472,0.39025375801929324,0.5002385159409434,0.5915354872861003,0.7195372949551223,10.611014500027522
Isomap,0.04852531997774068,0.08974216286403265,0.12489334075310703,0.08974216286403265,0.08966796512706363,0.28656024777938754,0.5523112670640861,0.2860020751496045,0.16466414057949508,0.39004821813119417,0.17037665211388042,0.28334026741309826,0.4129093160012379,6.277146417007316
LLE,0.010350584307178631,0.020070487850120573,0.031979224633648676,0.020070487850120573,0.02003338898163606,-0.029907056066161633,0.04688536864050532,0.0,0.0045753304549966956,0.3225187604913397,0.013881736737519326,0.0460183000738781,0.13298669142680283,13.723946416983381
PHATE,0.08180300500834725,0.15796698200704878,0.23233166388425153,0.15796698200704878,0.1580411797440178,0.5525542143219838,0.7853614704391769,0.029193045701356377,0.284276725066662,0.4000785508883673,0.43194478617505977,0.5493046786382542,0.6132822655524606,7.366627875017002
TriMap,0.002671118530884808,0.007939157855685402,0.01712112780560193,0.007939157855685402,0.007939157855685402,-0.05816714714196457,0.04881650156024266,0.0,0.0010666070008534811,0.32954131677512083,-0.0006966476751533949,0.009358083829204274,0.08459145775301764,0.587316292047035
PaCMAP,0.3163049526989427,0.48187720274531626,0.5506770543498424,0.48187720274531626,0.4819514004822853,0.2375034251411084,0.298922618449571,0.22074143232238652,0.080783589630951,0.7091429634791778,0.8200653587469319,0.8717676315550004,0.9710693283813061,0.2123211669968441
UMAP,0.31396772398441847,0.507698015210536,0.5853644963828604,0.507698015210536,0.5076238174735671,0.35555412658930063,0.6161788537501326,0.10050063667731875,0.13542082599932848,0.7799601130001561,0.8462930754820145,0.8850632494551207,0.9833054781801301,9.18106924998574
PCA,0.07813021702838063,0.15121498794286775,0.22647004266369875,0.15121498794286775,0.15125208681135224,0.58237138961399,0.8194799752474329,0.0025296364868204945,0.285093648236993,0.3924475212548915,0.38798476520164954,0.5270341393788788,0.6104812751470133,0.006151334033347666
t-SNE,0.5971062882582081,0.5907994806158412,0.6150621406047115,0.5907994806158412,0.5907252828788722,0.4080166460361109,0.6335746486347822,0.6616467149629146,0.13703070177725074,0.6526165322051649,0.881390391570236,0.9063086626448817,0.9721835345094398,23.834802250028588
MDS,0.10651085141903172,0.19588202559821927,0.28135781858653314,0.19588202559821927,0.19591912446670376,0.7336948793497162,0.8305211414926951,0.5469156055065336,0.2502411375797472,0.39025375801929324,0.5002385159409434,0.5915354872861003,0.7195372949551223,10.837047207984142
Isomap,0.04852531997774068,0.08974216286403265,0.12489334075310703,0.08974216286403265,0.08966796512706363,0.28656024777938754,0.5523112670640861,0.2860020751496045,0.16466414057949508,0.39004821813119417,0.17037665211388042,0.28334026741309826,0.4129093160012379,6.997400040971115
LLE,0.010350584307178631,0.020070487850120573,0.031979224633648676,0.020070487850120573,0.02003338898163606,-0.029907056066161633,0.04688536864050532,0.0,0.0045753304549966956,0.3225187604913397,0.013881736737519326,0.0460183000738781,0.13298669142680283,11.80761875002645
PHATE,0.08180300500834725,0.15796698200704878,0.23233166388425153,0.15796698200704878,0.1580411797440178,0.5525542143219838,0.7853614704391769,0.029193045701356377,0.284276725066662,0.4000785508883673,0.43194478617505977,0.5493046786382542,0.6132822655524606,6.072406957973726
TriMap,0.004117974401780746,0.008384344277499536,0.01778890743832313,0.008384344277499536,0.008347245409015025,-0.05018808619206228,0.0,0.0,0.0005044510409659697,0.322418006569936,0.0006853610507430608,0.010783872332592722,0.09516403590219746,0.4761259580263868
PaCMAP,0.31942125765164164,0.48807271378222966,0.5578000370988685,0.48807271378222966,0.4882582081246522,0.19411768919799563,0.27106218722694786,0.2004738271184264,0.06963633827407312,0.6848164123925791,0.8609584265681568,0.8895398736220883,0.9694026617146395,0.18318291602190584
291 changes: 291 additions & 0 deletions src/barnes_hut.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
//! Barnes-Hut tree for O(n log n) t-SNE gradient computation
//!
//! The Barnes-Hut algorithm approximates long-range forces by treating
//! distant groups of points as single points located at their center of mass.

use ndarray::{Array1, Array2};

/// Axis-aligned bounding box for spatial partitioning
#[derive(Clone, Debug)]
pub struct BoundingBox {
pub min_x: f64,
pub max_x: f64,
pub min_y: f64,
pub max_y: f64,
}

impl BoundingBox {
pub fn new(min_x: f64, max_x: f64, min_y: f64, max_y: f64) -> Self {
Self { min_x, max_x, min_y, max_y }
}

pub fn width(&self) -> f64 {
self.max_x - self.min_x
}

pub fn height(&self) -> f64 {
self.max_y - self.min_y
}

pub fn contains(&self, x: f64, y: f64) -> bool {
x >= self.min_x && x <= self.max_x && y >= self.min_y && y <= self.max_y
}

/// Get the quadrant (0-3) for a point relative to center
pub fn get_quadrant(&self, x: f64, y: f64) -> usize {
let cx = (self.min_x + self.max_x) / 2.0;
let cy = (self.min_y + self.max_y) / 2.0;

let mut quad = 0;
if x > cx { quad += 1; }
if y > cy { quad += 2; }
quad
}

/// Get bounding box for a specific quadrant
pub fn get_quadrant_bounds(&self, quadrant: usize) -> BoundingBox {
let cx = (self.min_x + self.max_x) / 2.0;
let cy = (self.min_y + self.max_y) / 2.0;

match quadrant {
0 => BoundingBox::new(self.min_x, cx, self.min_y, cy),
1 => BoundingBox::new(cx, self.max_x, self.min_y, cy),
2 => BoundingBox::new(self.min_x, cx, cy, self.max_y),
3 => BoundingBox::new(cx, self.max_x, cy, self.max_y),
_ => panic!("Invalid quadrant"),
}
}
}

/// Barnes-Hut quadtree node for 2D embeddings
pub struct QuadTreeNode {
/// Center of mass for all points in this node
pub center_of_mass: [f64; 2],
/// Total mass (number of points) in this node
pub total_mass: f64,
/// Bounding box for this node
pub bounds: BoundingBox,
/// Child nodes (None for leaf, Some for internal)
pub children: Option<Box<[Option<QuadTreeNode>; 4]>>,
/// Point index if this is a leaf with a single point
pub point_idx: Option<usize>,
}

impl QuadTreeNode {
/// Build a quadtree from a set of 2D points
pub fn build(points: &Array2<f64>) -> Self {
let n = points.nrows();
if n == 0 {
panic!("Cannot build tree from empty points");
}

// Find bounding box
let mut min_x = f64::INFINITY;
let mut max_x = f64::NEG_INFINITY;
let mut min_y = f64::INFINITY;
let mut max_y = f64::NEG_INFINITY;

for i in 0..n {
let x = points[[i, 0]];
let y = points[[i, 1]];
min_x = min_x.min(x);
max_x = max_x.max(x);
min_y = min_y.min(y);
max_y = max_y.max(y);
}

// Add small margin to avoid points on boundaries
let margin = 1e-5;
let bounds = BoundingBox::new(
min_x - margin,
max_x + margin,
min_y - margin,
max_y + margin,
);

// Build tree recursively
let mut root = QuadTreeNode {
center_of_mass: [0.0, 0.0],
total_mass: 0.0,
bounds,
children: None,
point_idx: None,
};

for i in 0..n {
root.insert(i, points[[i, 0]], points[[i, 1]]);
}

root
}

/// Insert a point into the tree
fn insert(&mut self, idx: usize, x: f64, y: f64) {
// Update center of mass incrementally
let new_mass = self.total_mass + 1.0;
self.center_of_mass[0] = (self.center_of_mass[0] * self.total_mass + x) / new_mass;
self.center_of_mass[1] = (self.center_of_mass[1] * self.total_mass + y) / new_mass;
self.total_mass = new_mass;

// If this is an empty leaf, just store the point
if self.total_mass == 1.0 {
self.point_idx = Some(idx);
return;
}

// If this is a leaf with one point, need to split
if self.children.is_none() {
self.subdivide();

// Reinsert the existing point
if let Some(old_idx) = self.point_idx.take() {
// Use the center of mass from before this insertion as the old point location
let old_x = self.center_of_mass[0] * self.total_mass - x;
let old_y = self.center_of_mass[1] * self.total_mass - y;
let old_quad = self.bounds.get_quadrant(old_x, old_y);
if let Some(ref mut children) = self.children {
if children[old_quad].is_none() {
children[old_quad] = Some(QuadTreeNode {
center_of_mass: [old_x, old_y],
total_mass: 1.0,
bounds: self.bounds.get_quadrant_bounds(old_quad),
children: None,
point_idx: Some(old_idx),
});
} else {
children[old_quad].as_mut().unwrap().insert(old_idx, old_x, old_y);
}
}
}
}

// Insert new point into appropriate quadrant
let quad = self.bounds.get_quadrant(x, y);
if let Some(ref mut children) = self.children {
if children[quad].is_none() {
children[quad] = Some(QuadTreeNode {
center_of_mass: [x, y],
total_mass: 1.0,
bounds: self.bounds.get_quadrant_bounds(quad),
children: None,
point_idx: Some(idx),
});
} else {
children[quad].as_mut().unwrap().insert(idx, x, y);
}
}
}

/// Subdivide this node into 4 quadrants
fn subdivide(&mut self) {
let mut children: [Option<QuadTreeNode>; 4] = [None, None, None, None];
self.children = Some(Box::new(children));
}

/// Compute repulsive forces using Barnes-Hut approximation
pub fn compute_non_edge_forces(
&self,
point: &[f64],
theta: f64,
point_idx: usize,
) -> [f64; 2] {
// Skip if this is the same point
if let Some(idx) = self.point_idx {
if idx == point_idx {
return [0.0, 0.0];
}
}

let dx = self.center_of_mass[0] - point[0];
let dy = self.center_of_mass[1] - point[1];
let dist_sq = dx * dx + dy * dy;

// If node is far enough away, use approximation
let node_size = self.bounds.width().max(self.bounds.height());
if node_size * node_size / dist_sq < theta * theta {
// Compute force from center of mass
let inv_dist = 1.0 / (1.0 + dist_sq);
let force_scalar = self.total_mass * inv_dist * inv_dist;
return [force_scalar * dx, force_scalar * dy];
}

// Otherwise, recurse into children
if let Some(ref children) = self.children {
let mut force = [0.0, 0.0];
for child in children.iter() {
if let Some(ref child_node) = child {
let child_force = child_node.compute_non_edge_forces(point, theta, point_idx);
force[0] += child_force[0];
force[1] += child_force[1];
}
}
return force;
}

// Leaf node with single point - compute exact force
if self.point_idx.is_some() && self.point_idx.unwrap() != point_idx {
let inv_dist = 1.0 / (1.0 + dist_sq);
let force_scalar = inv_dist * inv_dist;
return [force_scalar * dx, force_scalar * dy];
}

[0.0, 0.0]
}
}

#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;

#[test]
fn test_bounding_box() {
let bbox = BoundingBox::new(0.0, 10.0, 0.0, 10.0);

assert_eq!(bbox.width(), 10.0);
assert_eq!(bbox.height(), 10.0);

assert!(bbox.contains(5.0, 5.0));
assert!(!bbox.contains(-1.0, 5.0));
assert!(!bbox.contains(11.0, 5.0));

// Test quadrant determination
assert_eq!(bbox.get_quadrant(2.0, 2.0), 0);
assert_eq!(bbox.get_quadrant(7.0, 2.0), 1);
assert_eq!(bbox.get_quadrant(2.0, 7.0), 2);
assert_eq!(bbox.get_quadrant(7.0, 7.0), 3);
}

#[test]
fn test_quadtree_build() {
let points = Array2::from_shape_vec((4, 2), vec![
0.0, 0.0,
1.0, 0.0,
0.0, 1.0,
1.0, 1.0,
]).unwrap();

let tree = QuadTreeNode::build(&points);

assert_eq!(tree.total_mass, 4.0);
assert_relative_eq!(tree.center_of_mass[0], 0.5, epsilon = 1e-10);
assert_relative_eq!(tree.center_of_mass[1], 0.5, epsilon = 1e-10);
}

#[test]
fn test_barnes_hut_forces() {
let points = Array2::from_shape_vec((3, 2), vec![
0.0, 0.0,
10.0, 0.0,
5.0, 5.0,
]).unwrap();

let tree = QuadTreeNode::build(&points);

// Test force computation with high theta (more approximation)
let force = tree.compute_non_edge_forces(&[0.0, 0.0], 0.5, 0);

// Forces should be non-zero (repulsive from other points)
assert!(force[0] != 0.0 || force[1] != 0.0);
}
}
Loading
Loading