Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5347229
Don't override CXX, CFLAGS from environment.
adam-azarchs Jun 1, 2018
c0de07f
Optimize and clean up SPTree.
adam-azarchs Jun 1, 2018
94be39c
Stop carrying around top-node stuff in child nodes.
adam-azarchs Jun 1, 2018
ef51dc0
Namespace the internal implementation details.
adam-azarchs Jun 1, 2018
88c96ad
Propagate constness in some arguments.
adam-azarchs Jun 1, 2018
47eaa06
More sorting and correction of includes.
adam-azarchs Jun 1, 2018
82917dd
Fix some array init stuff.
adam-azarchs Jun 5, 2018
dc8199c
Factor out main.
adam-azarchs Jun 14, 2018
0019c9b
Use dynamic dispatch for choosing no_dims.
adam-azarchs Jun 14, 2018
930525e
Use a namespace instead of class.
adam-azarchs Jun 14, 2018
6885a19
Remove obsolete bh_sne_3d.
adam-azarchs Jun 14, 2018
6da8ab8
Break out io stuff into separate obj.
adam-azarchs Jun 20, 2018
c2a2ef1
Improve build/link.
adam-azarchs Jun 21, 2018
b266c94
Remove size from SPTreeNode.
adam-azarchs Jun 21, 2018
dedcafe
Trim the fat off VpTree.
adam-azarchs Jun 21, 2018
0ffa88d
Store child nodes in a contiguous array.
adam-azarchs Jun 21, 2018
8a5efce
Add alignment directives for better vectorization.
adam-azarchs Jun 22, 2018
2b02650
Factor theta into max_width_squared early on.
adam-azarchs Jun 22, 2018
aaf77bd
Replace more malloc with vector.
adam-azarchs Jun 25, 2018
58c2070
Apply consistent clang-format.
adam-azarchs Jun 25, 2018
c644302
Factor beta multiplication.
adam-azarchs Jun 27, 2018
90c3647
Add optimizer hints for hot paths.
adam-azarchs Jun 27, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
build:
build: tsne/bh_sne.pyx \
tsne/bh_sne_src/tsne.cpp \
tsne/bh_sne_src/sptree.cpp \
$(wildcard tsne/bh_sne_src/*.h)
python setup.py build_ext --inplace

install:
python setup.py build_ext --inplace
install: build
python setup.py install

sdist:
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ Requirements
* [numpy](numpy.scipy.org) > =1.7.1
* [scipy](http://www.scipy.org/) >= 0.12.0
* [cython](cython.org) >= 0.19.1
* [cblas](http://www.netlib.org/blas/) or [openblas](https://github.com/xianyi/OpenBLAS). Tested version is v0.2.5 and v0.2.6 (not necessary for OSX).

[Anaconda](http://continuum.io/downloads) is recommended.

Expand Down
20 changes: 6 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,19 @@
ext_modules = [Extension(name='bh_sne',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'],
extra_compile_args=extra_compile_args + ['-ffast-math', '-O3'],
extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lcblas'],
extra_compile_args=extra_compile_args + ['-ffast-math', '-O3', '-std=c++11'],
extra_link_args=['-Wl,-framework', '-Wl,Accelerate'],
language='c++')]

else:
# LINUX

ext_modules = [Extension(name='bh_sne',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-O3', '-fPIC', '-w', '-ffast-math'],
extra_link_args=['-Wl,-Bstatic', '-lcblas', '-Wl,-Bdynamic'],
language='c++'),

Extension(name='bh_sne_3d',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-O3', '-fPIC', '-w', '-ffast-math', '-DTSNE3D'],
extra_link_args=['-Wl,-Bstatic', '-lcblas', '-Wl,-Bdynamic'],
include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'],
extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-ffast-math', '-std=c++11',
'-ffunction-sections', '-flto', '-mtune=native'],
extra_link_args=['-O3', '-Wl,--gc-sections', '-flto', '-mtune=native'],
language='c++')]

ext_modules = cythonize(ext_modules)
Expand Down
10 changes: 1 addition & 9 deletions tsne/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import numpy as np
import scipy.linalg as la
import sys
from bh_sne import BH_SNE
from bh_sne_3d import BH_SNE_3D
from bh_sne import BH_SNE as tsne

def bh_sne(data, pca_d=None, d=2, perplexity=30., theta=0.5,
random_state=None, copy_data=False, init=None,
Expand Down Expand Up @@ -79,13 +78,6 @@ def bh_sne(data, pca_d=None, d=2, perplexity=30., theta=0.5,
if mom_switch_iter is None:
mom_switch_iter = 250

if d == 2:
tsne = BH_SNE()
elif d == 3:
tsne = BH_SNE_3D()
else:
raise Exception("TSNE dimensions must be 2 or 3")

Y = tsne.run(X, N, X.shape[1], d, perplexity, theta, seed, init=init, use_init=use_init,
max_iter=max_iter, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter)
return Y
Expand Down
44 changes: 27 additions & 17 deletions tsne/bh_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,35 @@ cimport numpy as np
cimport cython
from libcpp cimport bool

cdef extern from "tsne.h":
cdef cppclass TSNE:
TSNE()
void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, bool skip_random_init, double *init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter)
cdef extern from "tsne.h" namespace "TSNE":
void c_run "TSNE::run" (double* X, int N, int D, double* Y, int no_dims,
double perplexity, double theta,
int rand_seed, bool skip_random_init,
double *init, bool use_init,
int max_iter, int stop_lying_iter, int mom_switch_iter) nogil

cdef class BH_SNE:
cdef TSNE* thisptr # hold a C++ instance

def __cinit__(self):
self.thisptr = new TSNE()

def __dealloc__(self):
del self.thisptr

@cython.boundscheck(False)
@cython.wraparound(False)
def run(self, X, N, D, d, perplexity, theta, seed, init, use_init, max_iter, stop_lying_iter, mom_switch_iter):
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray(X)
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _init = np.ascontiguousarray(init)
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros((N, d), dtype=np.float64)
self.thisptr.run(&_X[0,0], N, D, &Y[0,0], d, perplexity, theta, seed, False, &_init[0,0], use_init, max_iter, stop_lying_iter, mom_switch_iter)
@staticmethod
def run(X, int N, int D, int d,
double perplexity, double theta,
int seed, init, bool use_init,
int max_iter, int stop_lying_iter, int mom_switch_iter):
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray(
X,
dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _init = np.ascontiguousarray(
init,
dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros(
(N, d),
dtype=np.float64,
order='C')
assert(N, X.shape[1])
with nogil:
c_run(&_X[0,0], N, D, &Y[0,0], d,
perplexity, theta,
seed, False, &_init[0,0], use_init,
max_iter, stop_lying_iter, mom_switch_iter)
return Y
28 changes: 0 additions & 28 deletions tsne/bh_sne_3d.pyx

This file was deleted.

3 changes: 3 additions & 0 deletions tsne/bh_sne_src/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
BasedOnStyle: Google
IndentWidth: 4
AllowShortIfStatementsOnASingleLine: false
27 changes: 15 additions & 12 deletions tsne/bh_sne_src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,28 @@
#CFLAGS = -march=haswell -ffast-math -O3 -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize
#CFLAGS = -march=haswell -ffast-math -O3

CXX = g++
CFLAGS = -ffast-math -O3
CXX ?= g++
CFLAGS += -std=c++11 -ffast-math -O3 -flto

all: bh_tsne bh_tsne_3d
CFLAGS += -ffunction-sections
LDFLAGS += -Wl,--gc-sections

bh_tsne: tsne.o sptree.o
$(CXX) $(CFLAGS) tsne.o sptree.o -o bh_tsne
all: bh_tsne

bh_tsne_3d: tsne_3d.o sptree.o
$(CXX) $(CFLAGS) tsne_3d.o sptree.o -o bh_tsne_3d
bh_tsne: main.o io.o tsne.o sptree.o
$(CXX) $(CFLAGS) $(LDFLAGS) $^ -o $@

sptree.o: sptree.cpp sptree.h
$(CXX) $(CFLAGS) -c sptree.cpp
$(CXX) $(CFLAGS) -c $< -o $@

tsne.o: tsne.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -c tsne.cpp
$(CXX) $(CFLAGS) -c $< -o $@

tsne_3d.o: tsne.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -DTSNE3D -c tsne.cpp
main.o: main.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -c $< -o $@

io.o: io.cpp tsne.h
$(CXX) $(CFLAGS) -c $< -o $@

clean:
rm -Rf *.o bh_tsne bh_tsne_3d
rm -Rf *.o bh_tsne
103 changes: 103 additions & 0 deletions tsne/bh_sne_src/io.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/

#include <cstdio>
#include <cstdlib>
#include <fstream>

#include "tsne.h"

namespace TSNE {

// Function that loads data from a t-SNE file
// Note: this function does a malloc that should be freed elsewhere
bool load_data(const char* dat_file, double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) {

// Open file, read first 2 integers, allocate memory, and read the data
FILE *h;
if((h = fopen(dat_file, "r+b")) == NULL) {
fprintf(stderr,"Error: could not open data file.\n");
return false;
}
fread(n, sizeof(int), 1, h); // number of datapoints
fread(d, sizeof(int), 1, h); // original dimensionality
fread(theta, sizeof(double), 1, h); // gradient accuracy
fread(perplexity, sizeof(double), 1, h); // perplexity
fread(no_dims, sizeof(int), 1, h); // output dimensionality
fread(max_iter, sizeof(int),1,h); // maximum number of iterations
*data = (double*) malloc(*d * *n * sizeof(double));
if(*data == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); }
fread(*data, sizeof(double), *n * *d, h); // the data
if(!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed
fclose(h);
fprintf(stderr,"Read the %i x %i data matrix successfully!\n", *n, *d);
return true;
}

// Function that saves map to a t-SNE file
void save_data(const char* res_file, double* data, int* landmarks, double* costs, int n, int d) {

// Open file, write first 2 integers and then the data
FILE *h;
if((h = fopen(res_file, "w+b")) == NULL) {
fprintf(stderr,"Error: could not open data file.\n");
return;
}
fwrite(&n, sizeof(int), 1, h);
fwrite(&d, sizeof(int), 1, h);
fwrite(data, sizeof(double), n * d, h);
fwrite(landmarks, sizeof(int), n, h);
fwrite(costs, sizeof(double), n, h);
fclose(h);
fprintf(stderr,"Wrote the %i x %i data matrix successfully!\n", n, d);
}

void save_csv(const char* csv_file, double* Y, int N, int D) {
std::ofstream csv(csv_file);

for (int d = 0; d < D; d++) {
csv << "TSNE" << d+1 << ",";
}
csv << "\n";

for (int n = 0; n < N; n++) {
int row_offset = n * D;
for (int d = 0; d < D; d++) {
csv << Y[row_offset + d] << ",";
}
csv << "\n";
}

csv.close();
}

} // namespace TSNE
80 changes: 80 additions & 0 deletions tsne/bh_sne_src/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/


#include "tsne.h"

#include <cstdio>
#include <cstdlib>
#include <vector>

using std::fprintf;
using std::free;
using std::vector;

// Function that runs the Barnes-Hut implementation of t-SNE
int main(int argc, char *argv[]) {

// load input and output
const char *dat_file = "data.dat";
const char *res_file = "result.dat";
if (argc > 1) {
dat_file = argv[1];
res_file = argv[2];
}

// Define some variables
int origN, N, D, no_dims, max_iter, *landmarks;
double perc_landmarks;
double perplexity, theta, *data;
int rand_seed = -1;

// Read the parameters and the dataset
if(TSNE::load_data(dat_file, &data, &origN, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) {

// Make dummy landmarks
N = origN;

// Now fire up the SNE implementation
vector<double> Y(N * no_dims);
TSNE::run(data, N, D, Y.data(), no_dims, perplexity, theta, rand_seed, false, NULL, false, max_iter);

// Save the results
vector<int> landmarks(N);
for(int n = 0; n < N; n++) landmarks[n] = n;
vector<double> costs(N);
TSNE::save_data(res_file, Y.data(), landmarks.data(), costs.data(), N, no_dims);

// Clean up the memory
free(data); data = NULL;
}
}
Loading