diff --git a/milk/tests/test_kmeans.py b/milk/tests/test_kmeans.py
index 4edbc02..38e55b3 100644
--- a/milk/tests/test_kmeans.py
+++ b/milk/tests/test_kmeans.py
@@ -54,6 +54,6 @@ def test_kmeans_return_partial():
     assignments,centroids = milk.unsupervised.kmeans(features, 2, R=129)
     centroids_ = milk.unsupervised.kmeans(features, 2, R=129, return_assignments=False)
     assignments_ = milk.unsupervised.kmeans(features, 2, R=129, return_centroids=False)
-    assert np.all(centroids == centroids_)
     assert np.all(assignments == assignments_)
+    assert np.allclose(centroids, centroids_)
diff --git a/milk/unsupervised/_kmeans.cpp b/milk/unsupervised/_kmeans.cpp
index 1902159..b6939e4 100644
--- a/milk/unsupervised/_kmeans.cpp
+++ b/milk/unsupervised/_kmeans.cpp
@@ -20,8 +20,79 @@ void assert_type_contiguous(PyArrayObject* array,int type) {
 }
 
 template<typename ftype>
-int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
+bool assignclass_euclidian(ftype* f, ftype* centroids, PyArrayObject* a_assignments, const int N, const int Nf, const int k) {
+    Py_BEGIN_ALLOW_THREADS
+    npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));
+
+    #pragma omp parallel for schedule(static)
+    for (int i=0; i < N; i++) {
+        int best_cluster = -1;
+        ftype min_distance = 0.0;
+        for (int c=0; c < k; c++) {
+            ftype cur_distance = 0.0;
+            for (int j=0; j < Nf; j++) {
+                ftype term = (f[i * Nf + j] - centroids[c * Nf + j]);
+                cur_distance += term * term;
+            }
+            if (best_cluster == -1 || cur_distance < min_distance) {
+                min_distance = cur_distance;
+                best_cluster = c;
+            }
+        }
+        assignments[i] = best_cluster;
+    }
+    Py_END_ALLOW_THREADS
+    return true;
+}
+
+PyObject* py_assignclass_euclidian(PyObject* self, PyObject* args) {
+    try {
+        PyArrayObject* fmatrix;
+        PyArrayObject* centroids;
+        PyArrayObject* assignments;
+        if (!PyArg_ParseTuple(args, "OOO", &fmatrix, &centroids, &assignments)) { throw Kmeans_Exception("Wrong number of arguments for assignclass_euclidian."); }
+        if (!PyArray_Check(fmatrix) || !PyArray_ISCONTIGUOUS(fmatrix)) throw Kmeans_Exception("fmatrix not what was expected.");
+        if (!PyArray_Check(centroids) || !PyArray_ISCONTIGUOUS(centroids)) throw Kmeans_Exception("centroids not what was expected.");
+        if (!PyArray_Check(assignments) || !PyArray_ISCONTIGUOUS(assignments)) throw Kmeans_Exception("assignments not what was expected.");
+        if (PyArray_TYPE(assignments) != NPY_INT32) throw Kmeans_Exception("assignments should be int32.");
+        if (PyArray_TYPE(fmatrix) != PyArray_TYPE(centroids)) throw Kmeans_Exception("centroids and fmatrix should have same type.");
+        if (PyArray_NDIM(fmatrix) != 2) throw Kmeans_Exception("fmatrix should be two dimensional");
+        if (PyArray_NDIM(centroids) != 2) throw Kmeans_Exception("centroids should be two dimensional");
+        if (PyArray_NDIM(assignments) != 1) throw Kmeans_Exception("assignments should be one dimensional");
+
+        const int N = PyArray_DIM(fmatrix, 0);
+        const int Nf = PyArray_DIM(fmatrix, 1);
+        const int k = PyArray_DIM(centroids, 0);
+        if (PyArray_DIM(centroids, 1) != Nf) throw Kmeans_Exception("centroids has wrong number of features.");
+        if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
+        switch (PyArray_TYPE(fmatrix)) {
+#define TRY_TYPE_ASSIGN(code, ftype) \
+    case code: \
+        if (assignclass_euclidian<ftype>( \
+            static_cast<ftype*>(PyArray_DATA(fmatrix)), \
+            static_cast<ftype*>(PyArray_DATA(centroids)), \
+            assignments, \
+            N, Nf, k)) { \
+            Py_RETURN_TRUE; \
+        } \
+        Py_RETURN_FALSE;
+
+        TRY_TYPE_ASSIGN(NPY_FLOAT, float);
+        TRY_TYPE_ASSIGN(NPY_DOUBLE, double);
+        }
+        throw Kmeans_Exception("Cannot handle this type.");
+    } catch (const Kmeans_Exception& exc) {
+        PyErr_SetString(PyExc_RuntimeError,exc.msg);
+        return 0;
+    } catch (...) {
+        PyErr_SetString(PyExc_RuntimeError,"Some sort of exception in assignclass_euclidian.");
+        return 0;
+    }
+}
+
+template<typename ftype>
+int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
     int zero_counts = 0;
     Py_BEGIN_ALLOW_THREADS
     const npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));
@@ -30,16 +101,42 @@ int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, P
     std::fill(counts, counts + k, 0);
     std::fill(centroids, centroids + k*Nf, 0.0);
 
-    for (int i = 0; i != N; i++){
-        const int c = assignments[i];
-        if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
-        ftype* ck = centroids + Nf*c;
-        for (int j = 0; j != Nf; ++j) {
-            *ck++ += *f++;
+    #pragma omp parallel shared(assignments, counts, centroids, f)
+    {
+        int *local_counts = (int*) malloc(k * sizeof(int));
+        ftype *local_ck = (ftype*) malloc(k * Nf * sizeof(ftype));
+        std::fill(local_counts, local_counts + k, 0);
+        std::fill(local_ck, local_ck + k*Nf, ftype());
+
+        #pragma omp for schedule(static)
+        for (int i = 0; i < N; i++){
+            const int c = assignments[i];
+            if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
+            ftype* lck = local_ck + Nf*c;
+            for (int j = 0; j != Nf; ++j) {
+                *lck++ += f[i*Nf + j];
+            }
+            ++local_counts[c];
         }
-        ++counts[c];
+
+        ftype* ck = centroids;
+        ftype* lck = local_ck;
+        for (int c=0; c < k; c++) {
+            #pragma omp critical(reduce)
+            {
+                for (int j = 0; j != Nf; ++j) {
+                    *ck++ += *lck++;
+                }
+                counts[c] += local_counts[c];
+            }
+        }
+
+        free(local_counts);
+        free(local_ck);
     }
-    for (int i = 0; i != k; ++i) {
+
+    #pragma omp parallel for reduction(+:zero_counts) schedule(static) shared(counts,centroids)
+    for (int i = 0; i < k; ++i) {
         ftype* ck = centroids + Nf*i;
         const ftype c = counts[i];
         if (c == 0) {
@@ -80,7 +177,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
         if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
         if (PyArray_DIM(counts, 0) != k) throw Kmeans_Exception("counts has wrong size.");
         switch (PyArray_TYPE(fmatrix)) {
-#define TRY_TYPE(code, ftype) \
+#define TRY_TYPE_CENTROIDS(code, ftype) \
     case code: \
         if (computecentroids<ftype>( \
             static_cast<ftype*>(PyArray_DATA(fmatrix)), \
             static_cast<ftype*>(PyArray_DATA(centroids)), \
             assignments, \
@@ -92,8 +189,8 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
         } \
         Py_RETURN_FALSE;
 
-        TRY_TYPE(NPY_FLOAT, float);
-        TRY_TYPE(NPY_DOUBLE, double);
+        TRY_TYPE_CENTROIDS(NPY_FLOAT, float);
+        TRY_TYPE_CENTROIDS(NPY_DOUBLE, double);
         }
         throw Kmeans_Exception("Cannot handle this type.");
     } catch (const Kmeans_Exception& exc) {
@@ -107,6 +204,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
 
 PyMethodDef methods[] = {
   {"computecentroids", py_computecentroids, METH_VARARGS , "Do NOT call directly.\n" },
+  {"assignclass_euclidian", py_assignclass_euclidian, METH_VARARGS , "Do NOT call directly.\n" },
   {NULL, NULL,0,NULL},
 };
diff --git a/milk/unsupervised/kmeans.py b/milk/unsupervised/kmeans.py
index d8bf270..a1324a9 100644
--- a/milk/unsupervised/kmeans.py
+++ b/milk/unsupervised/kmeans.py
@@ -219,13 +219,13 @@ def kmeans(fmatrix, k, distance='euclidean', max_iter=1000, R=None, return_assig
         fmatrix = zscore(fmatrix)
         distance = 'euclidean'
     if distance == 'euclidean':
-        def distfunction(fmatrix, cs, dists):
-            dists = _dot3(fmatrix, (-2)*cs.T, dists)
+        def distfunction(fmatrix, cs, _):
+            dists = _dot3(fmatrix, (-2)*cs.T, None)
             dists += np.array([np.dot(c,c) for c in cs])
             # For a distance, we'd need to add the fmatrix**2 components, but
             # it doesn't matter because we are going to perform an argmin() on
             # the result.
-            return dists
+            return dists.argmin(1)
     elif distance == 'mahalanobis':
         icov = kwargs.get('icov', None)
         if icov is None:
@@ -234,13 +234,18 @@ def distfunction(fmatrix, cs, dists):
             covmat = np.cov(fmatrix.T)
             icov = linalg.inv(covmat)
         def distfunction(fmatrix, cs, _):
-            return np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
+            dists = np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
+            return dists.argmin(1)
     else:
         raise ValueError('milk.unsupervised.kmeans: `distance` argument unknown (%s)' % distance)
     if k < 2:
         raise ValueError('milk.unsupervised.kmeans `k` should be >= 2.')
     if fmatrix.dtype in (np.float32, np.float64) and fmatrix.flags['C_CONTIGUOUS']:
         computecentroids = _kmeans.computecentroids
+        if distance == 'euclidean':
+            def distfunction(fmatrix, centroids, assignments):
+                _kmeans.assignclass_euclidian(fmatrix, centroids, assignments)
+                return assignments
     else:
         computecentroids = _pycomputecentroids
@@ -256,19 +261,16 @@ def distfunction(fmatrix, cs, _):
     prev = np.zeros(len(fmatrix), np.int32)
     counts = np.empty(k, np.int32)
-    dists = None
+    assignments = np.empty((fmatrix.shape[0],), dtype=np.int32)
     for i in xrange(max_iter):
-        dists = distfunction(fmatrix, centroids, dists)
-        assignments = dists.argmin(1)
+        assignments = distfunction(fmatrix, centroids, assignments)
         if np.all(assignments == prev):
             break
-        if computecentroids(fmatrix, centroids, assignments.astype(np.int32), counts):
+        if computecentroids(fmatrix, centroids, assignments.astype(np.int32, copy=False), counts):
            (empty,) = np.where(counts == 0)
            centroids = np.delete(centroids, empty, axis=0)
            k = len(centroids)
            counts = np.empty(k, np.int32)
-            # This will cause new matrices to be allocated in the next iteration
-            dists = None
         prev[:] = assignments
     if return_centroids and return_assignments:
         return assignments, centroids
diff --git a/setup.py b/setup.py
index 911eeac..7e0b1d2 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,8 @@
     'milk.supervised._lasso' : ['milk/supervised/_lasso.cpp'],
 }
 
-compiler_args = ['-std=c++0x']
+compiler_args = ['-std=c++0x', '-fopenmp']
+link_args = ['-lgomp']
 if platform.system() == 'Darwin':
     compiler_args.append('-stdlib=libc++')
@@ -67,6 +68,7 @@
             undef_macros=undef_macros,
             define_macros=define_macros,
             extra_compile_args=compiler_args,
+            extra_link_args=link_args,
             ) for key,sources in _extensions.items()
 ]
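
Usage sketch (not part of the patch): the new OpenMP-backed assignment step is only taken for C-contiguous float32/float64 feature matrices with distance='euclidean', per the kmeans.py change above. The feature array below is made-up illustration data; the call pattern mirrors test_kmeans_return_partial().

import numpy as np
import milk.unsupervised

# Made-up data: 100 samples, 5 features. np.random.rand returns a
# C-contiguous float64 array, so the compiled _kmeans helpers
# (computecentroids / assignclass_euclidian) are exercised.
features = np.ascontiguousarray(np.random.rand(100, 5))

# Same R seed gives reproducible results across the three calls.
assignments, centroids = milk.unsupervised.kmeans(features, 2, R=129)
centroids_ = milk.unsupervised.kmeans(features, 2, R=129, return_assignments=False)
assignments_ = milk.unsupervised.kmeans(features, 2, R=129, return_centroids=False)

assert np.all(assignments == assignments_)
assert np.allclose(centroids, centroids_)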