<p>I seem to be getting much faster clustering from armadillo's <a href="http://arma.sourceforge.net/docs.html#kmeans">kmeans()</a> function in comparison to mlpack's kmeans::KMeans<> class, which is about 2x to 6x slower. I am using the latest mlpack code from the git repo. Am I doing something wrong?</p>
<p>Using the code below, I get the following timings on my machine (Intel i5, 64 bit, g++ version 5.3).</p>
<p>Compiled without openmp:<br>
<code>g++ kmeans_test.cpp -o kmeans_test -O3 -std=c++11 -larmadillo -lmlpack</code><br>
mlpack_kmeans time: 17.3024<br>
arma::kmeans time: 9.16399</p>
<p>Compiled with openmp:<br>
<code>g++ kmeans_test.cpp -o kmeans_test -O3 -std=c++11 -larmadillo -lmlpack -fopenmp</code><br>
mlpack_kmeans time: 17.7575<br>
arma::kmeans time: 2.87675</p>
<pre><code>#include <fstream>
#include <mlpack/methods/kmeans/kmeans.hpp>
#include <armadillo>
int main() {
arma::uword dims = 20; // number of dimensions
arma::uword samples = 5000000;
arma::uword max_iterations = 10;
arma::uword k = 10; // number of clusters
arma::arma_rng::set_seed_random(); // random start
std::cout << "Generating some synthetic data " << std::endl;
arma::mat data(dims, samples, arma::fill::zeros);
// generate data with unique centroids, added with a small amount of noise
for (arma::uword i=0; i<samples; i++) {
arma::uword c = as_scalar( arma::randi<arma::uvec>(1, arma::distr_param(0,k-1)) );
datacol(i) = arma::linspace<arma::vec>(c, c+dims-1, dims) + 025*arma::randn<arma::vec>(dims);
}
arma::wall_clock timer;
std::cout << "mlpack_kmeans start " << std::endl;
arma::Row<size_t> mlpack_assignments;
arma::mat mlpack_centroids;
mlpack::kmeans::KMeans<> mlpack_kmeans(max_iterations);
timertic();
mlpack_kmeansCluster(data, k, mlpack_assignments, mlpack_centroids);
std::cout << "mlpack_kmeans time: " << timertoc() << std::endl;
std::cout << "---" << std::endl;
std::cout << "arma::kmeans start " << std::endl;
arma::mat arma_centroids;
timertic();
arma::kmeans(arma_centroids, data, k, arma::random_subset, max_iterations, false);
std::cout << "arma::kmeans time: " << timertoc() << std::endl;
std::cout << "---" << std::endl;
mlpack_centroidsprint("mlpack_centroids:");
arma_centroidsprint("arma_centroids:");
return 0;
}
</code></pre>
<p style="font-size:small;-webkit-text-size-adjust:none;color:#666;">—<br>Reply to this email directly or <a href="https://github.com/mlpack/mlpack/issues/514">view it on GitHub</a>.<img alt="" height="1" src="https://github.com/notifications/beacon/AJ4bFFKN8e2eh5KGGfeD8N6ZUU_BAJZQks5pgFgMgaJpZM4HRP0A.gif" width="1" /></p>
<div itemscope itemtype="http://schema.org/EmailMessage">
<div itemprop="action" itemscope itemtype="http://schema.org/ViewAction">
<link itemprop="url" href="https://github.com/mlpack/mlpack/issues/514"></link>
<meta itemprop="name" content="View Issue"></meta>
</div>
<meta itemprop="description" content="View this Issue on GitHub"></meta>
</div>