[mlpack-svn] r10744 - mlpack/trunk/src/mlpack/methods/kmeans

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Dec 13 05:47:13 EST 2011


Author: rcurtin
Date: 2011-12-13 05:47:12 -0500 (Tue, 13 Dec 2011)
New Revision: 10744

Modified:
   mlpack/trunk/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
Log:
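Centroids should be sparse, too, if we want them to be: store and pass centroids as MatType instead of arma::mat, and compute the variances without an intermediate dense arma::vec.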


Modified: mlpack/trunk/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/allow_empty_clusters.hpp	2011-12-13 10:21:18 UTC (rev 10743)
+++ mlpack/trunk/src/mlpack/methods/kmeans/allow_empty_clusters.hpp	2011-12-13 10:47:12 UTC (rev 10744)
@@ -39,7 +39,7 @@
   template<typename MatType>
   static size_t EmptyCluster(const MatType& data,
                              const size_t emptyCluster,
-                             const arma::mat& centroids,
+                             const MatType& centroids,
                              arma::Col<size_t>& clusterCounts,
                              arma::Col<size_t>& assignments)
   {

Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp	2011-12-13 10:21:18 UTC (rev 10743)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp	2011-12-13 10:47:12 UTC (rev 10744)
@@ -83,7 +83,7 @@
   }
 
   // Centroids of each cluster.  Each column corresponds to a centroid.
-  arma::mat centroids(data.n_rows, actualClusters);
+  MatType centroids(data.n_rows, actualClusters);
   // Counts of points in each cluster.
   arma::Col<size_t> counts(actualClusters);
   counts.zeros();

Modified: mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp	2011-12-13 10:21:18 UTC (rev 10743)
+++ mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp	2011-12-13 10:47:12 UTC (rev 10744)
@@ -40,7 +40,7 @@
   template<typename MatType>
   static size_t EmptyCluster(const MatType& data,
                              const size_t emptyCluster,
-                             const arma::mat& centroids,
+                             const MatType& centroids,
                              arma::Col<size_t>& clusterCounts,
                              arma::Col<size_t>& assignments);
 };

Modified: mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp	2011-12-13 10:21:18 UTC (rev 10743)
+++ mlpack/trunk/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp	2011-12-13 10:47:12 UTC (rev 10744)
@@ -19,7 +19,7 @@
 template<typename MatType>
 size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
                                            const size_t emptyCluster,
-                                           const arma::mat& centroids,
+                                           const MatType& centroids,
                                            arma::Col<size_t>& clusterCounts,
                                            arma::Col<size_t>& assignments)
 {
@@ -32,8 +32,8 @@
   // this is the sensible thing to do.
   for (size_t i = 0; i < data.n_cols; i++)
   {
-    arma::vec diff = data.col(i) - centroids.col(assignments[i]);
-    variances[assignments[i]] += var(diff);
+    variances[assignments[i]] += as_scalar(
+        var(data.col(i) - centroids.col(assignments[i])));
   }
 
   // Now find the cluster with maximum variance.
@@ -47,8 +47,8 @@
   {
     if (assignments[i] == maxVarCluster)
     {
-      arma::vec diff = data.col(i) - centroids.col(maxVarCluster);
-      double distance = var(diff);
+      double distance = as_scalar(
+          var(data.col(i) - centroids.col(maxVarCluster)));
 
       if (distance > maxDistance)
       {




More information about the mlpack-svn mailing list