[mlpack-git] master: Add Serialize() to KMeans and related classes. (73f01e6)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Mon Jul 13 04:04:50 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/8b2ca720828224607c70d2b539c43aecf8f4ec32...b4659b668021db631b3c8a48e3d735b513706fdc

>---------------------------------------------------------------

commit 73f01e6a8c412a51df2b8ee16e76cc8cf7de322a
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Jul 12 13:29:13 2015 +0000

    Add Serialize() to KMeans and related classes.


>---------------------------------------------------------------

73f01e6a8c412a51df2b8ee16e76cc8cf7de322a
 src/mlpack/methods/kmeans/allow_empty_clusters.hpp     |  4 ++++
 src/mlpack/methods/kmeans/kmeans.hpp                   |  8 ++++++--
 src/mlpack/methods/kmeans/kmeans_impl.hpp              | 18 ++++++++++++++++++
 src/mlpack/methods/kmeans/max_variance_new_cluster.hpp |  4 ++++
 .../methods/kmeans/max_variance_new_cluster_impl.hpp   | 15 +++++++++++++++
 src/mlpack/methods/kmeans/random_partition.hpp         |  8 ++++++--
 src/mlpack/methods/kmeans/refined_start.hpp            | 12 ++++++++++--
 7 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
index 2fb4534..40ca0b1 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
@@ -53,6 +53,10 @@ class AllowEmptyClusters
     // Empty clusters are okay!  Do nothing.
     return 0;
   }
+
+  //! Serialize the empty cluster policy (nothing to do).
+  template<typename Archive>
+  void Serialize(Archive& /* ar */, const unsigned int /* version */) { }
 };
 
 } // namespace kmeans
diff --git a/src/mlpack/methods/kmeans/kmeans.hpp b/src/mlpack/methods/kmeans/kmeans.hpp
index fd69ed8..881b19f 100644
--- a/src/mlpack/methods/kmeans/kmeans.hpp
+++ b/src/mlpack/methods/kmeans/kmeans.hpp
@@ -171,6 +171,10 @@ class KMeans
   // Returns a string representation of this object.
   std::string ToString() const;
 
+  //! Serialize the k-means object.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int version);
+
  private:
   //! Maximum number of iterations before giving up.
   size_t maxIterations;
@@ -182,8 +186,8 @@ class KMeans
   EmptyClusterPolicy emptyClusterAction;
 };
 
-}; // namespace kmeans
-}; // namespace mlpack
+} // namespace kmeans
+} // namespace mlpack
 
 // Include implementation.
 #include "kmeans_impl.hpp"
diff --git a/src/mlpack/methods/kmeans/kmeans_impl.hpp b/src/mlpack/methods/kmeans/kmeans_impl.hpp
index 16ba7e5..8475d33 100644
--- a/src/mlpack/methods/kmeans/kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/kmeans_impl.hpp
@@ -285,5 +285,23 @@ std::string KMeans<MetricType,
   return convert.str();
 }
 
+template<typename MetricType,
+         typename InitialPartitionPolicy,
+         typename EmptyClusterPolicy,
+         template<class, class> class LloydStepType,
+         typename MatType>
+template<typename Archive>
+void KMeans<MetricType,
+            InitialPartitionPolicy,
+            EmptyClusterPolicy,
+            LloydStepType,
+            MatType>::Serialize(Archive& ar, const unsigned int /* version */)
+{
+  ar & data::CreateNVP(maxIterations, "max_iterations");
+  ar & data::CreateNVP(metric, "metric");
+  ar & data::CreateNVP(partitioner, "partitioner");
+  ar & data::CreateNVP(emptyClusterAction, "emptyClusterAction");
+}
+
 }; // namespace kmeans
 }; // namespace mlpack
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
index ad962fc..0d91523 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
@@ -49,6 +49,10 @@ class MaxVarianceNewCluster
                       MetricType& metric,
                       const size_t iteration);
 
+  //! Serialize the object.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int version);
+
  private:
   //! Index of iteration for which variance is cached.
   size_t iteration;
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
index c87200d..b6090ae 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
@@ -75,6 +75,21 @@ size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
   return 1; // We only changed one point.
 }
 
+//! Serialize the object.
+template<typename Archive>
+void MaxVarianceNewCluster::Serialize(Archive& /* ar */,
+                                      const unsigned int /* version */)
+{
+  // Serialization is useless here, because the only thing we store is
+  // precalculated quantities, and if we're serializing, our precalculations are
+  // likely to be useless when we deserialize (because the user will be running
+  // a different clustering, probably).  So there is no need to store anything,
+  // and if we are loading, we just reset the assignments array so
+  // precalculation will happen next time EmptyCluster() is called.
+  if (Archive::is_loading::value)
+    assignments.set_size(0);
+}
+
 template<typename MetricType, typename MatType>
 void MaxVarianceNewCluster::Precalculate(const MatType& data,
                                          const arma::mat& oldCentroids,
diff --git a/src/mlpack/methods/kmeans/random_partition.hpp b/src/mlpack/methods/kmeans/random_partition.hpp
index 729295d..34e4f49 100644
--- a/src/mlpack/methods/kmeans/random_partition.hpp
+++ b/src/mlpack/methods/kmeans/random_partition.hpp
@@ -44,9 +44,13 @@ class RandomPartition
     assignments = arma::shuffle(arma::linspace<arma::Col<size_t> >(0,
         (clusters - 1), data.n_cols));
   }
-};
 
+  //! Serialize the partitioner (nothing to do).
+  template<typename Archive>
+  void Serialize(Archive& /* ar */, const unsigned int /* version */) { }
 };
-};
+
+}
+}
 
 #endif
diff --git a/src/mlpack/methods/kmeans/refined_start.hpp b/src/mlpack/methods/kmeans/refined_start.hpp
index ad6408d..39c431c 100644
--- a/src/mlpack/methods/kmeans/refined_start.hpp
+++ b/src/mlpack/methods/kmeans/refined_start.hpp
@@ -66,6 +66,14 @@ class RefinedStart
   //! Modify the percentage of the data used by each subsampling.
   double& Percentage() { return percentage; }
 
+  //! Serialize the object.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(samplings, "samplings");
+    ar & data::CreateNVP(percentage, "percentage");
+  }
+
  private:
   //! The number of samplings to perform.
   size_t samplings;
@@ -73,8 +81,8 @@ class RefinedStart
   double percentage;
 };
 
-}; // namespace kmeans
-}; // namespace mlpack
+} // namespace kmeans
+} // namespace mlpack
 
 // Include implementation.
 #include "refined_start_impl.hpp"



More information about the mlpack-git mailing list