[mlpack-git] master: Add Serialize() to KMeans and related classes. (73f01e6)
gitdub at big.cc.gt.atl.ga.us
Mon Jul 13 04:04:50 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/8b2ca720828224607c70d2b539c43aecf8f4ec32...b4659b668021db631b3c8a48e3d735b513706fdc
>---------------------------------------------------------------
commit 73f01e6a8c412a51df2b8ee16e76cc8cf7de322a
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Jul 12 13:29:13 2015 +0000
Add Serialize() to KMeans and related classes.
>---------------------------------------------------------------
73f01e6a8c412a51df2b8ee16e76cc8cf7de322a
src/mlpack/methods/kmeans/allow_empty_clusters.hpp | 4 ++++
src/mlpack/methods/kmeans/kmeans.hpp | 8 ++++++--
src/mlpack/methods/kmeans/kmeans_impl.hpp | 18 ++++++++++++++++++
src/mlpack/methods/kmeans/max_variance_new_cluster.hpp | 4 ++++
.../methods/kmeans/max_variance_new_cluster_impl.hpp | 15 +++++++++++++++
src/mlpack/methods/kmeans/random_partition.hpp | 8 ++++++--
src/mlpack/methods/kmeans/refined_start.hpp | 12 ++++++++++--
7 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
index 2fb4534..40ca0b1 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
@@ -53,6 +53,10 @@ class AllowEmptyClusters
// Empty clusters are okay! Do nothing.
return 0;
}
+
+ //! Serialize the empty cluster policy (nothing to do).
+ template<typename Archive>
+ void Serialize(Archive& /* ar */, const unsigned int /* version */) { }
};
} // namespace kmeans
diff --git a/src/mlpack/methods/kmeans/kmeans.hpp b/src/mlpack/methods/kmeans/kmeans.hpp
index fd69ed8..881b19f 100644
--- a/src/mlpack/methods/kmeans/kmeans.hpp
+++ b/src/mlpack/methods/kmeans/kmeans.hpp
@@ -171,6 +171,10 @@ class KMeans
// Returns a string representation of this object.
std::string ToString() const;
+ //! Serialize the k-means object.
+ template<typename Archive>
+ void Serialize(Archive& ar, const unsigned int version);
+
private:
//! Maximum number of iterations before giving up.
size_t maxIterations;
@@ -182,8 +186,8 @@ class KMeans
EmptyClusterPolicy emptyClusterAction;
};
-}; // namespace kmeans
-}; // namespace mlpack
+} // namespace kmeans
+} // namespace mlpack
// Include implementation.
#include "kmeans_impl.hpp"
diff --git a/src/mlpack/methods/kmeans/kmeans_impl.hpp b/src/mlpack/methods/kmeans/kmeans_impl.hpp
index 16ba7e5..8475d33 100644
--- a/src/mlpack/methods/kmeans/kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/kmeans_impl.hpp
@@ -285,5 +285,23 @@ std::string KMeans<MetricType,
return convert.str();
}
+template<typename MetricType,
+ typename InitialPartitionPolicy,
+ typename EmptyClusterPolicy,
+ template<class, class> class LloydStepType,
+ typename MatType>
+template<typename Archive>
+void KMeans<MetricType,
+ InitialPartitionPolicy,
+ EmptyClusterPolicy,
+ LloydStepType,
+ MatType>::Serialize(Archive& ar, const unsigned int /* version */)
+{
+ ar & data::CreateNVP(maxIterations, "max_iterations");
+ ar & data::CreateNVP(metric, "metric");
+ ar & data::CreateNVP(partitioner, "partitioner");
+ ar & data::CreateNVP(emptyClusterAction, "emptyClusterAction");
+}
+
}; // namespace kmeans
}; // namespace mlpack
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
index ad962fc..0d91523 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
@@ -49,6 +49,10 @@ class MaxVarianceNewCluster
MetricType& metric,
const size_t iteration);
+ //! Serialize the object.
+ template<typename Archive>
+ void Serialize(Archive& ar, const unsigned int version);
+
private:
//! Index of iteration for which variance is cached.
size_t iteration;
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
index c87200d..b6090ae 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
@@ -75,6 +75,21 @@ size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
return 1; // We only changed one point.
}
+//! Serialize the object.
+template<typename Archive>
+void MaxVarianceNewCluster::Serialize(Archive& /* ar */,
+ const unsigned int /* version */)
+{
+ // Serialization is useless here, because the only thing we store is
+ // precalculated quantities, and if we're serializing, our precalculations are
+ // likely to be useless when we deserialize (because the user will be running
+ // a different clustering, probably). So there is no need to store anything,
+ // and if we are loading, we just reset the assignments array so
+ // precalculation will happen next time EmptyCluster() is called.
+ if (Archive::is_loading::value)
+ assignments.set_size(0);
+}
+
template<typename MetricType, typename MatType>
void MaxVarianceNewCluster::Precalculate(const MatType& data,
const arma::mat& oldCentroids,
diff --git a/src/mlpack/methods/kmeans/random_partition.hpp b/src/mlpack/methods/kmeans/random_partition.hpp
index 729295d..34e4f49 100644
--- a/src/mlpack/methods/kmeans/random_partition.hpp
+++ b/src/mlpack/methods/kmeans/random_partition.hpp
@@ -44,9 +44,13 @@ class RandomPartition
assignments = arma::shuffle(arma::linspace<arma::Col<size_t> >(0,
(clusters - 1), data.n_cols));
}
-};
+ //! Serialize the partitioner (nothing to do).
+ template<typename Archive>
+ void Serialize(Archive& /* ar */, const unsigned int /* version */) { }
};
-};
+
+}
+}
#endif
diff --git a/src/mlpack/methods/kmeans/refined_start.hpp b/src/mlpack/methods/kmeans/refined_start.hpp
index ad6408d..39c431c 100644
--- a/src/mlpack/methods/kmeans/refined_start.hpp
+++ b/src/mlpack/methods/kmeans/refined_start.hpp
@@ -66,6 +66,14 @@ class RefinedStart
//! Modify the percentage of the data used by each subsampling.
double& Percentage() { return percentage; }
+ //! Serialize the object.
+ template<typename Archive>
+ void Serialize(Archive& ar, const unsigned int /* version */)
+ {
+ ar & data::CreateNVP(samplings, "samplings");
+ ar & data::CreateNVP(percentage, "percentage");
+ }
+
private:
//! The number of samplings to perform.
size_t samplings;
@@ -73,8 +81,8 @@ class RefinedStart
double percentage;
};
-}; // namespace kmeans
-}; // namespace mlpack
+} // namespace kmeans
+} // namespace mlpack
// Include implementation.
#include "refined_start_impl.hpp"
More information about the mlpack-git mailing list