[mlpack-git] master, mlpack-1.0.x: Add nystroem method for kernel approximation. (50d5789)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:53:40 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 50d5789f15b0c7e9c04ec33bc3c5b5cfd43868fe
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Fri Jul 18 21:56:30 2014 +0000

    Add nystroem method for kernel approximation.


>---------------------------------------------------------------

50d5789f15b0c7e9c04ec33bc3c5b5cfd43868fe
 .../methods/nystroem_method/kmeans_selection.hpp   | 47 +++++++++++
 .../methods/nystroem_method/nystroem_method.hpp    | 81 +++++++++++++++++++
 .../nystroem_method/nystroem_method_impl.hpp       | 90 ++++++++++++++++++++++
 .../methods/nystroem_method/ordered_selection.hpp  | 38 +++++++++
 .../methods/nystroem_method/random_selection.hpp   | 40 ++++++++++
 5 files changed, 296 insertions(+)

diff --git a/src/mlpack/methods/nystroem_method/kmeans_selection.hpp b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
new file mode 100644
index 0000000..f931d74
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
@@ -0,0 +1,47 @@
+/**
+ * @file kmeans_selection.hpp
+ * @author Marcus Edel
+ *
+ * Use the centroids of the K-Means clustering method for use in the Nystroem
+ * method of kernel matrix approximation.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/kmeans/kmeans.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+template<typename ClusteringType = kmeans::KMeans<> >
+class KMeansSelection
+{
+ public:
+  /**
+   * Use the K-Means clustering method to select the specified number of points
+   * in the dataset.
+   *
+   * @param data Dataset to sample from.
+   * @param m Number of points to select.
+   * @return Matrix pointer in which centroids are stored.
+   */
+  const static arma::mat* Select(const arma::mat& data,
+                                 const size_t m,
+                                 const size_t maxIterations = 5)
+  {
+    arma::Col<size_t> assignments;
+    arma::mat* centroids = new arma::mat;
+
+    // Perform the K-Means clustering method.
+    ClusteringType kmeans(maxIterations);
+    kmeans.Cluster(data, m, assignments, *centroids);
+
+    return centroids;
+  }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/nystroem_method.hpp b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
new file mode 100644
index 0000000..fc91d6e
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
@@ -0,0 +1,81 @@
+/**
+ * @file nystroem_method.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ * There are many variations on how to do this, so template parameters allow the
+ * selection of many different techniques.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include "kmeans_selection.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<
+  typename KernelType,
+  typename PointSelectionPolicy = KMeansSelection<>
+>
+class NystroemMethod
+{
+ public:
+  /**
+   * Create the NystroemMethod object. The constructor here does not really do
+   * anything.
+   *
+   * @param data Data matrix.
+   * @param kernel Kernel to be used for computation.
+   * @param rank Rank to be used for matrix approximation.
+   */
+  NystroemMethod(const arma::mat& data, KernelType& kernel, const size_t rank);
+
+  /**
+   * Apply the low-rank factorization to obtain an output matrix G such that
+   * K' = G * G^T.
+   *
+   * @param output Matrix to store kernel approximation into.
+   */
+  void Apply(arma::mat& output);
+
+  /**
+   * Construct the kernel matrix with matrix that contains the selected points.
+   *
+   * @param data Data matrix pointer.
+   * @param miniKernel to store the constructed mini-kernel matrix in.
+   * @param miniKernel to store the constructed semi-kernel matrix in.
+   */
+  void GetKernelMatrix(const arma::mat* data, 
+                       arma::mat& miniKernel, 
+                       arma::mat& semiKernel);
+
+  /**
+   * Construct the kernel matrix with the selected points.
+   *
+   * @param points Indices of selected points.
+   * @param miniKernel to store the constructed mini-kernel matrix in.
+   * @param miniKernel to store the constructed semi-kernel matrix in.
+   */
+  void GetKernelMatrix(const arma::vec& points, 
+                       arma::mat& miniKernel, 
+                       arma::mat& semiKernel);
+
+ private:
+  //! The reference dataset.
+  const arma::mat& data;
+  //! The locally stored kernel, if it is necessary.
+  KernelType& kernel;
+  //! Rank used for matrix approximation.
+  const size_t rank;
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+// Include implementation.
+#include "nystroem_method_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp b/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
new file mode 100644
index 0000000..01ae911
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
@@ -0,0 +1,90 @@
+/**
+ * @file nystroem_method_impl.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "nystroem_method.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<typename KernelType, typename PointSelectionPolicy>
+NystroemMethod<KernelType, PointSelectionPolicy>::NystroemMethod(
+    const arma::mat& data,
+    KernelType& kernel,
+    const size_t rank) :
+    data(data),
+    kernel(kernel),
+    rank(rank)
+{ }
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+                                       const arma::mat* seletedData, 
+                                       arma::mat& miniKernel, 
+                                       arma::mat& semiKernel)
+{
+  // Assemble mini-kernel matrix.
+  for (size_t i = 0; i < rank; ++i)
+    for (size_t j = 0; j < rank; ++j)
+      miniKernel(i, j) = kernel.Evaluate(seletedData->col(i),
+                                         seletedData->col(j));
+
+  // Construct semi-kernel matrix with interactions between selected data and
+  // all points.
+  for (size_t i = 0; i < data.n_cols; ++i)
+    for (size_t j = 0; j < rank; ++j)
+      semiKernel(i, j) = kernel.Evaluate(data.col(i), 
+                                         seletedData->col(j));
+  // Clean the memory.
+  delete seletedData;
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+                                       const arma::vec& selectedPoints, 
+                                       arma::mat& miniKernel, 
+                                       arma::mat& semiKernel)
+{
+  // Assemble mini-kernel matrix.
+  for (size_t i = 0; i < rank; ++i)
+    for (size_t j = 0; j < rank; ++j)
+      miniKernel(i, j) = kernel.Evaluate(data.col(selectedPoints(i)),
+                                         data.col(selectedPoints(j)));
+
+  // Construct semi-kernel matrix with interactions between selected points and
+  // all points.
+  for (size_t i = 0; i < data.n_cols; ++i)
+    for (size_t j = 0; j < rank; ++j)
+      semiKernel(i, j) = kernel.Evaluate(data.col(i),
+                                         data.col(selectedPoints(j)));
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::Apply(arma::mat& output)
+{
+  arma::mat miniKernel(rank, rank);
+  arma::mat semiKernel(data.n_cols, rank);
+
+  GetKernelMatrix(PointSelectionPolicy::Select(data, rank), miniKernel,
+                  semiKernel);
+
+  // Singular value decomposition mini-kernel matrix.
+  arma::mat U, V;
+  arma::vec s;
+  arma::svd(U, s, V, miniKernel);
+
+  // Construct the output matrix.
+  output = semiKernel * (U * arma::diagmat(1.0 / sqrt(s))) * V;
+}
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/ordered_selection.hpp b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
new file mode 100644
index 0000000..154c04e
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file ordered_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Select the first points of the dataset for use in the Nystroem method of
+ * kernel matrix approximation. This is mostly for testing, but might have
+ * other uses.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class OrderedSelection
+{
+ public:
+  /**
+   * Select the specified number of points in the dataset.
+   *
+   * @param data Dataset to sample from.
+   * @param m Number of points to select.
+   * @return Indices of selected points from the dataset.
+   */
+  const static arma::vec Select(const arma::mat& /* unused */, const size_t m)
+  {
+    // This generates [0 1 2 3 ... (m - 1)].
+    arma::vec selectedPoints = arma::linspace<arma::vec>(0, m - 1, m);
+    return selectedPoints;
+  }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/random_selection.hpp b/src/mlpack/methods/nystroem_method/random_selection.hpp
new file mode 100644
index 0000000..83b09cc
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/random_selection.hpp
@@ -0,0 +1,40 @@
+/**
+ * @file random_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Randomly select some points (with replacement) to use for the Nystroem
+ * method.  Replacement is suboptimal, but for rank << number of points, this is
+ * unlikely.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class RandomSelection
+{
+ public:
+  /**
+   * Randomly select the specified number of points in the dataset.
+   *
+   * @param data Dataset to sample from.
+   * @param m Number of points to select.
+   * @return Indices of selected points from the dataset.
+   */
+  const static arma::vec Select(const arma::mat& data, const size_t m)
+  {
+    arma::vec selectedPoints(m);
+    for (size_t i = 0; i < m; ++i)
+      selectedPoints(i) = math::RandInt(0, data.n_cols);
+
+    return selectedPoints;
+  }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif



More information about the mlpack-git mailing list