[mlpack-svn] r16837 - mlpack/trunk/src/mlpack/methods/nystroem_method
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 18 17:56:30 EDT 2014
Author: marcus
Date: Fri Jul 18 17:56:30 2014
New Revision: 16837
Log:
Add nystroem method for kernel approximation.
Added:
mlpack/trunk/src/mlpack/methods/nystroem_method/
mlpack/trunk/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method.hpp
mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
mlpack/trunk/src/mlpack/methods/nystroem_method/ordered_selection.hpp
mlpack/trunk/src/mlpack/methods/nystroem_method/random_selection.hpp
Added: mlpack/trunk/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/nystroem_method/kmeans_selection.hpp Fri Jul 18 17:56:30 2014
@@ -0,0 +1,47 @@
+/**
+ * @file kmeans_selection.hpp
+ * @author Marcus Edel
+ *
+ * Use the centroids of the K-Means clustering method for use in the Nystroem
+ * method of kernel matrix approximation.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/kmeans/kmeans.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+template<typename ClusteringType = kmeans::KMeans<> >
+class KMeansSelection
+{
+ public:
+ /**
+ * Use the K-Means clustering method to select the specified number of points
+ * in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Matrix pointer in which centroids are stored.
+ */
+ const static arma::mat* Select(const arma::mat& data,
+ const size_t m,
+ const size_t maxIterations = 5)
+ {
+ arma::Col<size_t> assignments;
+ arma::mat* centroids = new arma::mat;
+
+ // Perform the K-Means clustering method.
+ ClusteringType kmeans(maxIterations);
+ kmeans.Cluster(data, m, assignments, *centroids);
+
+ return centroids;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method.hpp Fri Jul 18 17:56:30 2014
@@ -0,0 +1,81 @@
+/**
+ * @file nystroem_method.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ * There are many variations on how to do this, so template parameters allow the
+ * selection of many different techniques.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include "kmeans_selection.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<
+ typename KernelType,
+ typename PointSelectionPolicy = KMeansSelection<>
+>
+class NystroemMethod
+{
+ public:
+ /**
+ * Create the NystroemMethod object. The constructor here does not really do
+ * anything.
+ *
+ * @param data Data matrix.
+ * @param kernel Kernel to be used for computation.
+ * @param rank Rank to be used for matrix approximation.
+ */
+ NystroemMethod(const arma::mat& data, KernelType& kernel, const size_t rank);
+
+ /**
+ * Apply the low-rank factorization to obtain an output matrix G such that
+ * K' = G * G^T.
+ *
+ * @param output Matrix to store kernel approximation into.
+ */
+ void Apply(arma::mat& output);
+
+ /**
+ * Construct the kernel matrix with matrix that contains the selected points.
+ *
+ * @param data Data matrix pointer.
+ * @param miniKernel to store the constructed mini-kernel matrix in.
+ * @param miniKernel to store the constructed semi-kernel matrix in.
+ */
+ void GetKernelMatrix(const arma::mat* data,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel);
+
+ /**
+ * Construct the kernel matrix with the selected points.
+ *
+ * @param points Indices of selected points.
+ * @param miniKernel to store the constructed mini-kernel matrix in.
+ * @param miniKernel to store the constructed semi-kernel matrix in.
+ */
+ void GetKernelMatrix(const arma::vec& points,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel);
+
+ private:
+ //! The reference dataset.
+ const arma::mat& data;
+ //! The locally stored kernel, if it is necessary.
+ KernelType& kernel;
+ //! Rank used for matrix approximation.
+ const size_t rank;
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+// Include implementation.
+#include "nystroem_method_impl.hpp"
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp Fri Jul 18 17:56:30 2014
@@ -0,0 +1,90 @@
+/**
+ * @file nystroem_method_impl.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "nystroem_method.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<typename KernelType, typename PointSelectionPolicy>
+NystroemMethod<KernelType, PointSelectionPolicy>::NystroemMethod(
+ const arma::mat& data,
+ KernelType& kernel,
+ const size_t rank) :
+ data(data),
+ kernel(kernel),
+ rank(rank)
+{ }
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+ const arma::mat* seletedData,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel)
+{
+ // Assemble mini-kernel matrix.
+ for (size_t i = 0; i < rank; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ miniKernel(i, j) = kernel.Evaluate(seletedData->col(i),
+ seletedData->col(j));
+
+ // Construct semi-kernel matrix with interactions between selected data and
+ // all points.
+ for (size_t i = 0; i < data.n_cols; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ semiKernel(i, j) = kernel.Evaluate(data.col(i),
+ seletedData->col(j));
+ // Clean the memory.
+ delete seletedData;
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+ const arma::vec& selectedPoints,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel)
+{
+ // Assemble mini-kernel matrix.
+ for (size_t i = 0; i < rank; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ miniKernel(i, j) = kernel.Evaluate(data.col(selectedPoints(i)),
+ data.col(selectedPoints(j)));
+
+ // Construct semi-kernel matrix with interactions between selected points and
+ // all points.
+ for (size_t i = 0; i < data.n_cols; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ semiKernel(i, j) = kernel.Evaluate(data.col(i),
+ data.col(selectedPoints(j)));
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::Apply(arma::mat& output)
+{
+ arma::mat miniKernel(rank, rank);
+ arma::mat semiKernel(data.n_cols, rank);
+
+ GetKernelMatrix(PointSelectionPolicy::Select(data, rank), miniKernel,
+ semiKernel);
+
+ // Singular value decomposition mini-kernel matrix.
+ arma::mat U, V;
+ arma::vec s;
+ arma::svd(U, s, V, miniKernel);
+
+ // Construct the output matrix.
+ output = semiKernel * (U * arma::diagmat(1.0 / sqrt(s))) * V;
+}
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/nystroem_method/ordered_selection.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/nystroem_method/ordered_selection.hpp Fri Jul 18 17:56:30 2014
@@ -0,0 +1,38 @@
+/**
+ * @file ordered_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Select the first points of the dataset for use in the Nystroem method of
+ * kernel matrix approximation. This is mostly for testing, but might have
+ * other uses.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class OrderedSelection
+{
+ public:
+ /**
+ * Select the specified number of points in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Indices of selected points from the dataset.
+ */
+ const static arma::vec Select(const arma::mat& /* unused */, const size_t m)
+ {
+ // This generates [0 1 2 3 ... (m - 1)].
+ arma::vec selectedPoints = arma::linspace<arma::vec>(0, m - 1, m);
+ return selectedPoints;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/nystroem_method/random_selection.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/nystroem_method/random_selection.hpp Fri Jul 18 17:56:30 2014
@@ -0,0 +1,40 @@
+/**
+ * @file random_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Randomly select some points (with replacement) to use for the Nystroem
+ * method. Replacement is suboptimal, but for rank << number of points, this is
+ * unlikely.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class RandomSelection
+{
+ public:
+ /**
+ * Randomly select the specified number of points in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Indices of selected points from the dataset.
+ */
+ const static arma::vec Select(const arma::mat& data, const size_t m)
+ {
+ arma::vec selectedPoints(m);
+ for (size_t i = 0; i < m; ++i)
+ selectedPoints(i) = math::RandInt(0, data.n_cols);
+
+ return selectedPoints;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
More information about the mlpack-svn
mailing list