[mlpack-svn] r16849 - mlpack/trunk/src/mlpack/methods/kernel_pca

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jul 22 14:52:37 EDT 2014


Author: marcus
Date: Tue Jul 22 14:52:36 2014
New Revision: 16849

Log:
Integrate nystroem method into the kernel_pca_main.cpp file.

Modified:
   mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp

Modified: mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp	Tue Jul 22 14:52:36 2014
@@ -5,6 +5,11 @@
  * Executable for Kernel PCA.
  */
 #include <mlpack/core.hpp>
+#include <mlpack/methods/nystroem_method/ordered_selection.hpp>
+#include <mlpack/methods/nystroem_method/random_selection.hpp>
+#include <mlpack/methods/nystroem_method/kmeans_selection.hpp>
+#include <mlpack/methods/nystroem_method/nystroem_method.hpp>
+#include <mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp>
 
 #include "kernel_pca.hpp"
 
@@ -24,6 +29,14 @@
     "For the case where a linear kernel is used, this reduces to regular "
     "PCA."
     "\n\n"
+
+    "For example, the following will perform KPCA on the 'input.csv' file using"
+    " the gaussian kernel and store the transformed date in the "
+    "'transformed.csv' file."
+
+    "\n\n"
+    "$ kernel_pca -i input.csv -k gaussian -o transformed.csv"
+    "\n\n"
     "The kernels that are supported are listed below:"
     "\n\n"
     " * 'linear': the standard linear dot product (same as normal PCA):\n"
@@ -49,7 +62,15 @@
     "\n"
     "The parameters for each of the kernels should be specified with the "
     "options --bandwidth, --kernel_scale, --offset, or --degree (or a "
-    "combination of those options).\n");
+    "combination of those options)."
+    "\n\n"
+    "Optionally, the nystroem method (\"Using the Nystroem method to speed up"
+    " kernel machines\", 2001) can be used to calculate the kernel matrix by "
+    "specifying the --nystroem_method (-n) option. This approach works by using"
+    " a subset of the data as basis to reconstruct the kernel matrix; to specify"
+    " the sampling scheme, the --sampling parameter is used, the sampling scheme"
+    " for the nystroem method can be chosen from the following list: kmeans,"
+    " random, ordered.");
 
 PARAM_STRING_REQ("input_file", "Input dataset to perform KPCA on.", "i");
 PARAM_STRING_REQ("output_file", "File to save modified dataset to.", "o");
@@ -63,6 +84,11 @@
 PARAM_FLAG("center", "If set, the transformed data will be centered about the "
     "origin.", "c");
 
+PARAM_FLAG("nystroem_method", "If set, the nystroem method will be used.", "n");
+
+PARAM_STRING("sampling", "Sampling scheme to use for the nystroem method: "
+    "'kmeans', 'random', 'ordered'", "s", "kmeans");
+
 PARAM_DOUBLE("kernel_scale", "Scale, for 'hyptan' kernel.", "S", 1.0);
 PARAM_DOUBLE("offset", "Offset, for 'hyptan' and 'polynomial' kernels.", "O",
     0.0);
@@ -71,6 +97,48 @@
 PARAM_DOUBLE("degree", "Degree of polynomial, for 'polynomial' kernel.", "D",
     1.0);
 
+//! Run KernelPCA on the dataset with the given kernel.  If nystroem is set,
+//! sampling ('kmeans', 'random' or 'ordered') selects the Nystroem sampling
+//! scheme; any other value is reported through Log::Fatal.
+template<typename KernelType>
+void RunKPCA(arma::mat& dataset,
+             const bool centerTransformedData,
+             const bool nystroem,
+             const size_t newDim,
+             const string& sampling,
+             KernelType& kernel)
+{
+  if (!nystroem)
+  {
+    KernelPCA<KernelType> kpca(kernel, centerTransformedData);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (sampling == "kmeans")
+  {
+    // As above, hand over the kernel and centering flag so neither is ignored.
+    KernelPCA<KernelType, NystroemKernelRule<KernelType,
+        KMeansSelection<> > > kpca(kernel, centerTransformedData);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (sampling == "random")
+  {
+    KernelPCA<KernelType, NystroemKernelRule<KernelType,
+        RandomSelection> > kpca(kernel, centerTransformedData);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (sampling == "ordered")
+  {
+    KernelPCA<KernelType, NystroemKernelRule<KernelType,
+        OrderedSelection> > kpca(kernel, centerTransformedData);
+    kpca.Apply(dataset, newDim);
+  }
+  else
+  {
+    Log::Fatal << "Invalid sampling scheme ('" << sampling << "'); valid "
+        << "choices are 'kmeans', 'random' and 'ordered'" << endl;
+  }
+}
+
 int main(int argc, char** argv)
 {
   // Parse command line options.
@@ -99,19 +167,22 @@
   const string kernelType = CLI::GetParam<string>("kernel");
 
   const bool centerTransformedData = CLI::HasParam("center");
+  const bool nystroem = CLI::HasParam("nystroem_method");
+  const string sampling = CLI::GetParam<string>("sampling");
 
   if (kernelType == "linear")
   {
-    KernelPCA<LinearKernel> kpca(LinearKernel(), centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    LinearKernel kernel;
+    RunKPCA<LinearKernel>(dataset, centerTransformedData, nystroem, newDim, 
+        sampling, kernel);
   }
   else if (kernelType == "gaussian")
   {
     const double bandwidth = CLI::GetParam<double>("bandwidth");
 
     GaussianKernel kernel(bandwidth);
-    KernelPCA<GaussianKernel> kpca(kernel, centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    RunKPCA<GaussianKernel>(dataset, centerTransformedData, nystroem, newDim,
+        sampling, kernel);
   }
   else if (kernelType == "polynomial")
   {
@@ -119,8 +190,8 @@
     const double offset = CLI::GetParam<double>("offset");
 
     PolynomialKernel kernel(degree, offset);
-    KernelPCA<PolynomialKernel> kpca(kernel, centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    RunKPCA<PolynomialKernel>(dataset, centerTransformedData, nystroem,
+        newDim, sampling, kernel);
   }
   else if (kernelType == "hyptan")
   {
@@ -128,29 +199,30 @@
     const double offset = CLI::GetParam<double>("offset");
 
     HyperbolicTangentKernel kernel(scale, offset);
-    KernelPCA<HyperbolicTangentKernel> kpca(kernel, centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    RunKPCA<HyperbolicTangentKernel>(dataset, centerTransformedData, nystroem,
+        newDim, sampling, kernel);
   }
   else if (kernelType == "laplacian")
   {
     const double bandwidth = CLI::GetParam<double>("bandwidth");
 
     LaplacianKernel kernel(bandwidth);
-    KernelPCA<LaplacianKernel> kpca(kernel, centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    RunKPCA<LaplacianKernel>(dataset, centerTransformedData, nystroem, newDim,
+        sampling, kernel);
   }
   else if (kernelType == "epanechnikov")
   {
     const double bandwidth = CLI::GetParam<double>("bandwidth");
 
     EpanechnikovKernel kernel(bandwidth);
-    KernelPCA<EpanechnikovKernel> kpca(kernel, centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    RunKPCA<EpanechnikovKernel>(dataset, centerTransformedData, nystroem,
+        newDim, sampling, kernel);
   }
   else if (kernelType == "cosine")
   {
-    KernelPCA<CosineDistance> kpca(CosineDistance(), centerTransformedData);
-    kpca.Apply(dataset, newDim);
+    CosineDistance kernel;
+    RunKPCA<CosineDistance>(dataset, centerTransformedData, nystroem, newDim,
+        sampling, kernel);
   }
   else
   {



More information about the mlpack-svn mailing list