[mlpack-svn] r11621 - mlpack/trunk/src/mlpack/methods/kernel_pca

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Feb 28 00:40:30 EST 2012


Author: rcurtin
Date: 2012-02-28 00:40:30 -0500 (Tue, 28 Feb 2012)
New Revision: 11621

Modified:
   mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_impl.hpp
   mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
Log:
An actual main executable and some implementation tweaks for speed.


Modified: mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_impl.hpp	2012-02-28 05:27:09 UTC (rev 11620)
+++ mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_impl.hpp	2012-02-28 05:40:30 UTC (rev 11621)
@@ -1,5 +1,5 @@
 /**
- * @file kernelpca_impl.hpp
+ * @file kernel_pca_impl.hpp
  * @author Ajinkya Kale
  *
  * Implementation of KernelPCA class to perform Kernel Principal Components
@@ -13,8 +13,6 @@
 
 #include <iostream>
 
-using namespace std; // This'll have to go before the release.
-
 namespace mlpack {
 namespace kpca {
 
@@ -25,8 +23,7 @@
       kernel(kernel),
       centerData(centerData),
       scaleData(scaleData)
-{
-}
+{ }
 
 /**
  * Apply Kernel Principal Component Analysis to the provided data set.
@@ -44,16 +41,20 @@
 {
   arma::mat transData = trans(data);
 
+  // Center the data if necessary.
   if(centerData)
   {
     arma::rowvec means = arma::mean(transData, 0);
     transData = transData - arma::ones<arma::colvec>(transData.n_rows) * means;
   }
+
+  // Scale the data if necessary.
   if (scaleData)
   {
     transData = transData / (arma::ones<arma::colvec>(transData.n_rows) *
     stddev(transData, 0, 0));
   }
+
   arma::mat centeredData = trans(transData);
   arma::mat kernelMat = GetKernelMatrix(kernel, centeredData);
   arma::eig_sym(eigVal, coeffs, kernelMat);
@@ -66,7 +67,8 @@
 
   transformedData = trans(coeffs) * data;
   arma::colvec transformedDataMean = arma::mean(transformedData, 1);
-  transformedData = transformedData - (transformedDataMean * arma::ones<arma::rowvec>(transformedData.n_cols));
+  transformedData = transformedData - (transformedDataMean *
+      arma::ones<arma::rowvec>(transformedData.n_cols));
 }
 
 /**

Modified: mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp	2012-02-28 05:27:09 UTC (rev 11620)
+++ mlpack/trunk/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp	2012-02-28 05:40:30 UTC (rev 11621)
@@ -6,45 +6,151 @@
  */
 #include <mlpack/core.hpp>
 #include <mlpack/core/kernels/linear_kernel.hpp>
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
+#include <mlpack/core/kernels/laplacian_kernel.hpp>
+#include <mlpack/core/kernels/polynomial_kernel.hpp>
+#include <mlpack/core/kernels/cosine_distance.hpp>
 
 #include "kernel_pca.hpp"
 
 using namespace mlpack;
 using namespace mlpack::kpca;
+using namespace mlpack::kernel;
 using namespace std;
 using namespace arma;
 
-int main(int /* argc */, char** /* argv */)
+PROGRAM_INFO("Kernel Principal Components Analysis",
+    "This program performs Kernel Principal Components Analysis (KPCA) on the "
+    "specified dataset with the specified kernel.  This will transform the "
+    "data onto the kernel principal components, and optionally reduce the "
+    "dimensionality by ignoring the kernel principal components with the "
+    "smallest eigenvalues."
+    "\n\n"
+    "For the case where a linear kernel is used, this reduces to regular "
+    "PCA."
+    "\n\n"
+    "The kernels that are supported are listed below:"
+    "\n\n"
+    " * 'linear': the standard linear dot product (same as normal PCA):\n"
+    "    K(x, y) = x^T y\n"
+    "\n"
+    " * 'gaussian': a Gaussian kernel; requires bandwidth:\n"
+    "    K(x, y) = exp(-(|| x - y || ^ 2) / (2 * (bandwidth ^ 2)))\n"
+    "\n"
+    " * 'polynomial': polynomial kernel; requires offset and degree:\n"
+    "    K(x, y) = (x^T y + offset) ^ degree\n"
+    "\n"
+    " * 'hyptan': hyperbolic tangent kernel; requires scale and offset:\n"
+    "    K(x, y) = tanh(scale * (x^T y) + offset)\n"
+    "\n"
+    " * 'laplacian': Laplacian kernel; requires bandwidth:\n"
+    "    K(x, y) = exp(-(|| x - y ||) / bandwidth)\n"
+    "\n"
+    " * 'cosine': cosine distance:\n"
+    "    K(x, y) = 1 - (x^T y) / (|| x || * || y ||)\n"
+    "\n"
+    "The parameters for each of the kernels should be specified with the "
+    "options --bandwidth, --scale, --offset, or --degree (or a combination of "
+    "those options).");
+
+PARAM_STRING_REQ("input_file", "Input dataset to perform KPCA on.", "i");
+PARAM_STRING_REQ("output_file", "File to save modified dataset to.", "o");
+PARAM_STRING_REQ("kernel", "The kernel to use; see the above documentation for "
+    "the list of usable kernels.", "k");
+
+PARAM_INT("new_dimensionality", "If not 0, reduce the dimensionality of "
+    "the output dataset by ignoring the dimensions with the smallest "
+    "eigenvalues.", "d", 0);
+
+PARAM_DOUBLE("scale", "Scale, for 'hyptan' kernel.", "s", 1.0);
+PARAM_DOUBLE("offset", "Offset, for 'hyptan' and 'polynomial' kernels.", "O",
+    0.0);
+PARAM_DOUBLE("bandwidth", "Bandwidth, for 'gaussian' and 'laplacian' kernels.",
+    "b", 1.0);
+PARAM_DOUBLE("degree", "Degree of polynomial, for 'polynomial' kernel.", "D",
+    1.0);
+
+int main(int argc, char** argv)
 {
+  // Parse command line options.
+  CLI::ParseCommandLine(argc, argv);
 
-  mat data("1 0 2 3 9;"
-            "5 2 8 4 8;"
-            "6 7 3 1 8");
-/*  mat data("1 2 3;"
-            "4 5 6;"
-            "7 8 9");*/
+  // Load input dataset.
+  mat dataset;
+  const string inputFile = CLI::GetParam<string>("input_file");
+  data::Load(inputFile, dataset, true); // Fatal on failure.
 
-  data.print("DATA : ");
+  // Get the new dimensionality, if it is necessary.
+  size_t newDim = dataset.n_rows;
+  if (CLI::GetParam<int>("new_dimensionality") != 0)
+  {
+    newDim = CLI::GetParam<int>("new_dimensionality");
 
-   // Now run PCA to reduce the dimensionality.
-   kpca::KernelPCA<kernel::LinearKernel> p;
-   //p.CenterData();
-   p.Apply(data, 2); // Reduce to 2 dimensions.
+    if (newDim > dataset.n_rows)
+    {
+      Log::Fatal << "New dimensionality (" << newDim
+          << ") cannot be greater than existing dimensionality ("
+          << dataset.n_rows << ")!" << endl;
+    }
+  }
 
-   data.print("RESULT : ");
-   // Compare with correct results.
-   mat correct("-1.53781086 -3.51358020 -0.16139887 -1.87706634  7.08985628;"
-               " 1.29937798  3.45762685 -2.69910005 -3.15620704  1.09830225");
-   correct.print("CORRECT");
+  // Get the kernel type and make sure it is valid.
+  const string kernelType = CLI::GetParam<string>("kernel");
 
-   // If the eigenvectors are pointed opposite directions, they will cancel
- // each other out in this summation.
-   for(size_t i = 0; i < data.n_rows; i++)
-   {
-     if (fabs(correct(i, 1) + data(i,1)) < 0.001 /* arbitrary */)
-     {
-          // Flip Armadillo coefficients for this column.
-          data.row(i) *= -1;
-     }
-   }
+  if (kernelType == "linear")
+  {
+    KernelPCA<LinearKernel> kpca;
+    kpca.Apply(dataset, newDim);
+  }
+  else if (kernelType == "gaussian")
+  {
+    const double bandwidth = CLI::GetParam<double>("bandwidth");
+
+    GaussianKernel kernel(bandwidth);
+    KernelPCA<GaussianKernel> kpca(kernel);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (kernelType == "polynomial")
+  {
+    const double degree = CLI::GetParam<double>("degree");
+    const double offset = CLI::GetParam<double>("offset");
+
+    PolynomialKernel kernel(offset, degree);
+    KernelPCA<PolynomialKernel> kpca(kernel);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (kernelType == "hyptan")
+  {
+    const double scale = CLI::GetParam<double>("scale");
+    const double offset = CLI::GetParam<double>("offset");
+
+    HyperbolicTangentKernel kernel(scale, offset);
+    KernelPCA<HyperbolicTangentKernel> kpca(kernel);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (kernelType == "laplacian")
+  {
+    const double bandwidth = CLI::GetParam<double>("bandwidth");
+
+    LaplacianKernel kernel(bandwidth);
+    KernelPCA<LaplacianKernel> kpca(kernel);
+    kpca.Apply(dataset, newDim);
+  }
+  else if (kernelType == "cosine")
+  {
+    KernelPCA<CosineDistance> kpca;
+    kpca.Apply(dataset, newDim);
+  }
+  else
+  {
+    // Invalid kernel type.
+    Log::Fatal << "Invalid kernel type ('" << kernelType << "'); valid choices "
+        << "are 'linear', 'gaussian', 'polynomial', 'hyptan', 'laplacian', and "
+        << "'cosine'." << endl;
+  }
+
+  // Save the output dataset.
+  const string outputFile = CLI::GetParam<string>("output_file");
+  data::Save(outputFile, dataset, true); // Fatal on failure.
 }




More information about the mlpack-svn mailing list