[mlpack-svn] r10388 - mlpack/trunk/src/mlpack/methods/mvu

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Nov 24 02:02:30 EST 2011


Author: rcurtin
Date: 2011-11-24 02:02:29 -0500 (Thu, 24 Nov 2011)
New Revision: 10388

Added:
   mlpack/trunk/src/mlpack/methods/mvu/mvu.hpp
   mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.hpp
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cpp
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.hpp
Removed:
   mlpack/trunk/src/mlpack/methods/mvu/mvu.h
   mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.h
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cc
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.h
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives.h
   mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives_impl.h
Modified:
   mlpack/trunk/src/mlpack/methods/mvu/CMakeLists.txt
Log:
Style MVU as per #153 and move to .hpp and .cpp as in #152.


Modified: mlpack/trunk/src/mlpack/methods/mvu/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/CMakeLists.txt	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/CMakeLists.txt	2011-11-24 07:02:29 UTC (rev 10388)
@@ -3,10 +3,10 @@
 # Define the files we need to compile.
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  mvu.h
-  mvu_impl.h
-#  mvu_objective_function.h
-#  mvu_objective_function.cc
+  mvu.hpp
+  mvu_impl.hpp
+#  mvu_objective_function.hpp
+#  mvu_objective_function.cpp
 )
 
 # Add directory name to sources.

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu.h	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu.h	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,43 +0,0 @@
-/***
- * @file mvu.h
- * @author Ryan Curtin
- *
- * An implementation of Maximum Variance Unfolding.  This file defines an MVU
- * class as well as a class representing the objective function (a semidefinite
- * program) which MVU seeks to minimize.  Minimization is performed by the
- * Augmented Lagrangian optimizer (which in turn uses the L-BFGS optimizer).
- */
-
-#ifndef __MLPACK_MVU_H
-#define __MLPACK_MVU_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace mvu {
-
-/***
- * The MVU class is meant to provide a good abstraction for users.  The dataset
- * needs to be provided, as well as several parameters.
- *
- * - dataset
- * - new dimensionality
- */
-template<typename LagrangianFunction>
-class MVU {
- public:
-  MVU(arma::mat& data_in); // probably needs arguments
-
-  bool Unfold(arma::mat& output_coordinates); // probably needs arguments
-
- private:
-  arma::mat& data_;
-  LagrangianFunction f_;
-};
-
-}; // namespace mvu
-}; // namespace mlpack
-
-#include "mvu_impl.h"
-
-#endif

Copied: mlpack/trunk/src/mlpack/methods/mvu/mvu.hpp (from rev 10352, mlpack/trunk/src/mlpack/methods/mvu/mvu.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu.hpp	2011-11-24 07:02:29 UTC (rev 10388)
@@ -0,0 +1,43 @@
+/**
+ * @file mvu.hpp
+ * @author Ryan Curtin
+ *
+ * An implementation of Maximum Variance Unfolding.  This file defines an MVU
+ * class as well as a class representing the objective function (a semidefinite
+ * program) which MVU seeks to minimize.  Minimization is performed by the
+ * Augmented Lagrangian optimizer (which in turn uses the L-BFGS optimizer).
+ */
+#ifndef __MLPACK_METHODS_MVU_MVU_HPP
+#define __MLPACK_METHODS_VU_MVU_HPP
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace mvu {
+
+/**
+ * The MVU class is meant to provide a good abstraction for users.  The dataset
+ * needs to be provided, as well as several parameters.
+ *
+ * - dataset
+ * - new dimensionality
+ */
+template<typename LagrangianFunction>
+class MVU
+{
+ public:
+  MVU(arma::mat& data_in); // probably needs arguments
+
+  bool Unfold(arma::mat& output_coordinates); // probably needs arguments
+
+ private:
+  arma::mat& data_;
+  LagrangianFunction f_;
+};
+
+}; // namespace mvu
+}; // namespace mlpack
+
+#include "mvu_impl.h"
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.h	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.h	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,38 +0,0 @@
-/***
- * @file mvu_impl.h
- * @author Ryan Curtin
- *
- * Implementation of the MVU class and its auxiliary objective function class.
- */
-
-#ifndef __MLPACK_MVU_IMPL_H
-#define __MLPACK_MVU_IMPL_H
-
-#include <mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp>
-
-namespace mlpack {
-namespace mvu {
-
-template<typename LagrangianFunction>
-MVU<LagrangianFunction>::MVU(arma::mat& data_in) :
-    data_(data_in),
-    f_(data_) {
-  // Nothing to do.
-}
-
-template<typename LagrangianFunction>
-bool MVU<LagrangianFunction>::Unfold(arma::mat& output_coordinates) {
-  // Set up Augmented Lagrangian method.
-  // Memory choice is arbitrary; this needs to be configurable.
-  mlpack::optimization::AugLagrangian<LagrangianFunction> aug(f_, 20);
-
-  output_coordinates = f_.GetInitialPoint();
-  aug.Optimize(0, output_coordinates);
-
-  return true;
-}
-
-}; // namespace mvu
-}; // namespace mlpack
-
-#endif

Copied: mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.hpp (from rev 10352, mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_impl.hpp	2011-11-24 07:02:29 UTC (rev 10388)
@@ -0,0 +1,42 @@
+/**
+ * @file mvu_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of the MVU class and its auxiliary objective function class.
+ */
+#ifndef __MLPACK_METHODS_MVU_IMPL_HPP
+#define __MLPACK_METHODS_MVU_IMPL_HPP
+
+// In case it has not been included.
+#include "mvu.hpp"
+
+#include <mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp>
+
+namespace mlpack {
+namespace mvu {
+
+template<typename LagrangianFunction>
+MVU<LagrangianFunction>::MVU(arma::mat& data_in) :
+    data_(data_in),
+    f_(data_)
+{
+  // Nothing to do.
+}
+
+template<typename LagrangianFunction>
+bool MVU<LagrangianFunction>::Unfold(arma::mat& output_coordinates)
+{
+  // Set up Augmented Lagrangian method.
+  // Memory choice is arbitrary; this needs to be configurable.
+  mlpack::optimization::AugLagrangian<LagrangianFunction> aug(f_, 20);
+
+  output_coordinates = f_.GetInitialPoint();
+  aug.Optimize(0, output_coordinates);
+
+  return true;
+}
+
+}; // namespace mvu
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cc	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cc	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,163 +0,0 @@
-/***
- * @file mvu_objective_function.cc
- * @author Ryan Curtin
- *
- * Implementation of the MVUObjectiveFunction class.
- */
-
-#include "mvu_objective_function.h"
-
-#include <mlpack/neighbor_search/neighbor_search.h>
-#include <mlpack/fastica/lin_alg.h>
-
-using namespace mlpack;
-using namespace mlpack::mvu;
-
-using mlpack::neighbor::AllkNN;
-
-MVUObjectiveFunction::MVUObjectiveFunction() {
-  // Need to set initial point?  I guess this will be the initial matrix...
-  Log::Fatal << "Initialized MVUObjectiveFunction all wrong." << std::endl;
-}
-
-MVUObjectiveFunction::MVUObjectiveFunction(arma::mat& initial_point) :
-    num_neighbors_(5) {
-  // We will calculate the nearest neighbors of this dataset.
-  AllkNN allknn(initial_point);
-
-  allknn.ComputeNeighbors(neighbor_indices_, neighbor_distances_);
-//  NOTIFY("Neighbor indices: ");
-//  std::cout << neighbor_indices_;
-//  NOTIFY("Neighbor distances: ");
-//  std::cout << neighbor_distances_;
-
-  // Now shrink the point matrix to the correct target size.
-  int dimension = 2; // Get this from CLI: TODO.
-  initial_point_ = initial_point;
-  initial_point_.shed_rows(2, initial_point.n_rows - 1);
-
-  // Center the matrix, for shits and giggles.
-//  arma::mat tmp;
-//  linalg__private::Center(initial_point_, tmp);
-//  initial_point_ = tmp;
-}
-
-double MVUObjectiveFunction::Evaluate(const arma::mat& coordinates) {
-  // We replaced the SDP constraint (K > 0) with (K = R^T R) so now our
-  // objective function is simply that (and R is our coordinate matrix).  Since
-  // the problem is a maximization problem, we simply negate the objective
-  // function and it becomes a minimization problem (which is what L-BFGS and
-  // AugLagrangian use).
-  double objective = 0;
-
-  for (int i = 0; i < coordinates.n_cols; i++)
-    objective -= dot(coordinates.unsafe_col(i), coordinates.unsafe_col(i));
-
-  return objective;
-}
-
-void MVUObjectiveFunction::Gradient(const arma::mat& coordinates,
-                                    arma::mat& gradient) {
-  // Our objective, f(R) = sum_{ij} (R^T R)_ij, is differentiable into
-  //   f'(R) = 2 * R.
-  NOTIFY("Coordinates are ");
-  std::cout << coordinates;
-  gradient = 2 * coordinates;
-  NOTIFY("Calculated gradient is ");
-  std::cout << gradient;
-}
-
-double MVUObjectiveFunction::EvaluateConstraint(int index,
-                                                const arma::mat& coordinates) {
-  if (index == 0) {
-    // We are considering the first constraint:
-    //   sum (R^T * R) = 0
-
-    // This is a naive implementation; we may be able to improve upon it
-    // significantly by avoiding the actual calculation of the Gram matrix
-    // (R^T * R).
-    if (accu(trans(coordinates) * coordinates) > 0) {
-        NOTIFY("Constraint 0 is nonzero: %lf",
-            accu(trans(coordinates) * coordinates));
-    }
-
-    return accu(trans(coordinates) * coordinates);
-  }
-
-  // Return 0 for any constraints which are out of bounds.
-  if (index >= NumConstraints() || index < 0)
-    return 0;
-
-  // Any other constraints are the individual nearest neighbor constraints:
-  //   (R^T R)_ii - 2 (R^T R)_ij + (R^T R)_jj - || x_i - x_j ||^2 = 0
-  //
-  // We will get the i and j values from the given index.
-  int i = floor(((double) (index - 1)) / (double) num_neighbors_);
-  int j = neighbor_indices_[index - 1]; // Retrieve index of this neighbor.
-
-  // (R^T R)_ij = R.col(i) * R.col(j)  (dot product)
-  double rrt_ii = dot(coordinates.col(i), coordinates.col(i));
-  double rrt_ij = dot(coordinates.col(i), coordinates.col(j));
-  double rrt_jj = dot(coordinates.col(j), coordinates.col(j));
-
-  // We must remember the actual distance between points.
-//  NOTIFY("Index %d: i is %d, j is %d", index, i, j);
-//  NOTIFY("Neighbor %d: distance of %lf", index - 1, 
-//      neighbor_distances_[index - 1]);
-//  NOTIFY("rrt_ii: %lf; rrt_ij: %lf; rrt_jj: %lf", rrt_ii, rrt_ij, rrt_jj);
-//  NOTIFY("r_i: ");
-//  std::cout << coordinates.col(i);
-//  NOTIFY("r_j: ");
-//  std::cout << coordinates.col(j);
-//  NOTIFY("LHS: %lf", (rrt_ii - 2 * rrt_ij + rrt_jj));
-  
-  if (((rrt_ii - 2 * rrt_ij + rrt_jj) - 
-      neighbor_distances_[index - 1]) > 1e-5) {
-    NOTIFY("Constraint %d is nonzero:  %lf", index,
-        ((rrt_ii - 2 * rrt_ij + rrt_jj) - neighbor_distances_[index - 1]));
-  }
-
-  return ((rrt_ii - 2 * rrt_ij + rrt_jj) - neighbor_distances_[index - 1]);
-}
-
-void MVUObjectiveFunction::GradientConstraint(int index,
-                                              const arma::mat& coordinates,
-                                              arma::mat& gradient) {
-  // Set gradient to 0 (we will add to it).
-  gradient.zeros(coordinates.n_rows, coordinates.n_cols);
-  
-  // Return 0 for any constraints which are out of bounds.
-  if (index >= NumConstraints() || index < 0)
-    return;
-
-  if (index == 0) {
-    // We consider the gradient of the first constraint:
-    //   sum (R^T * R) = 0
-    // It is eventually worked out that
-    //   d / dR_xy (sum (R^T R)) = sum_i (R_xi)
-    //
-    // We can see that we can separate this out into two distinct sums, for each
-    // row and column, so we can loop first over the columns and then over the
-    // rows to assemble the entire gradient matrix.
-    //for (int i = 0; i < coordinates.n_cols; i++)
-    //  gradient.col(i) += accu(coordinates.col(i)); // sum_i (R_xi)
-    arma::mat ones(gradient.n_cols, gradient.n_cols);
-    gradient = coordinates * ones;
-    
-    return;
-  }
-
-  // Any other constraints are the individual nearest neighbor constraints:
-  //  (R^T R)_ii - 2 (R^T R)_ij + (R^T R)_jj = || x_i - x_j ||^2
-  //
-  // We will get the i and j values from the given index.
-  int i = floor(((double) (index - 1)) / (double) num_neighbors_);
-  int j = neighbor_indices_[index - 1];
-
-  // The gradient matrix for the nearest neighbor constraint (i, j) is zero
-  // except for column i, which is equal to 2 (R.col(i) - R.col(j)) and also
-  // except for column j, which is equal to 2 (R.col(j) - R.col(i)).
-  arma::vec diff_row = coordinates.col(i) - coordinates.col(j);
-  gradient.col(i) += 2 * diff_row;
-  gradient.col(j) -= 2 * diff_row;
-}

Copied: mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cpp (from rev 10352, mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.cpp	2011-11-24 07:02:29 UTC (rev 10388)
@@ -0,0 +1,175 @@
+/**
+ * @file mvu_objective_function.cpp
+ * @author Ryan Curtin
+ *
+ * Implementation of the MVUObjectiveFunction class.
+ */
+#include "mvu_objective_function.hpp"
+
+#include <mlpack/neighbor_search/neighbor_search.h>
+#include <mlpack/fastica/lin_alg.h>
+
+using namespace mlpack;
+using namespace mlpack::mvu;
+
+using mlpack::neighbor::AllkNN;
+
+MVUObjectiveFunction::MVUObjectiveFunction()
+{
+  // Need to set initial point?  I guess this will be the initial matrix...
+  Log::Fatal << "Initialized MVUObjectiveFunction all wrong." << std::endl;
+}
+
+MVUObjectiveFunction::MVUObjectiveFunction(arma::mat& initial_point) :
+    num_neighbors_(5)
+{
+  // We will calculate the nearest neighbors of this dataset.
+  AllkNN allknn(initial_point);
+
+  allknn.ComputeNeighbors(neighbor_indices_, neighbor_distances_);
+//  NOTIFY("Neighbor indices: ");
+//  std::cout << neighbor_indices_;
+//  NOTIFY("Neighbor distances: ");
+//  std::cout << neighbor_distances_;
+
+  // Now shrink the point matrix to the correct target size.
+  int dimension = 2; // Get this from CLI: TODO.
+  initial_point_ = initial_point;
+  initial_point_.shed_rows(2, initial_point.n_rows - 1);
+
+  // Center the matrix, for shits and giggles.
+//  arma::mat tmp;
+//  linalg__private::Center(initial_point_, tmp);
+//  initial_point_ = tmp;
+}
+
+double MVUObjectiveFunction::Evaluate(const arma::mat& coordinates)
+{
+  // We replaced the SDP constraint (K > 0) with (K = R^T R) so now our
+  // objective function is simply that (and R is our coordinate matrix).  Since
+  // the problem is a maximization problem, we simply negate the objective
+  // function and it becomes a minimization problem (which is what L-BFGS and
+  // AugLagrangian use).
+  double objective = 0;
+
+  for (int i = 0; i < coordinates.n_cols; i++)
+    objective -= dot(coordinates.unsafe_col(i), coordinates.unsafe_col(i));
+
+  return objective;
+}
+
+void MVUObjectiveFunction::Gradient(const arma::mat& coordinates,
+                                    arma::mat& gradient)
+{
+  // Our objective, f(R) = sum_{ij} (R^T R)_ij, is differentiable into
+  //   f'(R) = 2 * R.
+  Log::Info << "Coordinates are:\n" << coordinates;
+
+  gradient = 2 * coordinates;
+
+  Log::Info << "Calculated gradient is\n" << gradient;
+
+  std::cout << gradient;
+}
+
+double MVUObjectiveFunction::EvaluateConstraint(int index,
+                                                const arma::mat& coordinates)
+{
+  if (index == 0)
+  {
+    // We are considering the first constraint:
+    //   sum (R^T * R) = 0
+
+    // This is a naive implementation; we may be able to improve upon it
+    // significantly by avoiding the actual calculation of the Gram matrix
+    // (R^T * R).
+    if (accu(trans(coordinates) * coordinates) > 0)
+    {
+      Log::Debug << "Constraint 0 is nonzero: " <<
+          accu(trans(coordinates) * coordinates) << std::endl;
+    }
+
+    return accu(trans(coordinates) * coordinates);
+  }
+
+  // Return 0 for any constraints which are out of bounds.
+  if (index >= NumConstraints() || index < 0)
+    return 0;
+
+  // Any other constraints are the individual nearest neighbor constraints:
+  //   (R^T R)_ii - 2 (R^T R)_ij + (R^T R)_jj - || x_i - x_j ||^2 = 0
+  //
+  // We will get the i and j values from the given index.
+  int i = floor(((double) (index - 1)) / (double) num_neighbors_);
+  int j = neighbor_indices_[index - 1]; // Retrieve index of this neighbor.
+
+  // (R^T R)_ij = R.col(i) * R.col(j)  (dot product)
+  double rrt_ii = dot(coordinates.col(i), coordinates.col(i));
+  double rrt_ij = dot(coordinates.col(i), coordinates.col(j));
+  double rrt_jj = dot(coordinates.col(j), coordinates.col(j));
+
+  // We must remember the actual distance between points.
+//  NOTIFY("Index %d: i is %d, j is %d", index, i, j);
+//  NOTIFY("Neighbor %d: distance of %lf", index - 1,
+//      neighbor_distances_[index - 1]);
+//  NOTIFY("rrt_ii: %lf; rrt_ij: %lf; rrt_jj: %lf", rrt_ii, rrt_ij, rrt_jj);
+//  NOTIFY("r_i: ");
+//  std::cout << coordinates.col(i);
+//  NOTIFY("r_j: ");
+//  std::cout << coordinates.col(j);
+//  NOTIFY("LHS: %lf", (rrt_ii - 2 * rrt_ij + rrt_jj));
+
+  if (((rrt_ii - 2 * rrt_ij + rrt_jj) -
+      neighbor_distances_[index - 1]) > 1e-5)
+  {
+    Log::Debug << "Constraint " << index << " is nonzero: " <<
+        ((rrt_ii - 2 * rrt_ij + rrt_jj) - neighbor_distances_[index - 1])
+        << std::endl;
+  }
+
+  return ((rrt_ii - 2 * rrt_ij + rrt_jj) - neighbor_distances_[index - 1]);
+}
+
+void MVUObjectiveFunction::GradientConstraint(int index,
+                                              const arma::mat& coordinates,
+                                              arma::mat& gradient)
+{
+  // Set gradient to 0 (we will add to it).
+  gradient.zeros(coordinates.n_rows, coordinates.n_cols);
+
+  // Return 0 for any constraints which are out of bounds.
+  if (index >= NumConstraints() || index < 0)
+    return;
+
+  if (index == 0)
+  {
+    // We consider the gradient of the first constraint:
+    //   sum (R^T * R) = 0
+    // It is eventually worked out that
+    //   d / dR_xy (sum (R^T R)) = sum_i (R_xi)
+    //
+    // We can see that we can separate this out into two distinct sums, for each
+    // row and column, so we can loop first over the columns and then over the
+    // rows to assemble the entire gradient matrix.
+    //for (int i = 0; i < coordinates.n_cols; i++)
+    //  gradient.col(i) += accu(coordinates.col(i)); // sum_i (R_xi)
+    arma::mat ones(gradient.n_cols, gradient.n_cols);
+    gradient = coordinates * ones;
+
+    return;
+  }
+
+  // Any other constraints are the individual nearest neighbor constraints:
+  //  (R^T R)_ii - 2 (R^T R)_ij + (R^T R)_jj = || x_i - x_j ||^2
+  //
+  // We will get the i and j values from the given index.
+  int i = floor(((double) (index - 1)) / (double) num_neighbors_);
+  int j = neighbor_indices_[index - 1];
+
+  // The gradient matrix for the nearest neighbor constraint (i, j) is zero
+  // except for column i, which is equal to 2 (R.col(i) - R.col(j)) and also
+  // except for column j, which is equal to 2 (R.col(j) - R.col(i)).
+  arma::vec diff_row = coordinates.col(i) - coordinates.col(j);
+  gradient.col(i) += 2 * diff_row;
+  gradient.col(j) -= 2 * diff_row;
+}

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.h	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.h	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,80 +0,0 @@
-/***
- * @file mvu_objective_function.h
- * @author Ryan Curtin
- *
- * This file defines the standard MVU objective function, implemented to fulfill
- * the "LagrangianFunction" policy defined in
- * fastlib/optimization/aug_lagrangian/aug_lagrangian.h.
- *
- * We use the change of variables suggested by Burer and Monteiro [1] to
- * reformulate the MVU SDP originally provided by Weinberger and Saul [2].
- *
- * [1] S. Burer and R.D.C. Monteiro.  "A nonlinear programming algorithm for
- *     solving semidefinite programs via low-rank factorization," Mathematical
- *     Programming, vol. 95, no. 2, pp. 329-357, 2002.
- *
- * [2] K.Q. Weinberger and L.K. Saul, "An introduction to Nonlinear
- *     Dimensionality Reduction by Maximum Variance Unfolding," Proceedings of
- *     the Twenty-First National Conference on Artificial Intelligence
- *     (AAAI-06), 2006.
- */
-
-#ifndef __MLPACK_MVU_OBJECTIVE_FUNCTCLIN_H
-#define __MLPACK_MVU_OBJECTIVE_FUNCTCLIN_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace mvu {
-
-/***
- * The MVU objective function.  This is a reformulation of the SDP proposed
- * originally by Weinberger and Saul.  Their original formulation was:
- *
- * max (trace(K)) subject to:
- *  (1) sum K_ij = 0
- *  (2) K_ii - 2 K_ij + K_jj = || x_i - x_j ||^2 ; for all (i, j) nearest
- *                                                 neighbors
- *  (3) K >= 0 (K is positive semidefinite)
- *
- * We reformulate, taking K = R R^T.  This gives:
- *
- * max (R * R^T) subject to
- *  (1) sum (R * R^T) = 0
- *  (2) (R R^T)_ii - 2 (R R^T)_ij + (R R^T)_jj = || x_i - x_j ||^2 ;
- *        for all (i, j) nearest neighbors
- *
- * Now, our optimization problem is easier.  The total number of constraints is
- * equal to the number of points multiplied by the number of nearest neighbors.
- */
-class MVUObjectiveFunction {
- public:
-  MVUObjectiveFunction();
-  MVUObjectiveFunction(arma::mat& initial_point);
-
-  double Evaluate(const arma::mat& coordinates);
-  void Gradient(const arma::mat& coordinates, arma::mat& gradient);
-
-  int NumConstraints() const { return num_neighbors_ * initial_point_.n_cols; }
-
-  double EvaluateConstraint(int index, const arma::mat& coordinates);
-  void GradientConstraint(int index,
-                          const arma::mat& coordinates,
-                          arma::mat& gradient);
-
-  const arma::mat& GetInitialPoint() { return initial_point_; }
-
- private:
-  arma::mat initial_point_;
-  int num_neighbors_;
-
-  // These hold the output of the nearest neighbors computation (done in the
-  // constructor).
-  arma::Col<index_t> neighbor_indices_;
-  arma::vec neighbor_distances_;
-};
-
-}; // namespace mvu
-}; // namespace mlpack
-
-#endif

Copied: mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.hpp (from rev 10352, mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objective_function.hpp	2011-11-24 07:02:29 UTC (rev 10388)
@@ -0,0 +1,80 @@
+/**
+ * @file mvu_objective_function.hpp
+ * @author Ryan Curtin
+ *
+ * This file defines the standard MVU objective function, implemented to fulfill
+ * the "LagrangianFunction" policy defined in
+ * fastlib/optimization/aug_lagrangian/aug_lagrangian.h.
+ *
+ * We use the change of variables suggested by Burer and Monteiro [1] to
+ * reformulate the MVU SDP originally provided by Weinberger and Saul [2].
+ *
+ * [1] S. Burer and R.D.C. Monteiro.  "A nonlinear programming algorithm for
+ *     solving semidefinite programs via low-rank factorization," Mathematical
+ *     Programming, vol. 95, no. 2, pp. 329-357, 2002.
+ *
+ * [2] K.Q. Weinberger and L.K. Saul, "An introduction to Nonlinear
+ *     Dimensionality Reduction by Maximum Variance Unfolding," Proceedings of
+ *     the Twenty-First National Conference on Artificial Intelligence
+ *     (AAAI-06), 2006.
+ */
+#ifndef __MLPACK_METHODS_MVU_MVU_OBJECTIVE_FUNCTION_H
+#define __MLPACK_METHODS_MVU_MVU_OBJECTIVE_FUNCTION_H
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace mvu {
+
+/**
+ * The MVU objective function.  This is a reformulation of the SDP proposed
+ * originally by Weinberger and Saul.  Their original formulation was:
+ *
+ * max (trace(K)) subject to:
+ *  (1) sum K_ij = 0
+ *  (2) K_ii - 2 K_ij + K_jj = || x_i - x_j ||^2 ; for all (i, j) nearest
+ *                                                 neighbors
+ *  (3) K >= 0 (K is positive semidefinite)
+ *
+ * We reformulate, taking K = R R^T.  This gives:
+ *
+ * max (R * R^T) subject to
+ *  (1) sum (R * R^T) = 0
+ *  (2) (R R^T)_ii - 2 (R R^T)_ij + (R R^T)_jj = || x_i - x_j ||^2 ;
+ *        for all (i, j) nearest neighbors
+ *
+ * Now, our optimization problem is easier.  The total number of constraints is
+ * equal to the number of points multiplied by the number of nearest neighbors.
+ */
+class MVUObjectiveFunction
+{
+ public:
+  MVUObjectiveFunction();
+  MVUObjectiveFunction(arma::mat& initial_point);
+
+  double Evaluate(const arma::mat& coordinates);
+  void Gradient(const arma::mat& coordinates, arma::mat& gradient);
+
+  int NumConstraints() const { return num_neighbors_ * initial_point_.n_cols; }
+
+  double EvaluateConstraint(int index, const arma::mat& coordinates);
+  void GradientConstraint(int index,
+                          const arma::mat& coordinates,
+                          arma::mat& gradient);
+
+  const arma::mat& GetInitialPoint() { return initial_point_; }
+
+ private:
+  arma::mat initial_point_;
+  int num_neighbors_;
+
+  // These hold the output of the nearest neighbors computation (done in the
+  // constructor).
+  arma::Col<index_t> neighbor_indices_;
+  arma::vec neighbor_distances_;
+};
+
+}; // namespace mvu
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives.h	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives.h	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,183 +0,0 @@
-/*
- * =====================================================================================
- * 
- *       Filename:  mvu_objectives.h
- * 
- *    Description:  
- * 
- *        Version:  1.0
- *        Created:  03/05/2008 12:00:56 PM EST
- *       Revision:  none
- *       Compiler:  gcc
- * 
- *         Author:  Nikolaos Vasiloglou (NV), nvasil at ieee.org
- *        Company:  Georgia Tech Fastlab-ESP Lab
- * 
- * =====================================================================================
- */
-
-#ifndef MVU_OBJECTIVES_H_
-#define MVU_OBJECTIVES_H_
-
-#include <fastlib/fastlib.h>
-#include <mlpack/allknn/allknn.h>
-#include <mlpack/allkfn/allkfn.h>
-#include <fastlib/optimization/lbfgs/optimization_utils.h>
-
-PARAM_INT_REQ("new_dimension", "the number of dimensions for the unfolded", 
-  "optfun");
-PARAM_STRING("nearest_neighbor_file", "file with the nearest neighbor pairs\
- and the squared distances defaults to nearest.txt", "optfun", "nearest.txt");
-PARAM_STRING("furthest_neighbor_file", "file with the nearets neighbor\
- pairs and the squared distances", "optfun", "furthest.txt");
-PARAM_INT("knns", "number of nearest neighbors to build the graph", "optfun",
-  5);
-PARAM_INT("leaf_size", "leaf_size for the tree.\
-  if you choose the option with the nearest file you don't need to specify it",
-  "optfun", 20);
-
-//***UNDOCUMENTED VALUES***
-PARAM(double, "desired_feasibility_error", "Undocumented value.", "optfun", 1,
-  false);
-PARAM(double, "grad_tolerance", "Undocumented value.", "optfun", 0.1, false);
-PARAM(double, "infeasibility_tolerance", "Undocumented value.", "optfun", 0.01,
-  false);
-
-
-/*const fx_module_doc mvu_doc = {
-  mvu_entries, NULL,
-  " This program computes the Maximum Variance Unfolding"
-  " and the Maximum Futhest Neighbor Unfolding as presented "
-  " in the paper: \n"
-  " @conference{vasiloglou2008ssm,\n"
-  "   title={{Scalable semidefinite manifold learning}},\n"
-  "   author={Vasiloglou, N. and Gray, A.G. and Anderson, D.V.},\n"
-  "   booktitle={Machine Learning for Signal Processing, 2008. MLSP 2008. IEEE Workshop on},\n"
-  "   pages={368--373},\n"
-  "   year={2008}\n"
-  " }\n"
-};*/
-
-
-
-class MaxVariance {
- public:
-  static const size_t MAX_KNNS = 30;
-
-  void Init(fx_module* module, arma::mat& data);
-  void Init(fx_module* module);
-
-  void Destruct();
-
-  void ComputeGradient(const arma::mat& coordinates, arma::mat& gradient);
-  void ComputeObjective(const arma::mat& coordinates, double& objective);
-  void ComputeFeasibilityError(const arma::mat& coordinates, double& error);
-  double ComputeLagrangian(const arma::mat& coordinates);
-  void UpdateLagrangeMult(const arma::mat& coordinates);
-  void Project(arma::mat& coordinates);
-  void set_sigma(double sigma);
-  bool IsDiverging(double objective);
-
-  // what the hell is this? // I don't know, you tell me?
-  bool IsOptimizationOver(const arma::mat& coordinates, arma::mat& gradient, double step) { return false; }
-  bool IsIntermediateStepOver(const arma::mat& coordinates, arma::mat& gradient, double step) { return true; }
-
-  void GiveInitMatrix(arma::mat& init_data);
-  size_t num_of_points();
-
- private:
-  datanode *module_;
-
-  mlpack::allknn::AllkNN allknn_;
-  size_t knns_;
-  size_t leaf_size_;
-
-  std::vector<std::pair<size_t, size_t> > nearest_neighbor_pairs_;
-  std::vector<double> nearest_distances_;
-
-  arma::vec eq_lagrange_mult_;
-
-  size_t num_of_nearest_pairs_;
-  double sigma_;
-  double sum_of_furthest_distances_;
-  size_t num_of_points_;
-  size_t new_dimension_;
-};
-
-class MaxFurthestNeighbors {
- public:
-  static const size_t MAX_KNNS = 30;
-
-  void Init(fx_module *module, arma::mat& data);
-  void Init(fx_module *module);
-
-  void Destruct();
-
-  void ComputeGradient(const arma::mat& coordinates, arma::mat& gradient);
-  void ComputeObjective(const arma::mat& coordinates, double& objective);
-  void ComputeFeasibilityError(const arma::mat& coordinates, double& error);
-  double ComputeLagrangian(const arma::mat& coordinates);
-  void UpdateLagrangeMult(const arma::mat& coordinates);
-  void Project(arma::mat& coordinates);
-
-  void set_sigma(double sigma); 
-  void set_lagrange_mult(double val);
-
-  bool IsDiverging(double objective); 
-  bool IsOptimizationOver(arma::mat& coordinates, arma::mat& gradient, double step);
-  bool IsIntermediateStepOver(arma::mat& coordinates, arma::mat& gradient, double step);
-
-  size_t num_of_points();
-  void GiveInitMatrix(arma::mat& init_data);
-
- private:
-  datanode *module_;
-
-  mlpack::allknn::AllkNN allknn_;
-  AllkFN allkfn_;
-
-  size_t knns_;
-  size_t leaf_size_;
-
-  std::vector<std::pair<size_t, size_t> > nearest_neighbor_pairs_;
-  std::vector<double> nearest_distances_;
-
-  arma::vec eq_lagrange_mult_;
-  size_t num_of_nearest_pairs_;
-  size_t num_of_furthest_pairs_;
-
-  std::vector<std::pair<size_t, size_t> > furthest_neighbor_pairs_;
-  std::vector<double> furthest_distances_;
-
-  double sum_of_furthest_distances_;
-  double sigma_;
-  size_t num_of_points_;
-  size_t new_dimension_;
-  double infeasibility1_;
-  double previous_infeasibility1_;
-  double desired_feasibility_error_;
-  double infeasibility_tolerance_;
-  double sum_of_nearest_distances_;
-  double grad_tolerance_;
-};
-
-class MaxVarianceUtils {
- public:
-  static void ConsolidateNeighbors(const arma::Col<size_t>& from_tree_ind,
-                                   const arma::vec&          from_tree_dist,
-                                   size_t num_of_neighbors,
-                                   size_t chosen_neighbors,
-                                   std::vector<std::pair<size_t, size_t> >& neighbor_pairs,
-                                   std::vector<double>& distances,
-                                   size_t& num_of_pairs);
-
-  static void EstimateKnns(const arma::Col<size_t>& nearest_neighbors,
-                           const arma::vec&          nearest_distances,
-                           size_t maximum_knns, 
-                           size_t num_of_points,
-                           size_t dimension,
-                           size_t& optimum_knns); 
-};
-
-#include "mvu_objectives_impl.h"
-#endif //MVU_OBJECTIVES_H_

Deleted: mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives_impl.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives_impl.h	2011-11-24 02:54:14 UTC (rev 10387)
+++ mlpack/trunk/src/mlpack/methods/mvu/mvu_objectives_impl.h	2011-11-24 07:02:29 UTC (rev 10388)
@@ -1,668 +0,0 @@
-/*
- * =====================================================================================
- * 
- *       Filename:  mvu_objectives_impl.h
- * 
- *    Description:  
- * 
- *        Version:  1.0
- *        Created:  03/05/2008 04:20:27 PM EST
- *       Revision:  none
- *       Compiler:  gcc
- * 
- *         Author:  Nikolaos Vasiloglou (NV), nvasil at ieee.org
- *        Company:  Georgia Tech Fastlab-ESP Lab
- * 
- * =====================================================================================
- */
-#include <errno.h>
-#include <fastlib/fx/io.h>
-#include<log.h>
-
-void MaxVariance::Init(arma::mat& data) {
-
-  knns_ = mlpack::CLI::GetParam<int>("optfun/knns");
-  leaf_size_ = mlpack::CLI::GetParam<int>("optfun/leaf_size");
-  new_dimension_ = mlpack::CLI::GetParam<int>("optfun/new_dimension");
-  num_of_points_ = data.n_cols;
-
-  mlpack::Log::Info << "Data loaded..." << std::endl;
-  mlpack::Log::Info << "Nearest neighbor constraints..." << std::endl;
-  mlpack::Log::Info << "Building tree with data..." << std::endl;
-
-  if (knns_ == 0) {
-    allknn_.Init(&data, leaf_size_, MAX_KNNS); 
-  } else {
-    allknn_.Init(&data, leaf_size_, knns_); 
-  }
-
-  mlpack::Log::Info << "Tree built..." << std::endl;
-
-  mlpack::Log::Info << "Computing neighborhoods..." << std::endl;
-
-  arma::Col<size_t> from_tree_neighbors;
-  arma::vec          from_tree_distances;
-  allknn_.ComputeNeighbors(from_tree_neighbors,
-                           from_tree_distances);
-
-  mlpack::Log::Info << "Neighborhoods computed..." << std::endl;
-
-  if (knns_ == 0) { // automatically estimate the correct number for k
-    mlpack::Log::Info << "Auto-tuning the knn..." << std::endl;
-
-    MaxVarianceUtils::EstimateKnns(from_tree_neighbors,
-        from_tree_distances,
-        MAX_KNNS, 
-        data.n_cols,
-        data.n_rows,
-        knns_); 
-
-    mlpack::Log::Info << "Optimum knns is " << knns_;
-    mlpack::CLI::GetParam<int>("optfun/optimum_knns") = knns_;
-
-    mlpack::Log::Info << "Consolidating neighbors..." << std::endl;
-
-    MaxVarianceUtils::ConsolidateNeighbors(from_tree_neighbors,
-        from_tree_distances,
-        MAX_KNNS,
-        knns_,
-        nearest_neighbor_pairs_,
-        nearest_distances_,
-        num_of_nearest_pairs_);
-  } else { 
-    mlpack::Log::Info << "Consolidating neighbors..." << std::endl;
-    MaxVarianceUtils::ConsolidateNeighbors(from_tree_neighbors,
-        from_tree_distances,
-        knns_,
-        knns_,
-        nearest_neighbor_pairs_,
-        nearest_distances_,
-        num_of_nearest_pairs_);
-  }
- 
-  eq_lagrange_mult_.ones(num_of_nearest_pairs_);
-  double max_nearest_distance = 0;
-
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    max_nearest_distance = std::max(nearest_distances_[i], max_nearest_distance);
-  }
-  sum_of_furthest_distances_ = -max_nearest_distance * std::pow(data.n_cols, 2);
- 
-  mlpack::Log::Info << "Lower bound for optimization is " << sum_of_furthest_distances_ << std::endl;
-  mlpack::CLI::GetParam<int>("optfun/num_of_constraints") = num_of_nearest_pairs_;
-  mlpack::CLI::GetParam<double>("optfun/lower_optimal_bound") = sum_of_furthest_distances_;
-}
-
-void MaxVariance::Init() {
-
-  new_dimension_ = mlpack::CLI::GetParam<int>("optfun/new_dimension");
-  std::string nearest_neighbor_file = mlpack::CLI::GetParam<std::string>("optfun/nearest_neighbor_file");
-  std::string furthest_neighbor_file = mlpack::CLI::GetParam<std::string>("optfun/furthest_neighbor_file");
-
-  // WHY ARE WE NOT USING DATA::LOAD()?  IS IT BECAUSE WE SUFFER FROM THE
-  // STUPID?  YEAH?  IS THAT IT? //RAWR, RAGE AT INANIMATE TEXT.. RAWR?
-  FILE *fp = fopen(nearest_neighbor_file.c_str(), "r");
-  if (fp == NULL) {
-    mlpack::Log::Fatal << "Error while opening " << 
-      nearest_neighbor_file.c_str() << "..." << strerror(errno) << std::endl;
-  }
-  nearest_neighbor_pairs_.clear();
-  nearest_distances_.clear();
-  num_of_points_ = 0;
-  while(!feof(fp)) {
-    size_t n1, n2;
-    double distance;
-    fscanf(fp, "%i %i %lg", &n1, &n2, &distance);
-    nearest_neighbor_pairs_.push_back(std::make_pair(n1, n2));
-    nearest_distances_.push_back(distance);
-    if (n1 > num_of_points_) {
-      num_of_points_ = n1;
-    }
-    if (n2 > num_of_points_) {
-      num_of_points_ = n2;
-    }
-  }
-  num_of_points_++;
-  fclose(fp);   
-
-  num_of_nearest_pairs_ = nearest_neighbor_pairs_.size();
-
-  eq_lagrange_mult_.ones(num_of_nearest_pairs_);
-  double max_nearest_distance = 0;
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    max_nearest_distance=std::max(nearest_distances_[i], max_nearest_distance);
-  }
-  sum_of_furthest_distances_ = -max_nearest_distance * std::pow(num_of_points_, 2);
-
-  mlpack::Log::Info << "Lower bound for optimization is " <<
-    sum_of_furthest_distances_ << std::endl;
-  mlpack::CLI::GetParam<int>("optfun/num_of_constraints") = 
-    num_of_nearest_pairs_;
-  mlpack::CLI::GetParam<double("optfun/lower_optimal_bound") = 
-    sum_of_furthest_distances_;
-}
-
-void MaxVariance::Destruct() {
-  allknn_.Destruct(); // should be unnecessary
-}
-
-void MaxVariance::ComputeGradient(const arma::mat& coordinates, arma::mat& gradient) {
-  gradient = -coordinates; // copy values and negate (we need to use -CRR^T
-  // because we want to maximize CRR^T
-
-  size_t dimension = coordinates.n_rows;
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    arma::vec a_i_r(dimension);
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    const arma::vec point1 = coordinates.unsafe_col(n1);
-    const arma::vec point2 = coordinates.unsafe_col(n2);
-    double dist_diff = la::DistanceSqEuclidean(point1, point2) - nearest_distances_[i];
-    
-    a_i_r = point2 - point1;
-
-    // equality constraints
-    double scale = eq_lagrange_mult_[i] - dist_diff * sigma_;
-    gradient.col(n1) -= scale * a_i_r;
-    gradient.col(n2) += scale * a_i_r;
-  }
-}
-
-void MaxVariance::ComputeObjective(const arma::mat& coordinates, double& objective) {
-  objective = 0;
-
-  size_t dimension = coordinates.n_rows;
-  for(size_t i = 0; i < coordinates.n_cols; i++) {
-    // could this be done more efficiently?
-    objective -= dot(coordinates.unsafe_col(i), coordinates.unsafe_col(i));
-  }
-}
-
-void MaxVariance::ComputeFeasibilityError(const arma::mat& coordinates, double& error) {
-  error = 0;
-
-  size_t dimension = coordinates.n_rows;
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double distance = la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2));
-
-    error += std::pow(distance - nearest_distances_[i], 2);
-  }
-}
-
-double MaxVariance::ComputeLagrangian(const arma::mat& coordinates) {
-  size_t dimension = coordinates.n_rows;
-
-  double lagrangian = 0;
-  ComputeObjective(coordinates, lagrangian);
-
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double dist_diff = la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2)) - nearest_distances_[i];
-
-    lagrangian += dist_diff * ((dist_diff * sigma_) -
-        (eq_lagrange_mult_[i] * dist_diff));
-  }
-
-  return lagrangian;
-}
-
-void MaxVariance::UpdateLagrangeMult(const arma::mat& coordinates) {
-  size_t dimension = coordinates.n_rows;
-
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double dist_diff =la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2)) - nearest_distances_[i];
-    eq_lagrange_mult_[i] -= (sigma_ * dist_diff);
-  }
-}
-
-void MaxVariance::set_sigma(double sigma) {
-  sigma_ = sigma;
-}
-
-bool MaxVariance::IsDiverging(double objective) {
-  if (objective < sum_of_furthest_distances_) {
-    mlpack::Log::Info << "objective(" << objective 
-      << ") < sum_of_furthest_distances (" << sum_of_furthest_distances_ 
-      << ")" << std::endl;
-
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void MaxVariance::Project(arma::mat& coordinates) {
-  OptUtils::RemoveMean(coordinates);
-}
-
-size_t MaxVariance::num_of_points() {
-  return num_of_points_;
-}
-
-void MaxVariance::GiveInitMatrix(arma::mat& init_data) {
-  init_data.randu(new_dimension_, num_of_points_);
-}
-
-///////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////
-void MaxFurthestNeighbors::Init(arma::mat& data) {
-  module_ = module;
-  new_dimension_ = mlpack::CLI::GetParam<int>("optfun/new_dimension");
-  num_of_points_ = data.n_cols;
-
-  infeasibility1_ = DBL_MAX;
-  previous_infeasibility1_ = DBL_MAX;
-  desired_feasibility_error_ = mlpack::CLI::GetParam<double>("optfun/desired_feasibility_error");
-  grad_tolerance_ = mlpack::CLI::GetParam<double>("optfun/grad_tolerance");
-  infeasibility_tolerance_=  mlpack::CLI::GetParam<double>("optfun/infeasibility_tolerance");
-
-  knns_ = mlpack::CLI::GetParam<int>("optfun/knns");
-  leaf_size_ = mlpack::CLI::GetParam<int>("optfun/leaf_size");
-
-  mlpack::Log::Info << "Data loaded..." << std::endl;
-  mlpack::Log::Info << "Nearest neighbor constraints..." << std::endl;
-  mlpack::Log::Info << "Building tree with data..." << std::endl;
-
-  if (knns_ == 0) {
-     allknn_.Init(&data, leaf_size_, MAX_KNNS); 
-  } else {
-    allknn_.Init(&data, leaf_size_, knns_); 
-  }
-
-  mlpack::Log::Info << "Tree built ..." << std::endl;
-  mlpack::Log::Info << "Computing neighborhoods ..." << std::endl
-
-  arma::Col<size_t> from_tree_neighbors;
-  arma::vec from_tree_distances;
-  allknn_.ComputeNeighbors(from_tree_neighbors,
-                           from_tree_distances);
-
-  mlpack::Log::Info << "Neighborhoods computed..." << std::endl;
-
-  if (knns_ == 0) { // automatically select k
-    mlpack::Log::Info << "Auto-tuning the knn..." << std::endl;
-    MaxVarianceUtils::EstimateKnns(from_tree_neighbors,
-                                   from_tree_distances,
-                                   MAX_KNNS, 
-                                   data.n_cols,
-                                   data.n_rows,
-                                   knns_); 
-
-    mlpack::Log::Info << "Optimum knns is " << knns_ << std::endl;
-    mlpack::CLI::GetParam<int>("optfun/optimum_knns") = knns_;
-    MaxVarianceUtils::ConsolidateNeighbors(from_tree_neighbors,
-                                           from_tree_distances,
-                                           MAX_KNNS,
-                                           knns_,
-                                           nearest_neighbor_pairs_,
-                                           nearest_distances_,
-                                           num_of_nearest_pairs_);
-  } else { 
-    mlpack::Log::Info << "Consolidating neighbors..." << std::endl;
-    MaxVarianceUtils::ConsolidateNeighbors(from_tree_neighbors,
-                                           from_tree_distances,
-                                           knns_,
-                                           knns_,
-                                           nearest_neighbor_pairs_,
-                                           nearest_distances_,
-                                           num_of_nearest_pairs_);
-  }
-
-  // is there a better way to do this? (take dot product of distances)
-  // this variable isn't even used before it's reset again!
-//  sum_of_nearest_distances_ = 0;
-//  for(int i = 0; i < nearest_distances_.size(); i++)
-//    sum_of_nearest_distances_ += std::pow(nearest_distances_[i], 2.0);
-//  sum_of_nearest_distances_ = std::pow(sum_of_nearest_distances_, 0.5);
-
-  mlpack::CLI::GetParam<int>("optfun/num_of_constraints") = num_of_nearest_pairs_;
-
-  eq_lagrange_mult_.ones(num_of_nearest_pairs_);
-  
-  mlpack::Log::Info <<("Furthest neighbor constraints...\n");
-  mlpack::Log::Info <<("Building tree with data...\n");
-
-  allkfn_.Init(&data, leaf_size_, 1); 
-
-  mlpack::Log::Info <<("Tree built...\n");
-  mlpack::Log::Info <<("Computing furthest neighborhoods...\n");
-
-  // mem:: madness?
-//  from_tree_neighbors.Renew();
-//  from_tree_distances.Renew();
-  allkfn_.ComputeNeighbors(from_tree_neighbors,
-                           from_tree_distances);
-
-  mlpack::Log::Info << "Furthest neighbors computed..." << std::endl;
-  mlpack::Log::Info << "Consolidating neighbors..." << std::endl;
-
-  MaxVarianceUtils::ConsolidateNeighbors(from_tree_neighbors,
-                                         from_tree_distances,
-                                         1,
-                                         1,
-                                         furthest_neighbor_pairs_,
-                                         furthest_distances_,
-                                         num_of_furthest_pairs_);
-
-  double max_nearest_distance = 0;
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    max_nearest_distance = std::max(nearest_distances_[i], max_nearest_distance);
-  }
-  sum_of_furthest_distances_ = -(max_nearest_distance * data.n_cols * num_of_furthest_pairs_);
- 
-  mlpack::Log::Info << "Lower bound for optimization: " << 
-    sum_of_furthest_distances_ << std::endl;
-  mlpack::CLI::GetParam<double>("optfun/lower_optimal_bound") = sum_of_furthest_distances_;
-}
-
-void MaxFurthestNeighbors::Init() {
-  new_dimension_ = mlpack::CLI::GetParam<int>("optfun/new_dimension");
-
-  infeasibility1_ = DBL_MAX;
-  previous_infeasibility1_ = DBL_MAX;
-  desired_feasibility_error_ = mlpack::CLI::GetParam<double>("optfun/desired_feasibility_error");
-  grad_tolerance_ = mlpack::CLI::GetParam<double>("optfun/grad_tolerance");
-  infeasibility_tolerance_=  mlpack::CLI::GetParam<double>("optfun/infeasibility_tolerance");
-
-  std::string nearest_neighbor_file = 
-    mlpack::CLI::GetParam<std::string>("optfun/nearest_neighbor_file");
-  std::string furthest_neighbor_file = 
-    mlpack::CLI::GetParam<std::string>("optfun/furthest_neighbor_file");
-
-  // need to use data::Load()
-  FILE *fp = fopen(nearest_neighbor_file.c_str(), "r");
-  if (fp == NULL) {
-    mlpack::Log::Fatal << "Error while opening " << 
-      nearest_neighbor_file.c_str() << "..." << 
-      strerror(errno) << std::endl;
-  }
-  num_of_points_ = 0;
-  while(!feof(fp)) {
-    size_t n1, n2;
-    double distance;
-    fscanf(fp, "%i %i %lg", &n1, &n2, &distance);
-    nearest_neighbor_pairs_.push_back(std::make_pair(n1, n2));
-    nearest_distances_.push_back(distance);
-    if (n1 > num_of_points_) {
-      num_of_points_=n1;
-    }
-    if (n2 > num_of_points_) {
-      num_of_points_=n2;
-    }
-  }
-  num_of_points_++;
-  num_of_nearest_pairs_ = nearest_neighbor_pairs_.size();
-  // this variable is not used before it is set again!
-//  sum_of_nearest_distances_ = std::pow(dot(nearest_distances_[0],
-//      nearest_distances_[0]), 0.5); // use norm()?
-
-  fclose(fp);
-  fp = fopen(furthest_neighbor_file.c_str(), "r");
-  if (fp == NULL) {
-    mlpack::Log::Fatal << "Error while opening " << 
-      furthest_neighbor_file.c_str() << "..." << 
-      strerror(errno) << std::endl;
-  }
-  
-  while(!feof(fp)) {
-    size_t n1, n2;
-    double distance;
-    fscanf(fp, "%i %i %lg", &n1, &n2, &distance);
-    furthest_neighbor_pairs_.push_back(std::make_pair(n1, n2));
-    furthest_distances_.push_back(distance);
-  }
-  fclose(fp);
-  num_of_furthest_pairs_ = furthest_neighbor_pairs_.size();
-
-  eq_lagrange_mult_.ones(num_of_nearest_pairs_);
-  double max_nearest_distance = 0;
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    max_nearest_distance = std::max(nearest_distances_[i], max_nearest_distance);
-  }
-  sum_of_furthest_distances_ = -(max_nearest_distance * num_of_points_ * num_of_points_);
- 
-  mlpack::Log::Info << "Lower bound for optimization: " << 
-    sum_of_furthest_distances_ << std::endl;
-  mlpack::CLI::GetParam<int>("optfun/num_of_constraints") = num_of_nearest_pairs_;
-  mlpack::CLI::GetParam<double>("optfun/lower_optimal_bound") = sum_of_furthest_distances_;
-}
-
-void MaxFurthestNeighbors::Destruct() {
-  allknn_.Destruct(); // likely not necessary
-  allkfn_.Destruct(); // likely not necessary
-}
-
-void MaxFurthestNeighbors::ComputeGradient(const arma::mat& coordinates, arma::mat& gradient) {
-  gradient.zeros();
-  // objective
-  for(size_t i = 0; i < num_of_furthest_pairs_; i++) {
-    arma::vec a_i_r(coordinates.n_rows);
-    size_t n1 = furthest_neighbor_pairs_[i].first;
-    size_t n2 = furthest_neighbor_pairs_[i].second;
-    
-    a_i_r = coordinates.unsafe_col(n2) - coordinates.unsafe_col(n1);
-
-    gradient.col(n1) -= a_i_r;
-    gradient.col(n2) += a_i_r;
-  }
-  // equality constraints
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    arma::vec a_i_r(coordinates.n_rows);
-
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    arma::vec point1 = coordinates.unsafe_col(n1);
-    arma::vec point2 = coordinates.unsafe_col(n2);
-    double dist_diff = la::DistanceSqEuclidean(point1, point2) -
-        nearest_distances_[i];
-
-    a_i_r = point2 - point1;
-
-    double scale = (dist_diff * sigma_ - eq_lagrange_mult_[i]);
-    gradient.col(n1) += scale * a_i_r;
-    gradient.col(n2) -= scale * a_i_r;
-  }
-}
-
-void MaxFurthestNeighbors::ComputeObjective(const arma::mat& coordinates, double& objective) {
-  objective = 0;
-  
-  for(size_t i = 0; i < num_of_furthest_pairs_; i++) {
-    size_t n1 = furthest_neighbor_pairs_[i].first;
-    size_t n2 = furthest_neighbor_pairs_[i].second;
-    
-    objective -= la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2));
-  }
-}
-
-void MaxFurthestNeighbors::ComputeFeasibilityError(const arma::mat& coordinates, double& error) {
-  error = 0;
-
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double dist_diff = la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2)) - nearest_distances_[i];
-
-    error += std::pow(dist_diff, 2);
-  }
-
-  error= 100 * (std::pow(error, 0.5) / sum_of_nearest_distances_);
-}
-
-double MaxFurthestNeighbors::ComputeLagrangian(const arma::mat& coordinates) {
-  double lagrangian = 0;
-  ComputeObjective(coordinates, lagrangian);
-
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double dist_diff = la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2)) - nearest_distances_[i];
-
-    lagrangian += (std::pow(dist_diff, 2) * sigma_ / 2) - (eq_lagrange_mult_[i] *
-        dist_diff);
-  }
-
-  return lagrangian;
-}
-
-void MaxFurthestNeighbors::UpdateLagrangeMult(const arma::mat& coordinates) {
-  for(size_t i = 0; i < num_of_nearest_pairs_; i++) {
-    size_t n1 = nearest_neighbor_pairs_[i].first;
-    size_t n2 = nearest_neighbor_pairs_[i].second;
-    
-    double dist_diff =la::DistanceSqEuclidean(coordinates.unsafe_col(n1),
-        coordinates.unsafe_col(n2)) - nearest_distances_[i];
-
-    eq_lagrange_mult_[i] -= sigma_ * dist_diff;
-  }
-}
-
-void MaxFurthestNeighbors::set_sigma(double sigma) {
-  sigma_ = sigma;
-}
-
-void MaxFurthestNeighbors::set_lagrange_mult(double val) {
-  eq_lagrange_mult_.fill(val);
-}
-
-bool MaxFurthestNeighbors::IsDiverging(double objective) {
-  if (objective < sum_of_furthest_distances_) {
-    mlpack::Log::Info << "objective(" << objective 
-      << ") < sum_of_furthest_distances (" << sum_of_furthest_distances_ 
-      << ")" << std::endl;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void MaxFurthestNeighbors::Project(arma::mat& coordinates) {
-  OptUtils::RemoveMean(coordinates);
-}
-
-size_t MaxFurthestNeighbors::num_of_points() {
-  return num_of_points_;
-}
-
-void MaxFurthestNeighbors::GiveInitMatrix(arma::mat& init_data) {
-  init_data.randu(new_dimension_, num_of_points_);
-}
-
-bool MaxFurthestNeighbors::IsOptimizationOver(arma::mat& coordinates, 
-      arma::mat& gradient, double step) {
-  ComputeFeasibilityError(coordinates, infeasibility1_);
-  if (infeasibility1_ < desired_feasibility_error_ || 
-      fabs(infeasibility1_ - previous_infeasibility1_) < infeasibility_tolerance_)  {
-    mlpack::Log::Info << "Optimization is over" << std::endl;
-    return true;
-  } else {
-    previous_infeasibility1_ = infeasibility1_;
-    return false; 
-  }
-}
-
-bool MaxFurthestNeighbors::IsIntermediateStepOver(arma::mat& coordinates, 
-      arma::mat& gradient, double step) {
-  double norm_gradient = std::pow(dot(gradient, gradient), 0.5);
-  double feasibility_error;
-  ComputeFeasibilityError(coordinates, feasibility_error);
-  if (norm_gradient * step < grad_tolerance_ 
-      || feasibility_error < desired_feasibility_error_) {
-    return true;
-  }
-  return false;
-}
-
-
-///////////////////////////////////////////////////////////////
-void MaxVarianceUtils::ConsolidateNeighbors(const arma::Col<size_t>& from_tree_ind,
-                                            const arma::vec&  from_tree_dist,
-                                            size_t num_of_neighbors,
-                                            size_t chosen_neighbors,
-                                            std::vector<std::pair<size_t, size_t> >& neighbor_pairs,
-                                            std::vector<double>& distances,
-                                            size_t& num_of_pairs) {
-  
-  num_of_pairs = 0;
-  size_t num_of_points = from_tree_ind.n_elem / num_of_neighbors;
-
-  // just in case they aren't clear already
-  neighbor_pairs.clear();
-  distances.clear();
-
-  bool skip = false;
-  for(size_t i = 0; i < num_of_points; i++) {
-    for(size_t k = 0; k < chosen_neighbors; k++) {  
-      size_t n1 = i; // neighbor 1
-      size_t n2 = from_tree_ind[(i * num_of_neighbors) + k]; // neighbor 2
-      if(n1 > n2) {
-        for(size_t n = 0; n < chosen_neighbors; n++) {
-          if(from_tree_ind[(n2 * num_of_neighbors) + n] == n1) {
-            skip = true;
-            break;
-          }
-        }
-      }  
-      if(!skip) {
-        num_of_pairs += 1;
-        neighbor_pairs.push_back(std::make_pair(n1, n2));
-        distances.push_back(from_tree_dist[(i * num_of_neighbors) + k]);
-      }
-      skip = false;
-    }
-  }
-}
-
-void MaxVarianceUtils::EstimateKnns(const arma::Col<size_t>& nearest_neighbors,
-                                    const arma::vec& nearest_distances,
-                                    size_t maximum_knns,
-                                    size_t num_of_points,
-                                    size_t dimension,
-                                    size_t& optimum_knns) {
-  double max_loocv_score = -DBL_MAX;
-  double loocv_score = 0;
-  // double unit_sphere_volume = math::SphereVolume(1.0, dimension);
-
-  optimum_knns = 0;
-
-  for(size_t k = 2; k < maximum_knns; k++) {
-    loocv_score = 0.0;
-    double mean_band = 0.0;
-    for(size_t i = 0; i < num_of_points; i++) {
-      double scale_factor = std::pow(nearest_distances[(i * maximum_knns) + k], (double) dimension / 2.0);
-      double probability = 0;
-
-      for(size_t j = 0; j < k; j++) {
-        probability += exp(-nearest_distances[(i * maximum_knns) + j] /
-            (2 * std::pow(nearest_distances[(i * maximum_knns) + k], 0.5))) / scale_factor;
-      }
-
-      loocv_score += log(probability);
-      mean_band += nearest_distances[(i * maximum_knns) + k];
-    }
-    mlpack::Log::Info << "Knn=%i," << k
-      << "mean_band=%lg," << mean_band/num_of_points 
-      << "score=%lg," << loocv_score
-      << "dimension=%i" << dimension << std::endl;
-    if (loocv_score > max_loocv_score) {
-      max_loocv_score = loocv_score;
-      optimum_knns = k;
-    }
-  }
-}




More information about the mlpack-svn mailing list