[mlpack-svn] r10458 - in mlpack/trunk/src/mlpack: core/io core/kernels core/math core/tree core/utilities tests

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Nov 29 20:32:27 EST 2011


Author: nslagle
Date: 2011-11-29 20:32:27 -0500 (Tue, 29 Nov 2011)
New Revision: 10458

Modified:
   mlpack/trunk/src/mlpack/core/io/log.cpp
   mlpack/trunk/src/mlpack/core/kernels/gaussian_kernel.hpp
   mlpack/trunk/src/mlpack/core/math/range.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/hrectbound.hpp
   mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/periodichrectbound_impl.hpp
   mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.cpp
   mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.hpp
   mlpack/trunk/src/mlpack/core/utilities/save_restore_utility_impl.hpp
   mlpack/trunk/src/mlpack/tests/save_restore_utility_test.cpp
   mlpack/trunk/src/mlpack/tests/tree_test.cpp
Log:
mlpack/core: apply some changes to HRect, Log, and BinarySpaceTree

Modified: mlpack/trunk/src/mlpack/core/io/log.cpp
===================================================================
--- mlpack/trunk/src/mlpack/core/io/log.cpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/io/log.cpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -4,7 +4,8 @@
  *
  * Implementation of the Log class.
  */
-#include <iostream>
+#include <cxxabi.h>
+#include <execinfo.h>
 
 #include "log.hpp"
 
@@ -41,7 +42,71 @@
 {
   if(!condition)
   {
+    void* array[25];
+    size_t size = backtrace (array, sizeof(array)/sizeof(void*));
+    char** messages = backtrace_symbols(array, size);
+
+    // skip first stack frame (points here)
+    for (size_t i = 1; i < size && messages != NULL; ++i)
+    {
+      char *mangled_name = 0, *offset_begin = 0, *offset_end = 0;
+
+      // find parantheses and +address offset surrounding mangled name
+      for (char *p = messages[i]; *p; ++p)
+      {
+        if (*p == '(')
+        {
+          mangled_name = p;
+        }
+        else if (*p == '+')
+        {
+          offset_begin = p;
+        }
+        else if (*p == ')')
+        {
+          offset_end = p;
+          break;
+        }
+      }
+
+      // if the line could be processed, attempt to demangle the symbol
+      if (mangled_name && offset_begin && offset_end &&
+          mangled_name < offset_begin)
+      {
+        *mangled_name++ = '\0';
+        *offset_begin++ = '\0';
+        *offset_end++ = '\0';
+
+        int status;
+        char* real_name = abi::__cxa_demangle(mangled_name, 0, 0, &status);
+
+        // if demangling is successful, output the demangled function name
+        if (status == 0)
+        {
+          Log::Debug << "[bt]: (" << i << ") " << messages[i] << " : "
+                    << real_name << "+" << offset_begin << offset_end
+                    << std::endl;
+
+        }
+        // otherwise, output the mangled function name
+        else
+        {
+          Log::Debug << "[bt]: (" << i << ") " << messages[i] << " : "
+                    << mangled_name << "+" << offset_begin << offset_end
+                    << std::endl;
+        }
+        free(real_name);
+      }
+      // otherwise, print the whole line
+      else
+      {
+          Log::Debug << "[bt]: (" << i << ") " << messages[i] << std::endl;
+      }
+    }
     Log::Debug << message << std::endl;
+    free(messages);
+
+    //backtrace_symbols_fd (array, size, 2);
     exit(1);
   }
 }

Modified: mlpack/trunk/src/mlpack/core/kernels/gaussian_kernel.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/kernels/gaussian_kernel.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/kernels/gaussian_kernel.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -16,10 +16,10 @@
 
 /**
  * The standard Gaussian kernel.  Given two vectors @f$ x @f$, @f$ y @f$, and a
- * bandwidth @f$ \sigma @f$ (set in the constructor),
+ * bandwidth @f$ \bandwidth @f$ (set in the constructor),
  *
  * @f[
- * K(x, y) = \exp(-\frac{|| x - y ||^2}{2 \sigma^2}).
+ * K(x, y) = \exp(-\frac{|| x - y ||^2}{2 \bandwidth^2}).
  * @f]
  *
  * The implementation is all in the header file because it is so simple.
@@ -28,16 +28,20 @@
 {
  public:
   /**
-   * Default constructor; sets sigma to 1.
+   * Default constructor; sets bandwidth to 1.0.
    */
-  GaussianKernel() : gamma(0.5) { }
+  GaussianKernel() : bandwidth(1.0), normalizer(sqrt(2.0 * M_PI)), gamma(-0.5) { }
 
   /**
    * Construct the Gaussian kernel with a custom bandwidth.
    *
-   * @param sigma The bandwidth of the kernel.
+   * @param bandwidth The bandwidth of the kernel.
    */
-  GaussianKernel(double sigma) : gamma(-0.5 * pow(sigma, -2.0)) { }
+  GaussianKernel(double bandwidth) :
+    bandwidth(bandwidth),
+    normalizer(bandwidth * sqrt(2.0 * M_PI)),
+    gamma(-0.5 * pow(bandwidth, -2.0))
+  { }
 
   /**
    * Evaluation of the Gaussian kernel.  This could be generalized to use any
@@ -46,7 +50,7 @@
    *
    * @param a First vector.
    * @param b Second vector.
-   * @return K(a, b) using the bandwidth (@f$\sigma at f$) specified in the
+   * @return K(a, b) using the bandwidth (@f$\bandwidth at f$) specified in the
    *   constructor.
    */
   double Evaluate(const arma::vec& a, const arma::vec& b) const
@@ -55,11 +59,33 @@
     arma::vec diff = b - a;
     return exp(gamma * arma::dot(diff, diff));
   }
+  /**
+   * Evaluation of the Gaussian kernel using a double precision argument
+   *
+   * @param t double value.
+   * @return K(t) using the bandwidth (@f$\bandwidth at f$) specified in the
+   *   constructor.
+   */
+  double Evaluate(double t) const {
+    // The precalculation of gamma saves us some little computation time.
+    return exp(gamma * t * t);
+  }
 
+  const double& Normalizer() { return normalizer; }
+  const double& Bandwidth() { return bandwidth; }
+
  private:
   /**
+   * kernel bandwidth
+   * */
+  double bandwidth;
+  /**
+   * normalizing constant
+   */
+  double normalizer;
+  /**
    * Precalculated constant depending on the bandwidth; @f$ \gamma =
-   * -\frac{1}{2 \sigma^2} @f$.
+   * -\frac{1}{2 \bandwidth^2} @f$.
    */
   double gamma;
 };

Modified: mlpack/trunk/src/mlpack/core/math/range.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/math/range.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/math/range.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -7,6 +7,8 @@
 #ifndef __MLPACK_CORE_MATH_RANGE_HPP
 #define __MLPACK_CORE_MATH_RANGE_HPP
 
+#include <vector>
+
 namespace mlpack {
 namespace math {
 
@@ -135,6 +137,7 @@
    * @param d Point to check.
    */
   bool Contains(double d) const;
+
 };
 
 }; // namespace math

Modified: mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -214,6 +214,9 @@
   //! Return whether or not this node is a leaf (true if it has no children).
   bool is_leaf() const;
 
+  //! Fills the tree to the specified level
+  size_t ExtendTree(size_t level);
+
   /**
    * Gets the left child of this node.
    */
@@ -225,6 +228,17 @@
   BinarySpaceTree *right() const;
 
   /**
+   * Obtains the number of nodes in the tree, starting with this
+   * */
+  size_t TreeSize() const;
+
+  /**
+   * Obtains the number of levels below this node in the tree, starting with
+   *   this
+   * */
+  size_t TreeDepth() const;
+
+  /**
    * Gets the index of the beginning point of this subset.
    */
   size_t begin() const;
@@ -242,6 +256,18 @@
   void Print() const;
 
  private:
+  /* hidden copy constructor, available only to fill (pad) the tree to a specified level */
+  BinarySpaceTree(size_t begin, size_t count, Bound bound, Statistic stat) :
+    left_(NULL),
+    right_(NULL),
+    begin_(begin),
+    count_(count),
+    bound_(bound),
+    stat_(stat) {}
+  BinarySpaceTree* CopyMe()
+  {
+     return new BinarySpaceTree(begin_,count_,bound_,stat_);
+  }
   /**
    * Splits the current node, assigning its left and right children recursively.
    *

Modified: mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -107,7 +107,7 @@
 {
   // Hopefully the vector is initialized correctly!  We can't check that
   // entirely but we can do a minor sanity check.
-  assert(old_from_new.size() == data.n_cols);
+  Log::Assert(old_from_new.size() == data.n_cols);
 
   // Perform the actual splitting.
   SplitNode(data, old_from_new);
@@ -129,7 +129,7 @@
 {
   // Hopefully the vector is initialized correctly!  We can't check that
   // entirely but we can do a minor sanity check.
-  assert(old_from_new.size() == data.n_cols);
+  Log::Assert(old_from_new.size() == data.n_cols);
 
   // Perform the actual splitting.
   SplitNode(data, old_from_new);
@@ -188,10 +188,12 @@
     return this;
   else if (is_leaf())
     return NULL;
-  else if (begin_q < right_->begin_)
+  else if (begin_q < left_->end_)
     return left_->FindByBeginCount(begin_q, count_q);
+  else if (right_)
+    return right_->FindByBeginCount(begin_q, count_q);
   else
-    return right_->FindByBeginCount(begin_q, count_q);
+    return NULL;
 }
 
 /**
@@ -216,13 +218,92 @@
     return this;
   else if (is_leaf())
     return NULL;
-  else if (begin_q < right_->begin_)
+  else if (begin_q < left_->end_)
     return left_->FindByBeginCount(begin_q, count_q);
+  else if (right_)
+    return right_->FindByBeginCount(begin_q, count_q);
   else
-    return right_->FindByBeginCount(begin_q, count_q);
+    return NULL;
 }
 
 template<typename Bound, typename Statistic>
+size_t BinarySpaceTree<Bound, Statistic>::ExtendTree(size_t level)
+{
+  --level;
+  /* return the number of nodes duplicated */
+  size_t nodesDuplicated = 0;
+  if (level > 0)
+  {
+    if (!left_)
+    {
+      left_ = CopyMe();
+      ++nodesDuplicated;
+    }
+    nodesDuplicated += left_->ExtendTree(level);
+    if (right_)
+    {
+      nodesDuplicated += right_->ExtendTree(level);
+    }
+  }
+  return nodesDuplicated;
+}
+
+/* TODO: we can likely calculate this earlier, then store the
+ *   result in a private member variable; for now, we can
+ *   just calculate as needed...
+ *
+ *   Also, perhaps we should rewrite these recursive functions
+ *     to avoid exceeding the stack limit
+ */
+
+template<typename Bound, typename Statistic>
+size_t BinarySpaceTree<Bound, Statistic>::TreeSize() const
+{
+  size_t size = 1;
+  if (is_leaf())
+  {
+    return size;
+  }
+  else
+  {
+    size += this->left()->TreeSize();
+    if (right_)
+    {
+      size += this->right()->TreeSize();
+    }
+  }
+  return size;
+}
+
+template<typename Bound, typename Statistic>
+size_t BinarySpaceTree<Bound, Statistic>::TreeDepth() const
+{
+  size_t levels = 1;
+  if (is_leaf())
+  {
+    return levels;
+  }
+  else
+  {
+    size_t rightLevels = 0;
+    if (right_)
+    {
+      rightLevels = right_->TreeDepth();
+    }
+    size_t leftLevels = left_->TreeDepth();
+    if (leftLevels > rightLevels)
+    {
+      levels += leftLevels;
+    }
+    else
+    {
+      levels += rightLevels;
+    }
+  }
+  return levels;
+}
+
+template<typename Bound, typename Statistic>
 inline const Bound& BinarySpaceTree<Bound, Statistic>::bound() const
 {
   return bound_;
@@ -300,14 +381,16 @@
 }
 
 template<typename Bound, typename Statistic>
-void BinarySpaceTree<Bound, Statistic>::Print() const
-{
-  printf("node: %d to %d: %d points total\n", begin_, begin_ + count_ - 1,
-      count_);
+void BinarySpaceTree<Bound, Statistic>::Print() const {
+  printf("node: %d to %d: %d points total\n",
+      begin_, begin_ + count_ - 1, count_);
   if (!is_leaf())
   {
     left_->Print();
-    right_->Print();
+    if (right_)
+    {
+      right_->Print();
+    }
   }
 }
 
@@ -339,18 +422,16 @@
     }
   }
 
-  // Split in the middle of that dimension.
-  double split_val = bound_[split_dim].mid();
-
   if (max_width == 0) // All these points are the same.  We can't split.
     return;
 
+  double split_value = bound_[split_dim].mid();
+
   // Perform the actual splitting.  This will order the dataset such that points
-  // with value in dimension split_dim less than or equal to split_val are on
+  // with value in dimension split_dim less than or equal to split_value are on
   // the left of split_col, and points with value in dimension split_dim greater
-  // than split_val are on the right side of split_col.
-  size_t split_col = GetSplitIndex(data, split_dim, split_val);
-
+  // than split_value are on the right side of split_col.
+  size_t split_col = GetSplitIndex(data, split_dim, split_value);
   // Now that we know the split column, we will recursively split the children
   // by calling their constructors (which perform this splitting process).
   left_ = new BinarySpaceTree<Bound, Statistic>(data, begin_,
@@ -375,7 +456,7 @@
 
   // Figure out which dimension to split on.
   size_t split_dim = data.n_rows; // Indicate invalid by max_dim + 1.
-  double max_width = -1;
+  double max_width = -DBL_MAX;
 
   // Find the split dimension.
   for (size_t d = 0; d < data.n_rows; d++)
@@ -389,17 +470,19 @@
     }
   }
 
-  // Split in the middle of that dimension.
-  double split_val = bound_[split_dim].mid();
-
   if (max_width == 0) // All these points are the same.  We can't split.
     return;
 
+  // Split in the middle of that dimension.
+  size_t split_col = -1;
+  double split_value = -DBL_MAX;
+
+  split_value = bound_[split_dim].mid();
   // Perform the actual splitting.  This will order the dataset such that points
-  // with value in dimension split_dim less than or equal to split_val are on
+  // with value in dimension split_dim less than or equal to split_value are on
   // the left of split_col, and points with value in dimension split_dim greater
-  // than split_val are on the right side of split_col.
-  size_t split_col = GetSplitIndex(data, split_dim, split_val, old_from_new);
+  // than split_value are on the right side of split_col.
+  split_col = GetSplitIndex(data, split_dim, split_value, old_from_new);
 
   // Now that we know the split column, we will recursively split the children
   // by calling their constructors (which perform this splitting process).
@@ -448,7 +531,7 @@
       right--;
   }
 
-  assert(left == right + 1);
+  Log::Assert(left == right + 1);
 
   return left;
 }
@@ -498,7 +581,7 @@
       right--;
   }
 
-  assert(left == right + 1);
+  Log::Assert(left == right + 1);
 
   return left;
 }

Modified: mlpack/trunk/src/mlpack/core/tree/hrectbound.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/hrectbound.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/tree/hrectbound.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -74,6 +74,11 @@
    * Calculates minimum bound-to-point squared distance.
    */
   double MinDistance(const arma::vec& point) const;
+  /**
+   * Calculates minimum bound-to-point squared distance.
+   */
+  double MinDistance(const arma::vec& point,
+                     const std::vector<size_t>& indices) const;
 
   /**
    * Calculates minimum bound-to-bound squared distance.
@@ -81,16 +86,33 @@
    * Example: bound1.MinDistanceSq(other) for minimum squared distance.
    */
   double MinDistance(const HRectBound& other) const;
+  /**
+   * Calculates minimum bound-to-bound squared distance.
+   *
+   * Example: bound1.MinDistanceSq(other) for minimum squared distance.
+   */
+  double MinDistance(const HRectBound& other,
+                     const std::vector<size_t>& indices) const;
 
   /**
    * Calculates maximum bound-to-point squared distance.
    */
   double MaxDistance(const arma::vec& point) const;
+  /**
+   * Computes maximum distance.
+   */
+  double MaxDistance(const arma::vec& point,
+                     const std::vector<size_t>& indices) const;
 
   /**
    * Computes maximum distance.
    */
   double MaxDistance(const HRectBound& other) const;
+  /**
+   * Computes maximum distance.
+   */
+  double MaxDistance(const HRectBound& other,
+                     const std::vector<size_t>& indices) const;
 
   /**
    * Calculates minimum and maximum bound-to-bound squared distance.

Modified: mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -130,24 +130,20 @@
 template<int t_pow>
 double HRectBound<t_pow>::MinDistance(const arma::vec& point) const
 {
-  assert(point.n_elem == dim_);
+  Log::Assert(point.n_elem == dim_);
 
   double sum = 0;
-  const math::Range* mbound = bounds_;
 
   double lower, higher;
-  for (size_t d = 0; d < dim_; d++)
+  for (size_t dimension = 0; dimension < dim_; dimension++)
   {
-    lower = mbound->lo - point[d]; // negative if point[d] > bounds_[d]
-    higher = point[d] - mbound->hi; // negative if point[d] < bounds_[d]
+    lower = bounds_[dimension].lo - point[dimension];
+    higher = point[dimension] - bounds_[dimension].hi;
 
     // since only one of 'lower' or 'higher' is negative, if we add each's
     // absolute value to itself and then sum those two, our result is the
     // nonnegative half of the equation times two; then we raise to power t_pow
     sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
-
-    // move bound pointer
-    mbound++;
   }
 
   // now take the t_pow'th root (but make sure our result is squared); then
@@ -157,6 +153,36 @@
 }
 
 /**
+ * Calculates minimum bound-to-point squared distance, filtered
+ *   by indices.
+ */
+template<int t_pow>
+double HRectBound<t_pow>::MinDistance(const arma::vec& point,
+                                      const std::vector<size_t>& indices) const
+{
+  Log::Assert(point.n_elem == dim_);
+
+  double sum = 0.0;
+
+  double lower, higher;
+  for (size_t index = 0; index < indices.size(); index++)
+  {
+    size_t dimension = indices[index];
+    lower = bounds_[dimension].lo - point[dimension];
+    higher = point[dimension] - bounds_[dimension].hi;
+
+    // since at least one of 'lower' or 'higher' is negative, if we add each's
+    // absolute value to itself and then sum those two, our result is the
+    // nonnegative half of the equation times two; then we raise to power t_pow
+    sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
+  }
+
+  // now take the t_pow'th root (but make sure our result is squared); then
+  // divide by four to cancel out the constant of 2 (which has been squared now)
+  // that was introduced earlier
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
+}
+/**
  * Calculates minimum bound-to-bound squared distance.
  *
  * Example: bound1.MinDistanceSq(other) for minimum squared distance.
@@ -164,25 +190,51 @@
 template<int t_pow>
 double HRectBound<t_pow>::MinDistance(const HRectBound& other) const
 {
-  assert(dim_ == other.dim_);
+  Log::Assert(dim_ == other.dim_);
 
-  double sum = 0;
-  const math::Range* mbound = bounds_;
-  const math::Range* obound = other.bounds_;
+  double sum = 0.0;
+  double lower = 0.0;
+  double higher = 0.0;
 
-  double lower, higher;
-  for (size_t d = 0; d < dim_; d++)
+  for(size_t dimension = 0; dimension < dim_; dimension++)
   {
-    lower = obound->lo - mbound->hi;
-    higher = mbound->lo - obound->hi;
-    // We invoke the following:
-    //   x + fabs(x) = max(x * 2, 0)
-    //   (x * 2)^2 / 4 = x^2
+    lower = bounds_[dimension].lo - other.bounds_[dimension].hi;
+    higher = other.bounds_[dimension].lo - bounds_[dimension].hi;
+
+    // since at least one of 'lower' or 'higher' is negative, if we add each's
+    // absolute value to itself and then sum those two, our result is the
+    // nonnegative half of the equation times two; then we raise to power t_pow
     sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
+  }
 
-    // move bound pointers
-    mbound++;
-    obound++;
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
+}
+/**
+ * Calculates minimum bound-to-bound squared distance,
+ *   filtered by indices.
+ *
+ * Example: bound1.MinDistanceSq(other, indices) for minimum squared distance.
+ */
+template<int t_pow>
+double HRectBound<t_pow>::MinDistance(const HRectBound& other,
+                                      const std::vector<size_t>& indices) const
+{
+  Log::Assert(dim_ == other.dim_);
+
+  double sum = 0.0;
+  double lower = 0.0;
+  double higher = 0.0;
+
+  for(size_t index = 0; index < indices.size(); index++)
+  {
+    size_t dimension = indices[index];
+    lower = bounds_[dimension].lo - other.bounds_[dimension].hi;
+    higher = other.bounds_[dimension].lo - bounds_[dimension].hi;
+
+    // since only one of 'lower' or 'higher' is negative, if we add each's
+    // absolute value to itself and then sum those two, our result is the
+    // nonnegative half of the equation times two; then we raise to power t_pow
+    sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
   }
 
   return pow(sum, 2.0 / (double) t_pow) / 4.0;
@@ -194,42 +246,93 @@
 template<int t_pow>
 double HRectBound<t_pow>::MaxDistance(const arma::vec& point) const
 {
-  double sum = 0;
+  double sum = 0.0;
+  double lower, higher;
 
-  assert(point.n_elem == dim_);
+  Log::Assert(point.n_elem == dim_);
 
-  for (size_t d = 0; d < dim_; d++)
+  for (size_t dimension = 0; dimension < dim_; dimension++)
   {
-    double v = fabs(std::max(point[d] - bounds_[d].lo,
-        bounds_[d].hi - point[d]));
-    sum += pow(v, (double) t_pow);
+    lower = fabs(point[dimension] - bounds_[dimension].lo);
+    higher = fabs(point[dimension] - bounds_[dimension].hi);
+
+    sum += pow(fabs(higher-lower) + higher + lower, (double) t_pow);
   }
 
-  return pow(sum, 2.0 / (double) t_pow);
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
 }
+/**
+ * Calculates maximum bound-to-point squared distance,
+ *   filtered by indices.
+ */
+template<int t_pow>
+double HRectBound<t_pow>::MaxDistance(const arma::vec& point,
+                                      const std::vector<size_t>& indices) const
+{
+  double sum = 0.0;
+  double lower, higher;
 
+  Log::Assert(point.n_elem == dim_);
+
+  for (size_t index = 0; index < indices.size(); index++)
+  {
+    size_t dimension = indices[index];
+    lower = fabs(point[dimension] - bounds_[dimension].lo);
+    higher = fabs(point[dimension] - bounds_[dimension].hi);
+
+    sum += pow(fabs(higher-lower) + higher + lower, (double) t_pow);
+  }
+
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
+}
+
 /**
  * Computes maximum distance.
  */
 template<int t_pow>
 double HRectBound<t_pow>::MaxDistance(const HRectBound& other) const
 {
-  double sum = 0;
+  double sum = 0.0;
+  double lower, higher;
 
-  assert(dim_ == other.dim_);
+  Log::Assert(other.dim_ == dim_);
 
-  double v;
-  for (size_t d = 0; d < dim_; d++)
+  for (size_t dimension = 0; dimension < dim_; dimension++)
   {
-    v = fabs(std::max(other.bounds_[d].hi - bounds_[d].lo,
-        bounds_[d].hi - other.bounds_[d].lo));
-    sum += pow(v, (double) t_pow); // v is non-negative.
+    lower = fabs(other.bounds_[dimension].hi - bounds_[dimension].lo);
+    higher = fabs(other.bounds_[dimension].lo - bounds_[dimension].hi);
+
+    sum += pow(fabs(higher-lower) + higher + lower, (double) t_pow);
   }
 
-  return pow(sum, 2.0 / (double) t_pow);
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
 }
 
 /**
+ * Computes the maximum distance between blocks,
+ *   filtered by indices.
+ */
+template<int t_pow>
+double HRectBound<t_pow>::MaxDistance(const HRectBound& other,
+                                      const std::vector<size_t>& indices) const
+{
+  double sum = 0.0;
+  double lower, higher;
+
+  Log::Assert(other.dim_ == dim_);
+
+  for (size_t index = 0; index < indices.size(); index++)
+  {
+    size_t dimension = indices[index];
+    lower = fabs(other.bounds_[dimension].hi - bounds_[dimension].lo);
+    higher = fabs(other.bounds_[dimension].lo - bounds_[dimension].hi);
+
+    sum += pow(fabs(higher-lower) + higher + lower, (double) t_pow);
+  }
+
+  return pow(sum, 2.0 / (double) t_pow) / 4.0;
+}
+/**
  * Calculates minimum and maximum bound-to-bound squared distance.
  */
 template<int t_pow>
@@ -238,7 +341,7 @@
   double sum_lo = 0;
   double sum_hi = 0;
 
-  assert(dim_ == other.dim_);
+  Log::Assert(dim_ == other.dim_);
 
   double v1, v2, v_lo, v_hi;
   for (size_t d = 0; d < dim_; d++)
@@ -274,7 +377,7 @@
   double sum_lo = 0;
   double sum_hi = 0;
 
-  assert(point.n_elem == dim_);
+  Log::Assert(point.n_elem == dim_);
 
   double v1, v2, v_lo, v_hi;
   for(size_t d = 0; d < dim_; d++)
@@ -329,7 +432,7 @@
 template<int t_pow>
 HRectBound<t_pow>& HRectBound<t_pow>::operator|=(const HRectBound& other)
 {
-  assert(other.dim_ == dim_);
+  Log::Assert(other.dim_ == dim_);
 
   for (size_t i = 0; i < dim_; i++)
     bounds_[i] |= other.bounds_[i];

Modified: mlpack/trunk/src/mlpack/core/tree/periodichrectbound_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/periodichrectbound_impl.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/tree/periodichrectbound_impl.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -155,7 +155,7 @@
 {
   double sum = 0;
 
-  mlpack::Log::Assert(dim_ == other.dim_);
+  Log::Assert(dim_ == other.dim_);
 
   for (size_t d = 0; d < dim_; d++){
     double v = 0, d1, d2, d3;
@@ -225,7 +225,7 @@
 {
   double sum = 0;
 
-  mlpack::Log::Assert(dim_ == other.dim_);
+  Log::Assert(dim_ == other.dim_);
 
   for (size_t d = 0; d < dim_; d++)
   {
@@ -254,7 +254,7 @@
   double sum_lo = 0;
   double sum_hi = 0;
 
-  mlpack::Log::Assert(point.n_elem == dim_);
+  Log::Assert(point.n_elem == dim_);
 
   double v1, v2, v_lo, v_hi;
   for (size_t d = 0; d < dim_; d++)
@@ -291,7 +291,7 @@
   double sum_lo = 0;
   double sum_hi = 0;
 
-  mlpack::Log::Assert(dim_ == other.dim_);
+  Log::Assert(dim_ == other.dim_);
 
   double v1, v2, v_lo, v_hi;
   for (size_t d = 0; d < dim_; d++)
@@ -325,7 +325,7 @@
 PeriodicHRectBound<t_pow>& PeriodicHRectBound<t_pow>::operator|=(
     const arma::vec& vector)
 {
-  assert(vector.n_elem == dim_);
+  Log::Assert(vector.n_elem == dim_);
 
   for (size_t i = 0; i < dim_; i++)
     bounds_[i] |= vector[i];
@@ -340,7 +340,7 @@
 PeriodicHRectBound<t_pow>& PeriodicHRectBound<t_pow>::operator|=(
     const PeriodicHRectBound& other)
 {
-  assert(other.dim_ == dim_);
+  Log::Assert(other.dim_ == dim_);
 
   for (size_t i = 0; i < dim_; i++)
     bounds_[i] |= other.bounds_[i];

Modified: mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.cpp
===================================================================
--- mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.cpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.cpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -12,70 +12,70 @@
 using namespace mlpack;
 using namespace utilities;
 
-bool SaveRestoreUtility::ReadFile (std::string filename)
+bool SaveRestoreUtility::ReadFile(std::string filename)
 {
   xmlDocPtr xmlDocTree = NULL;
-  if (NULL == (xmlDocTree = xmlReadFile (filename.c_str(), NULL, 0)))
+  if (NULL == (xmlDocTree = xmlReadFile(filename.c_str(), NULL, 0)))
   {
-    errx (1, "Clearly, we couldn't load the XML file\n");
+    Log::Fatal << "Clearly, we couldn't load the XML file\n";
   }
-  xmlNodePtr root = xmlDocGetRootElement (xmlDocTree);
+  xmlNodePtr root = xmlDocGetRootElement(xmlDocTree);
   parameters.clear();
 
-  RecurseOnNodes (root->children);
-  xmlFreeDoc (xmlDocTree);
+  RecurseOnNodes(root->children);
+  xmlFreeDoc(xmlDocTree);
   return true;
 }
-void SaveRestoreUtility::RecurseOnNodes (xmlNode* n)
+void SaveRestoreUtility::RecurseOnNodes(xmlNode* n)
 {
   xmlNodePtr current = NULL;
   for (current = n; current; current = current->next)
   {
     if (current->type == XML_ELEMENT_NODE)
     {
-      xmlChar* content = xmlNodeGetContent (current);
+      xmlChar* content = xmlNodeGetContent(current);
       parameters[(const char*) current->name] = (const char*) content;
-      xmlFree (content);
+      xmlFree(content);
     }
-    RecurseOnNodes (current->children);
+    RecurseOnNodes(current->children);
   }
 }
-bool SaveRestoreUtility::WriteFile (std::string filename)
+bool SaveRestoreUtility::WriteFile(std::string filename)
 {
   bool success = false;
-  xmlDocPtr xmlDocTree = xmlNewDoc (BAD_CAST "1.0");
+  xmlDocPtr xmlDocTree = xmlNewDoc(BAD_CAST "1.0");
   xmlNodePtr root = xmlNewNode(NULL, BAD_CAST "root");
   xmlNodePtr child = NULL;
 
-  xmlDocSetRootElement (xmlDocTree, root);
+  xmlDocSetRootElement(xmlDocTree, root);
 
   for (std::map<std::string, std::string>::iterator it = parameters.begin();
        it != parameters.end();
        ++it)
   {
-    child = xmlNewChild (root, NULL,
-                         BAD_CAST (*it).first.c_str(),
-                         BAD_CAST (*it).second.c_str());
+    child = xmlNewChild(root, NULL,
+                        BAD_CAST(*it).first.c_str(),
+                        BAD_CAST(*it).second.c_str());
     /* TODO: perhaps we'll add more later?
-     * xmlNewProp (child, BAD_CAST "attr", BAD_CAST "add more addibutes?"); */
+     * xmlNewProp(child, BAD_CAST "attr", BAD_CAST "add more addibutes?"); */
   }
   /* save the file */
-  xmlSaveFormatFileEnc (filename.c_str(), xmlDocTree, "UTF-8", 1);
-  xmlFreeDoc (xmlDocTree);
+  xmlSaveFormatFileEnc(filename.c_str(), xmlDocTree, "UTF-8", 1);
+  xmlFreeDoc(xmlDocTree);
   return success;
 }
-arma::mat& SaveRestoreUtility::LoadParameter (arma::mat& matrix, std::string name)
+arma::mat& SaveRestoreUtility::LoadParameter(arma::mat& matrix, std::string name)
 {
-  std::map<std::string, std::string>::iterator it = parameters.find (name);
-  if (it != parameters.end ())
+  std::map<std::string, std::string>::iterator it = parameters.find(name);
+  if (it != parameters.end())
   {
     std::string value = (*it).second;
     boost::char_separator<char> sep ("\n");
     boost::tokenizer<boost::char_separator<char> > tok (value, sep);
     std::list<std::list<double> > rows;
     for (boost::tokenizer<boost::char_separator<char> >::iterator
-           tokIt = tok.begin ();
-         tokIt != tok.end ();
+           tokIt = tok.begin();
+         tokIt != tok.end();
          ++tokIt)
     {
       std::string row = *tokIt;
@@ -84,28 +84,28 @@
         tokInner (row, sepComma);
       std::list<double> rowList;
       for (boost::tokenizer<boost::char_separator<char> >::iterator
-             tokInnerIt = tokInner.begin ();
-             tokInnerIt != tokInner.end ();
+             tokInnerIt = tokInner.begin();
+             tokInnerIt != tokInner.end();
              ++tokInnerIt)
       {
         double element;
         std::istringstream iss (*tokInnerIt);
         iss >> element;
-        rowList.push_back (element);
+        rowList.push_back(element);
       }
-      rows.push_back (rowList);
+      rows.push_back(rowList);
     }
-    matrix.zeros (rows.size (), (*(rows.begin ())).size ());
+    matrix.zeros(rows.size(), (*(rows.begin())).size());
     size_t rowCounter = 0;
     size_t columnCounter = 0;
-    for (std::list<std::list<double> >::iterator rowIt = rows.begin ();
-         rowIt != rows.end ();
+    for (std::list<std::list<double> >::iterator rowIt = rows.begin();
+         rowIt != rows.end();
          ++rowIt)
     {
       std::list<double> row = *rowIt;
       columnCounter = 0;
-      for (std::list<double>::iterator elementIt = row.begin ();
-           elementIt != row.end ();
+      for (std::list<double>::iterator elementIt = row.begin();
+           elementIt != row.end();
            ++elementIt)
       {
         matrix(rowCounter, columnCounter) = *elementIt;
@@ -117,26 +117,28 @@
   }
   else
   {
-    errx (1, "Missing the correct name\n");
+    Log::Fatal << "Missing the correct name\n";
   }
+  return matrix;
 }
-std::string SaveRestoreUtility::LoadParameter (std::string str, std::string name)
+std::string SaveRestoreUtility::LoadParameter(std::string str, std::string name)
 {
-  std::map<std::string, std::string>::iterator it = parameters.find (name);
-  if (it != parameters.end ())
+  std::map<std::string, std::string>::iterator it = parameters.find(name);
+  if (it != parameters.end())
   {
     return (*it).second;
   }
   else
   {
-    errx (1, "Missing the correct name\n");
+    Log::Fatal << "Missing the correct name\n";
   }
+  return "";
 }
-char SaveRestoreUtility::LoadParameter (char c, std::string name)
+char SaveRestoreUtility::LoadParameter(char c, std::string name)
 {
   int temp;
-  std::map<std::string, std::string>::iterator it = parameters.find (name);
-  if (it != parameters.end ())
+  std::map<std::string, std::string>::iterator it = parameters.find(name);
+  if (it != parameters.end())
   {
     std::string value = (*it).second;
     std::istringstream input (value);
@@ -145,17 +147,18 @@
   }
   else
   {
-    errx (1, "Missing the correct name\n");
+    Log::Fatal << "Missing the correct name\n";
   }
+  return 0;
 }
-void SaveRestoreUtility::SaveParameter (char c, std::string name)
+void SaveRestoreUtility::SaveParameter(char c, std::string name)
 {
   int temp = (int) c;
   std::ostringstream output;
   output << temp;
   parameters[name] = output.str();
 }
-void SaveRestoreUtility::SaveParameter (arma::mat& mat, std::string name)
+void SaveRestoreUtility::SaveParameter(arma::mat& mat, std::string name)
 {
   std::ostringstream output;
   size_t columns = mat.n_cols;

Modified: mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/utilities/save_restore_utility.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -36,48 +36,48 @@
   /**
    * RecurseOnNodes performs a depth first search of the XML tree.
    */
-  void RecurseOnNodes (xmlNode* n);
+  void RecurseOnNodes(xmlNode* n);
  public:
   SaveRestoreUtility() {}
   ~SaveRestoreUtility() { parameters.clear(); }
   /**
    * ReadFile reads an XML tree from a file.
    */
-  bool ReadFile (std::string filename);
+  bool ReadFile(std::string filename);
   /**
    * WriteFile writes the XML tree to a file.
    */
-  bool WriteFile (std::string filename);
+  bool WriteFile(std::string filename);
   /**
    * LoadParameter loads a parameter from the parameters map.
    */
   template<typename T>
-  T& LoadParameter (T& t, std::string name);
+  T& LoadParameter(T& t, std::string name);
   /**
    * LoadParameter loads a character from the parameters map.
    */
-  char LoadParameter (char c, std::string name);
+  char LoadParameter(char c, std::string name);
   /**
    * LoadParameter loads a string from the parameters map.
    */
-  std::string LoadParameter (std::string str, std::string name);
+  std::string LoadParameter(std::string str, std::string name);
   /**
    * LoadParameter loads an arma::mat from the parameters map.
    */
-  arma::mat& LoadParameter (arma::mat& matrix, std::string name);
+  arma::mat& LoadParameter(arma::mat& matrix, std::string name);
   /**
    * SaveParameter saves a parameter to the parameters map.
    */
   template<typename T>
-  void SaveParameter (T& t, std::string name);
+  void SaveParameter(T& t, std::string name);
   /**
    * SaveParameter saves a character to the parameters map.
    */
-  void SaveParameter (char c, std::string name);
+  void SaveParameter(char c, std::string name);
   /**
    * SaveParameter saves an arma::mat to the parameters map.
    */
-  void SaveParameter (arma::mat& mat, std::string name);
+  void SaveParameter(arma::mat& mat, std::string name);
 };
 } /* namespace model */
 } /* namespace mlpack */

Modified: mlpack/trunk/src/mlpack/core/utilities/save_restore_utility_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/utilities/save_restore_utility_impl.hpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/core/utilities/save_restore_utility_impl.hpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -15,10 +15,10 @@
 using namespace mlpack::utilities;
 
 template<typename T>
-T& SaveRestoreUtility::LoadParameter (T& t, std::string name)
+T& SaveRestoreUtility::LoadParameter(T& t, std::string name)
 {
-  std::map<std::string, std::string>::iterator it = parameters.find (name);
-  if (it != parameters.end ())
+  std::map<std::string, std::string>::iterator it = parameters.find(name);
+  if (it != parameters.end())
   {
     std::string value = (*it).second;
     std::istringstream input (value);
@@ -27,11 +27,11 @@
   }
   else
   {
-    errx (1, "Missing the correct name\n");
+    Log::Fatal << "Missing the correct name\n";
   }
 }
 template<typename T>
-void SaveRestoreUtility::SaveParameter (T& t, std::string name)
+void SaveRestoreUtility::SaveParameter(T& t, std::string name)
 {
   std::ostringstream output;
   output << t;

Modified: mlpack/trunk/src/mlpack/tests/save_restore_utility_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/save_restore_utility_test.cpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/tests/save_restore_utility_test.cpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -27,24 +27,24 @@
     saveRestore = SaveRestoreUtility();
   }
 
-  bool SaveModel (std::string filename)
+  bool SaveModel(std::string filename)
   {
-    saveRestore.SaveParameter (anInt, "anInt");
-    return saveRestore.WriteFile (filename);
+    saveRestore.SaveParameter(anInt, "anInt");
+    return saveRestore.WriteFile(filename);
   }
 
-  bool LoadModel (std::string filename)
+  bool LoadModel(std::string filename)
   {
-    bool success = saveRestore.ReadFile (filename);
+    bool success = saveRestore.ReadFile(filename);
     if (success)
     {
-      anInt = saveRestore.LoadParameter (anInt, "anInt");
+      anInt = saveRestore.LoadParameter(anInt, "anInt");
     }
     return success;
   }
 
-  const size_t AnInt () { return anInt; }
-  void AnInt (size_t s) { this->anInt = s; }
+  const size_t AnInt() { return anInt; }
+  void AnInt(size_t s) { this->anInt = s; }
 };
 
 /**

Modified: mlpack/trunk/src/mlpack/tests/tree_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/tree_test.cpp	2011-11-29 22:38:10 UTC (rev 10457)
+++ mlpack/trunk/src/mlpack/tests/tree_test.cpp	2011-11-30 01:32:27 UTC (rev 10458)
@@ -150,6 +150,12 @@
   // This will be the Euclidean squared distance.
   BOOST_REQUIRE_CLOSE(b.MinDistance(point), 95.0, 1e-5);
 
+  // test our filtering condition
+  size_t ind[] = {1,3};
+  std::vector<size_t> indices (ind, ind + sizeof (ind) / sizeof (size_t));
+
+  BOOST_REQUIRE_CLOSE(b.MinDistance(point, indices), 26.0, 1e-5);
+
   point = "2.0 5.0 2.0 -5.0 1.0";
 
   BOOST_REQUIRE_SMALL(b.MinDistance(point), 1e-5);
@@ -157,6 +163,7 @@
   point = "1.0 2.0 0.0 -2.0 1.5";
 
   BOOST_REQUIRE_SMALL(b.MinDistance(point), 1e-5);
+
 }
 
 /**
@@ -190,6 +197,12 @@
   BOOST_REQUIRE_CLOSE(b.MinDistance(c), 22.0, 1e-5);
   BOOST_REQUIRE_CLOSE(c.MinDistance(b), 22.0, 1e-5);
 
+  // test our filtering condition
+  size_t ind[] = {1,3};
+  std::vector<size_t> indices (ind, ind + sizeof (ind) / sizeof (size_t));
+
+  BOOST_REQUIRE_CLOSE(b.MinDistance(c, indices), 17.0, 1e-5);
+
   // The other bound is on the edge of the bound.
   c[0] = Range(-2.0, 0.0);
   c[1] = Range(0.0, 1.0);
@@ -250,6 +263,12 @@
 
   arma::vec point = "-2.0 0.0 10.0 3.0 3.0";
 
+  // test our filtering condition
+  size_t ind[] = {1,3};
+  std::vector<size_t> indices (ind, ind + sizeof (ind) / sizeof (size_t));
+
+  BOOST_REQUIRE_CLOSE(b.MaxDistance(point, indices), 89.0, 1e-5);
+
   // This will be the Euclidean squared distance.
   BOOST_REQUIRE_CLOSE(b.MaxDistance(point), 253.0, 1e-5);
 
@@ -293,6 +312,12 @@
   BOOST_REQUIRE_CLOSE(b.MaxDistance(c), 210.0, 1e-5);
   BOOST_REQUIRE_CLOSE(c.MaxDistance(b), 210.0, 1e-5);
 
+  // test our filtering condition
+  size_t ind[] = {1,3};
+  std::vector<size_t> indices (ind, ind + sizeof (ind) / sizeof (size_t));
+
+  BOOST_REQUIRE_CLOSE(b.MaxDistance(c, indices), 136.0, 1e-5);
+
   // The other bound is on the edge of the bound.
   c[0] = Range(-2.0, 0.0);
   c[1] = Range(0.0, 1.0);
@@ -1006,6 +1031,20 @@
     }
   }
 
+  arma::mat dataset = arma::mat(25, 1000);
+  for (size_t col = 0; col < dataset.n_cols; ++col)
+  {
+    for (size_t row = 0; row < dataset.n_rows; ++row)
+    {
+      dataset(row, col) = row + col;
+    }
+  }
+  TreeType root(dataset);
+  // check the tree size
+  BOOST_REQUIRE_EQUAL(root.TreeSize(), 127);
+  // check the tree depth
+  BOOST_REQUIRE_EQUAL(root.TreeDepth(), 7);
+
   // Reset it to the default value at the end of the test.
   CLI::GetParam<int>("tree/leaf_size") = 20;
 }




More information about the mlpack-svn mailing list