[mlpack-git] master: Minor style fixes and remove unused code. (0f4b25a)

gitdub at mlpack.org gitdub at mlpack.org
Wed Aug 17 20:44:47 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/87776e52cf9ead63fa458118a0cfd2fe46b23466...0f4b25acd6aaa14294c044874ba6cc0751712baa

>---------------------------------------------------------------

commit 0f4b25acd6aaa14294c044874ba6cc0751712baa
Author: Ryan Curtin <ryan at ratml.org>
Date:   Wed Aug 17 20:44:47 2016 -0400

    Minor style fixes and remove unused code.


>---------------------------------------------------------------

0f4b25acd6aaa14294c044874ba6cc0751712baa
 .../tree/binary_space_tree/rp_tree_max_split.hpp   | 21 +-------
 .../binary_space_tree/rp_tree_max_split_impl.hpp   | 57 +++++-----------------
 .../tree/binary_space_tree/rp_tree_mean_split.hpp  |  7 ++-
 .../binary_space_tree/rp_tree_mean_split_impl.hpp  | 10 +---
 4 files changed, 20 insertions(+), 75 deletions(-)

diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
index 52b2be8..d56aa9e 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
@@ -34,6 +34,7 @@ class RPTreeMaxSplit
     //! The value according to which the node is being split.
     ElemType splitVal;
   };
+
   /**
    * Split the node by a random hyperplane.
    *
@@ -58,30 +59,12 @@ class RPTreeMaxSplit
    * @param splitInfo An information about the split.
    */
   template<typename VecType>
-  static bool AssignToLeftNode(
-    const VecType& point,
-    const SplitInfo& splitInfo)
+  static bool AssignToLeftNode(const VecType& point, const SplitInfo& splitInfo)
   {
     return (arma::dot(point, splitInfo.direction) <= splitInfo.splitVal);
   }
 
  private:
-
-  /**
-   * Get random deviation from the median of points multiplied by the direction
-   * obtained in GetRandomDirection().
-   *
-   * @param data The dataset used by the binary space tree.
-   * @param begin Index of the starting point in the dataset that belongs to
-   *    this node.
-   * @param count Number of points in this node.
-   * @param direction A random unit vector.
-   */
-  static ElemType GetRandomDeviation(const MatType& data,
-                                     const size_t begin,
-                                     const size_t count,
-                                     const arma::Col<ElemType>& direction);
-
   /**
    * This method finds the position of the hyperplane that will split the node.
    *
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
index b187f99..9943853 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
@@ -16,10 +16,10 @@ namespace tree {
 
 template<typename BoundType, typename MatType>
 bool RPTreeMaxSplit<BoundType, MatType>::SplitNode(const BoundType& /* bound */,
-                                                  MatType& data,
-                                                  const size_t begin,
-                                                  const size_t count,
-                                                  SplitInfo& splitInfo)
+                                                   MatType& data,
+                                                   const size_t begin,
+                                                   const size_t count,
+                                                   SplitInfo& splitInfo)
 {
   splitInfo.direction.zeros(data.n_rows);
 
@@ -27,44 +27,8 @@ bool RPTreeMaxSplit<BoundType, MatType>::SplitNode(const BoundType& /* bound */,
   math::RandVector(splitInfo.direction);
 
   // Get the value according to which we will perform the split.
-  if (!GetSplitVal(data, begin, count, splitInfo.direction, splitInfo.splitVal))
-    return false;
-
-  return true;
-}
-
-template<typename BoundType, typename MatType>
-typename MatType::elem_type RPTreeMaxSplit<BoundType, MatType>::
-GetRandomDeviation(const MatType& data,
-                   const size_t begin,
-                   const size_t count,
-                   const arma::Col<ElemType>& direction)
-{
-  // Choose a random point
-  size_t index = math::RandInt(begin, begin + count);
-
-  ElemType furthestDistance = 0;
-
-  // Find the furthest point from the point that we chose
-  for (size_t i = begin; i < index; i++)
-  {
-    const ElemType dist = metric::SquaredEuclideanDistance::Evaluate(
-        data.col(index), data.col(i));
-    if (dist > furthestDistance)
-      furthestDistance = dist;
-  }
-
-  for (size_t i = index; i < begin + count; i++)
-  {
-    const ElemType dist = metric::SquaredEuclideanDistance::Evaluate(
-        data.col(index), data.col(i));
-    if (dist > furthestDistance)
-      furthestDistance = dist;
-  }
-
-  // Get a random deviation.
-  return math::Random(-6.0 * std::sqrt(furthestDistance / data.n_rows),
-                      6.0 * std::sqrt(furthestDistance / data.n_rows));
+  return GetSplitVal(data, begin, count, splitInfo.direction,
+      splitInfo.splitVal);
 }
 
 template<typename BoundType, typename MatType>
@@ -96,8 +60,13 @@ bool RPTreeMaxSplit<BoundType, MatType>::GetSplitVal(
   splitVal = arma::median(values);
 
   // Add a random deviation to the median.
-  // This algorithm differs from the method suggested in the
-  // random projection tree paper.
+  // This algorithm differs from the method suggested in the random projection
+  // tree paper, for two reasons:
+  //   1. Evaluating the method proposed in the paper is time-consuming, since
+  //      we must solve the furthest-pair problem.
+  //   2. The proposed method does not appear to guarantee that a valid split
+  //      value will be generated (i.e. it can produce a split value where there
+  //      may be no points on the left or the right).
   splitVal += math::Random((minimum - splitVal) * 0.75,
       (maximum - splitVal) * 0.75);
 
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
index 7c0a2dc..32914d0 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
@@ -39,6 +39,7 @@ class RPTreeMeanSplit
     //! median split.
     bool meanSplit;
   };
+
   /**
    * Split the node according to the mean value in the dimension with maximum
    * width.
@@ -51,7 +52,7 @@ class RPTreeMeanSplit
    * @param splitInfo An information about the split. This information contains
    *    the direction and the value.
    */
-  static bool SplitNode(const BoundType& /*bound*/,
+  static bool SplitNode(const BoundType& /* bound */,
                         MatType& data,
                         const size_t begin,
                         const size_t count,
@@ -64,9 +65,7 @@ class RPTreeMeanSplit
    * @param splitInfo An information about the split.
    */
   template<typename VecType>
-  static bool AssignToLeftNode(
-    const VecType& point,
-    const SplitInfo& splitInfo)
+  static bool AssignToLeftNode(const VecType& point, const SplitInfo& splitInfo)
   {
     if (splitInfo.meanSplit)
       return arma::dot(point - splitInfo.mean, point - splitInfo.mean) <=
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
index 1732ef4..1bb1879 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
@@ -44,8 +44,7 @@ bool RPTreeMeanSplit<BoundType, MatType>::SplitNode(const BoundType&  bound,
 
     // Get the median value of the scalar products of the normal and the
     // sampled points. The node will be split according to this value.
-    if (!GetDotMedian(data, samples, splitInfo.direction, splitInfo.splitVal))
-      return false;
+    return GetDotMedian(data, samples, splitInfo.direction, splitInfo.splitVal);
   }
   else
   {
@@ -54,11 +53,8 @@ bool RPTreeMeanSplit<BoundType, MatType>::SplitNode(const BoundType&  bound,
 
     // Get the median of the distances between the mean point and the sampled
     // points. The node will be split according to this value.
-    if (!GetMeanMedian(data, samples, splitInfo.mean, splitInfo.splitVal))
-      return false;
+    return GetMeanMedian(data, samples, splitInfo.mean, splitInfo.splitVal);
   }
-
-  return true;
 }
 
 template<typename BoundType, typename MatType>
@@ -138,8 +134,6 @@ bool RPTreeMeanSplit<BoundType, MatType>::GetMeanMedian(
   return true;
 }
 
-
-
 } // namespace tree
 } // namespace mlpack
 




More information about the mlpack-git mailing list