[mlpack-git] master: Fixed missing perform_split.hpp. (94f4fa2)

gitdub at mlpack.org gitdub at mlpack.org
Fri Aug 26 16:40:30 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/1797a49c8f76d65814fec4a122d0d2fea01fc2d9...9e5cd0ac9c5cde9ac141bc84e7327bd11e19d42e

>---------------------------------------------------------------

commit 94f4fa27c2fdb7888b65d1b9712b4198a643f6a9
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date:   Fri Aug 26 23:40:30 2016 +0300

    Fixed missing perform_split.hpp.


>---------------------------------------------------------------

94f4fa27c2fdb7888b65d1b9712b4198a643f6a9
 .../core/tree/binary_space_tree/perform_split.hpp  | 151 +++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/src/mlpack/core/tree/binary_space_tree/perform_split.hpp b/src/mlpack/core/tree/binary_space_tree/perform_split.hpp
new file mode 100644
index 0000000..78311be
--- /dev/null
+++ b/src/mlpack/core/tree/binary_space_tree/perform_split.hpp
@@ -0,0 +1,151 @@
+/**
+ * @file perform_split.hpp
+ * @author Mikhail Lozhnikov
+ *
+ * This file contains functions that implement the default binary split
+ * behavior. The functions perform the actual splitting. This will order
+ * the dataset such that points that belong to the left subtree are on the left
+ * of the split column, and points from the right subtree are on the right side
+ * of the split column.
+ */
+
+#ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_PERFORM_SPLIT_HPP
+#define MLPACK_CORE_TREE_BINARY_SPACE_TREE_PERFORM_SPLIT_HPP
+
+namespace mlpack {
+namespace tree /** Trees and tree-building procedures. */ {
+namespace split {
+
+/**
+ * This function implements the default split behavior i.e. it rearranges
+ * points according to the split information. The SplitType::AssignToLeftNode()
+ * function is used in order to determine the child that contains any particular
+ * point.
+ *
+ * @param data The dataset used by the binary space tree.
+ * @param begin Index of the starting point in the dataset that belongs to
+ *    this node.
+ * @param count Number of points in this node.
+ * @param splitInfo The information about the split.
+ */
+template<typename MatType, typename SplitType>
+size_t PerformSplit(MatType& data,
+                    const size_t begin,
+                    const size_t count,
+                    const typename SplitType::SplitInfo& splitInfo)
+{
+  // This method modifies the input dataset.  We loop both from the left and
+  // right sides of the points contained in this node.  The points less than
+  // splitVal should be on the left side of the matrix, and the points greater
+  // than splitVal should be on the right side of the matrix.
+  size_t left = begin;
+  size_t right = begin + count - 1;
+
+  // First half-iteration of the loop is out here because the termination
+  // condition is in the middle.
+  while (SplitType::AssignToLeftNode(data.col(left), splitInfo) && (left <= right))
+    left++;
+  while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) &&
+      (left <= right) && (right > 0))
+    right--;
+
+  while (left <= right)
+  {
+    // Swap columns.
+    data.swap_cols(left, right);
+
+    // See how many points on the left are correct.  When they are correct,
+    // increase the left counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it later.
+    while (SplitType::AssignToLeftNode(data.col(left), splitInfo) &&
+        (left <= right))
+      left++;
+
+    // Now see how many points on the right are correct.  When they are correct,
+    // decrease the right counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it with the wrong point we found in the
+    // previous loop.
+    while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) &&
+        (left <= right))
+      right--;
+  }
+
+  Log::Assert(left == right + 1);
+
+  return left;
+}
+
+/**
+ * This function implements the default split behavior i.e. it rearranges
+ * points according to the split information. The SplitType::AssignToLeftNode()
+ * function is used in order to determine the child that contains any particular
+ * point. The function takes care of indices and returns the list of changed
+ * indices.
+ *
+ * @param data The dataset used by the binary space tree.
+ * @param begin Index of the starting point in the dataset that belongs to
+ *    this node.
+ * @param count Number of points in this node.
+ * @param splitInfo The information about the split.
+ * @param oldFromNew Vector which will be filled with the old positions for
+ *    each new point.
+ */
+template<typename MatType, typename SplitType>
+size_t PerformSplit(MatType& data,
+                    const size_t begin,
+                    const size_t count,
+                    const typename SplitType::SplitInfo& splitInfo,
+                    std::vector<size_t>& oldFromNew)
+{
+  // This method modifies the input dataset.  We loop both from the left and
+  // right sides of the points contained in this node.  The points less than
+  // splitVal should be on the left side of the matrix, and the points greater
+  // than splitVal should be on the right side of the matrix.
+  size_t left = begin;
+  size_t right = begin + count - 1;
+
+  // First half-iteration of the loop is out here because the termination
+  // condition is in the middle.
+  while (SplitType::AssignToLeftNode(data.col(left), splitInfo) && (left <= right))
+    left++;
+  while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) &&
+      (left <= right) && (right > 0))
+    right--;
+
+  while (left <= right)
+  {
+    // Swap columns.
+    data.swap_cols(left, right);
+
+    // Update the indices for what we changed.
+    size_t t = oldFromNew[left];
+    oldFromNew[left] = oldFromNew[right];
+    oldFromNew[right] = t;
+
+    // See how many points on the left are correct.  When they are correct,
+    // increase the left counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it later.
+    while (SplitType::AssignToLeftNode(data.col(left), splitInfo) &&
+        (left <= right))
+      left++;
+
+    // Now see how many points on the right are correct.  When they are correct,
+    // decrease the right counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it with the wrong point we found in the
+    // previous loop.
+    while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) &&
+        (left <= right))
+      right--;
+  }
+
+  Log::Assert(left == right + 1);
+
+  return left;
+}
+
+} // namespace split
+} // namespace tree
+} // namespace mlpack
+
+
+#endif // MLPACK_CORE_TREE_BINARY_SPACE_TREE_PERFORM_SPLIT_HPP




More information about the mlpack-git mailing list