[mlpack-git] master: Intermediate checkin so I can move systems easily. (1e409da)

gitdub at mlpack.org gitdub at mlpack.org
Tue Oct 4 14:11:33 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/9ef7339d40550a974b3939e9fcb966fac2c09065...ebdb5abeaa3fd621a06ae663862bb72df76d2b40

>---------------------------------------------------------------

commit 1e409da07b55ff6ce7f4087e32275ec612257fa1
Author: Ryan Curtin <ryan at ratml.org>
Date:   Wed Sep 14 15:38:11 2016 -0400

    Intermediate checkin so I can move systems easily.


>---------------------------------------------------------------

1e409da07b55ff6ce7f4087e32275ec612257fa1
 src/mlpack/core/tree/octree/octree_impl.hpp | 214 ++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)

diff --git a/src/mlpack/core/tree/octree/octree_impl.hpp b/src/mlpack/core/tree/octree/octree_impl.hpp
new file mode 100644
index 0000000..cd8234a
--- /dev/null
+++ b/src/mlpack/core/tree/octree/octree_impl.hpp
@@ -0,0 +1,214 @@
+/**
+ * @file octree_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of generalized octree (Octree).
+ */
+#ifndef MLPACK_CORE_TREE_OCTREE_OCTREE_IMPL_HPP
+#define MLPACK_CORE_TREE_OCTREE_OCTREE_IMPL_HPP
+
+#include "octree.hpp"
+
+//! Construct the tree.
+template<typename MetricType, typename StatisticType, typename MatType>
+Octree<MetricType, StatisticType, MatType>::Octree(const MatType& dataset,
+                                                   const double maxLeafSize) :
+    dataset(new MatType(dataset)),
+
+{
+  // Calculate empirical center of data.
+  bound |= *dataset;
+  arma::vec center = bound.Center();
+  double maxWidth = bound.MaxWidth();
+
+  SplitNode(center, maxWidth);
+
+  // Initialize the statistic.
+  stat = StatisticType(*this);
+}
+
+//! Construct the tree.
+template<typename MetricType, typename StatisticType, typename MatType>
+Octree<MetricType, StatisticType, MatType>::Octree(
+    const MatType& dataset,
+    std::vector<size_t>& oldFromNew,
+    const size_t maxLeafSize) :
+    dataset(new MatType(dataset)),
+
+{
+  // Calculate empirical center of data.
+  bound |= *dataset;
+  arma::vec center = bound.Center();
+  double maxWidth = bound.MaxWidth();
+
+  oldFromNew.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; ++i)
+    oldFromNew[i] = i;
+
+  SplitNode(center, maxWidth, oldFromNew);
+
+  // Initialize the statistic.
+  stat = StatisticType(*this);
+}
+
+//! Construct the tree.
+template<typename MetricType, typename StatisticType, typename MatType>
+Octree<MetricType, StatisticType, MatType>::Octree(
+    const MatType& dataset,
+    std::vector<size_t>& oldFromNew,
+    const size_t maxLeafSize) :
+    dataset(new MatType(dataset)),
+
+{
+  // Calculate empirical center of data.
+  bound |= *dataset;
+  arma::vec center = bound.Center();
+  double maxWidth = bound.MaxWidth();
+
+  oldFromNew.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; ++i)
+    oldFromNew[i] = i;
+
+  SplitNode(center, maxWidth, oldFromNew);
+
+  // Initialize the statistic.
+  stat = StatisticType(*this);
+}
+
+
+//! Split the node.
+template<typename MetricType, typename StatisticType, typename MatType>
+void Octree<MetricType, StatisticType, MatType>::SplitNode(
+    const arma::vec& center,
+    const double width)
+{
+  // We must split the dataset by sequentially creating each of the children.
+  // We do this in two steps: first we make a pass to count the number of points
+  // that will fall into each child; then in the second pass we rearrange the
+  // points and create the children.
+  arma::Col<size_t> childCounts(std::pow(2, dataset->n_rows),
+      arma::fill::zeros);
+  arma::Col<size_t> assignments(count, arma::fill::zeros);
+
+  // First pass: calculate number of points in each child, and find child
+  // assignments for each point.
+  for (size_t i = 0; i < count; ++i)
+  {
+    for (size_t d = 0; d < dataset->n_rows; ++d)
+    {
+      // We are guaranteed that the points fall within 'width / 2' of the center
+      // in each dimension, so we just need to check which side of the center
+      // the points fall on.  The last dimension represents the most significant
+      // bit in the assignment; the bit is '1' if it falls to the right of the
+      // center.
+      if (dataset(d, begin + i) > center(d))
+        assignments(i) |= (1 << d);
+    }
+
+    childCounts(assignments(i))++;
+  }
+
+  // Sort all of the points so we know where to copy them.
+  arma::uvec ordering = arma::stable_sort_index(assignments, "ascend");
+
+  // This strategy may copy the matrix during the computation, but that isn't
+  // really a problem.  We use non-contiguous submatrix views to extract the
+  // columns in the correct order.
+  dataset->cols(begin, begin + count - 1) = dataset->cols(begin + ordering);
+
+  // Now that the dataset is reordered, we can create the children.
+  size_t childBegin = begin;
+  arma::vec childCenter(center.n_elem);
+  const double childWidth = width / 2.0;
+  for (size_t i = 0; i < childCounts.n_elem; ++i)
+  {
+    // If the child has no points, don't create it.
+    if (childCounts[i] == 0)
+      continue;
+
+    // Create the correct center.
+    for (size_t d = 0; d < center.n_elem; ++d)
+    {
+      // Is the dimension "right" (1) or "left" (0)?
+      if ((i >> d) & 1 == 0)
+        childCenter[d] = center[d] - childWidth;
+      else
+        childCenter[d] = center[d] + childWidth;
+    }
+
+    children.push_back(new Octree(this, childBegin, childCounts[i], childCenter,
+        childWidth, maxLeafSize));
+
+    childBegin += childCounts[i];
+  }
+}
+
+//! Split the node, and store mappings.
+template<typename MetricType, typename StatisticType, typename MatType>
+void Octree<MetricType, StatisticType, MatType>::SplitNode(
+    const arma::vec& center,
+    const double width,
+    std::vector<size_t>& oldFromNew)
+{
+  // We must split the dataset by sequentially creating each of the children.
+  // We do this in two steps: first we make a pass to count the number of points
+  // that will fall into each child; then in the second pass we rearrange the
+  // points and create the children.
+  arma::Col<size_t> childCounts(std::pow(2, dataset->n_rows),
+      arma::fill::zeros);
+  arma::Col<size_t> assignments(count, arma::fill::zeros);
+
+  // First pass: calculate number of points in each child, and find child
+  // assignments for each point.
+  for (size_t i = 0; i < count; ++i)
+  {
+    for (size_t d = 0; d < dataset->n_rows; ++d)
+    {
+      // We are guaranteed that the points fall within 'width / 2' of the center
+      // in each dimension, so we just need to check which side of the center
+      // the points fall on.  The last dimension represents the most significant
+      // bit in the assignment; the bit is '1' if it falls to the right of the
+      // center.
+      if (dataset(d, begin + i) > center(d))
+        assignments(i) |= (1 << d);
+    }
+
+    childCounts(assignments(i))++;
+  }
+
+  // Sort all of the points so we know where to copy them.
+  arma::uvec ordering = arma::stable_sort_index(assignments, "ascend");
+
+  // This strategy may copy the matrix during the computation, but that isn't
+  // really a problem.  We use non-contiguous submatrix views to extract the
+  // columns in the correct order.
+  dataset->cols(begin, begin + count - 1) = dataset->cols(begin + ordering);
+  for (size_t i = 0; i < count; ++i)
+    oldFromNew[ordering[i] + begin] = i + begin;
+
+  // Now that the dataset is reordered, we can create the children.
+  size_t childBegin = begin;
+  arma::vec childCenter(center.n_elem);
+  const double childWidth = width / 2.0;
+  for (size_t i = 0; i < childCounts.n_elem; ++i)
+  {
+    // If the child has no points, don't create it.
+    if (childCounts[i] == 0)
+      continue;
+
+    // Create the correct center.
+    for (size_t d = 0; d < center.n_elem; ++d)
+    {
+      // Is the dimension "right" (1) or "left" (0)?
+      if ((i >> d) & 1 == 0)
+        childCenter[d] = center[d] - childWidth;
+      else
+        childCenter[d] = center[d] + childWidth;
+    }
+
+    children.push_back(new Octree(this, childBegin, childCounts[i], oldFromNew,
+        childCenter, childWidth, maxLeafSize));
+
+    childBegin += childCounts[i];
+  }
+}




More information about the mlpack-git mailing list