[mlpack-svn] r10047 - mlpack/trunk/src/mlpack/core/tree
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 26 20:10:54 EDT 2011
Author: nslagle
Date: 2011-10-26 20:10:53 -0400 (Wed, 26 Oct 2011)
New Revision: 10047
Removed:
mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.h
mlpack/trunk/src/mlpack/core/tree/spacetree.h
mlpack/trunk/src/mlpack/core/tree/spacetree_impl.h
mlpack/trunk/src/mlpack/core/tree/statistic.h
Log:
mlpack/src/mlpack/core: remove files I accidentally re-added
Deleted: mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.h
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.h 2011-10-27 00:08:59 UTC (rev 10046)
+++ mlpack/trunk/src/mlpack/core/tree/hrectbound_impl.h 2011-10-27 00:10:53 UTC (rev 10047)
@@ -1,331 +0,0 @@
-/**
- * @file tree/hrectbound_impl.h
- *
- * Implementation of hyper-rectangle bound policy class.
- * Template parameter t_pow is the metric to use; use 2 for Euclidean (L2).
- *
- * @experimental
- */
-#ifndef __TREE_HRECTBOUND_IMPL_H
-#define __TREE_HRECTBOUND_IMPL_H
-
-#include <math.h>
-
-#include "../math/math_lib.h"
-
-// In case it has not been included yet.
-#include "hrectbound.h"
-
-namespace mlpack {
-namespace bound {
-
-/**
- * Empty constructor
- */
-template<int t_pow>
-HRectBound<t_pow>::HRectBound() :
- dim_(0),
- bounds_(NULL) { /* nothing to do */ }
-
-/**
- * Initializes to specified dimensionality with each dimension the empty
- * set.
- */
-template<int t_pow>
-HRectBound<t_pow>::HRectBound(size_t dimension) :
- dim_(dimension),
- bounds_(new Range[dim_]) { /* nothing to do */ }
-
-/***
- * Copy constructor necessary to prevent memory leaks.
- */
-template<int t_pow>
-HRectBound<t_pow>::HRectBound(const HRectBound& other) :
- dim_(other.dim()),
- bounds_(new Range[dim_]) {
- // Copy other bounds over.
- for (size_t i = 0; i < dim_; i++)
- bounds_[i] = other[i];
-}
-
-/***
- * Same as the copy constructor.
- */
-template<int t_pow>
-HRectBound<t_pow>& HRectBound<t_pow>::operator=(const HRectBound& other) {
- if (bounds_)
- delete[] bounds_;
-
- // We can't just copy the bounds_ pointer like the default copy constructor
- // will!
- dim_ = other.dim();
- bounds_ = new Range[dim_];
- for (size_t i = 0; i < dim_; i++)
- bounds_[i] = other[i];
-
- return *this;
-}
-
-/**
- * Destructor: clean up memory
- */
-template<int t_pow>
-HRectBound<t_pow>::~HRectBound() {
- if (bounds_)
- delete[] bounds_;
-}
-
-/**
- * Resets all dimensions to the empty set.
- */
-template<int t_pow>
-void HRectBound<t_pow>::Clear() {
- for (size_t i = 0; i < dim_; i++) {
- bounds_[i] = Range();
- }
-}
-
-/**
- * Gets the range for a particular dimension.
- */
-template<int t_pow>
-const Range HRectBound<t_pow>::operator[](size_t i) const {
- return bounds_[i];
-}
-
-/**
- * Sets the range for the given dimension.
- */
-template<int t_pow>
-Range& HRectBound<t_pow>::operator[](size_t i) {
- return bounds_[i];
-}
-
-/***
- * Calculates the centroid of the range, placing it into the given vector.
- *
- * @param centroid Vector which the centroid will be written to.
- */
-template<int t_pow>
-void HRectBound<t_pow>::Centroid(arma::vec& centroid) const {
- // set size correctly if necessary
- if(!(centroid.n_elem == dim_))
- centroid.set_size(dim_);
-
- for(size_t i = 0; i < dim_; i++) {
- centroid(i) = bounds_[i].mid();
- }
-}
-
-/**
- * Calculates minimum bound-to-point squared distance.
- */
-template<int t_pow>
-double HRectBound<t_pow>::MinDistance(const arma::vec& point) const {
- assert(point.n_elem == dim_);
-
- double sum = 0;
- const Range* mbound = bounds_;
-
- double lower, higher;
- for(size_t d = 0; d < dim_; d++) {
- lower = mbound->lo - point[d]; // negative if point[d] > bounds_[d]
- higher = point[d] - mbound->hi; // negative if point[d] < bounds_[d]
-
- // since only one of 'lower' or 'higher' is negative, if we add each's
- // absolute value to itself and then sum those two, our result is the
- // nonnegative half of the equation times two; then we raise to power t_pow
- sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
-
- // move bound pointer
- mbound++;
- }
-
- // now take the t_pow'th root (but make sure our result is squared); then
- // divide by four to cancel out the constant of 2 (which has been squared now)
- // that was introduced earlier
- return pow(sum, 2.0 / (double) t_pow) / 4.0;
-}
-
-/**
- * Calculates minimum bound-to-bound squared distance.
- *
- * Example: bound1.MinDistanceSq(other) for minimum squared distance.
- */
-template<int t_pow>
-double HRectBound<t_pow>::MinDistance(const HRectBound& other) const {
- assert(dim_ == other.dim_);
-
- double sum = 0;
- const Range* mbound = bounds_;
- const Range* obound = other.bounds_;
-
- double lower, higher;
- for (size_t d = 0; d < dim_; d++) {
- lower = obound->lo - mbound->hi;
- higher = mbound->lo - obound->hi;
- // We invoke the following:
- // x + fabs(x) = max(x * 2, 0)
- // (x * 2)^2 / 4 = x^2
- sum += pow((lower + fabs(lower)) + (higher + fabs(higher)), (double) t_pow);
-
- // move bound pointers
- mbound++;
- obound++;
- }
-
- return pow(sum, 2.0 / (double) t_pow) / 4.0;
-}
-
-/**
- * Calculates maximum bound-to-point squared distance.
- */
-template<int t_pow>
-double HRectBound<t_pow>::MaxDistance(const arma::vec& point) const {
- double sum = 0;
-
- assert(point.n_elem == dim_);
-
- for (size_t d = 0; d < dim_; d++) {
- double v = fabs(std::max(
- point[d] - bounds_[d].lo,
- bounds_[d].hi - point[d]));
- sum += pow(v, (double) t_pow);
- }
-
- return pow(sum, 2.0 / (double) t_pow);
-}
-
-/**
- * Computes maximum distance.
- */
-template<int t_pow>
-double HRectBound<t_pow>::MaxDistance(const HRectBound& other) const {
- double sum = 0;
-
- assert(dim_ == other.dim_);
-
- double v;
- for(size_t d = 0; d < dim_; d++) {
- v = fabs(std::max(
- other.bounds_[d].hi - bounds_[d].lo,
- bounds_[d].hi - other.bounds_[d].lo));
- sum += pow(v, (double) t_pow); // v is non-negative
- }
-
- return pow(sum, 2.0 / (double) t_pow);
-}
-
-/**
- * Calculates minimum and maximum bound-to-bound squared distance.
- */
-template<int t_pow>
-Range HRectBound<t_pow>::RangeDistance(const HRectBound& other) const {
- double sum_lo = 0;
- double sum_hi = 0;
-
- assert(dim_ == other.dim_);
-
- double v1, v2, v_lo, v_hi;
- for (size_t d = 0; d < dim_; d++) {
- v1 = other.bounds_[d].lo - bounds_[d].hi;
- v2 = bounds_[d].lo - other.bounds_[d].hi;
- // one of v1 or v2 is negative
- if(v1 >= v2) {
- v_hi = -v2; // make it nonnegative
- v_lo = (v1 > 0) ? v1 : 0; // force to be 0 if negative
- } else {
- v_hi = -v1; // make it nonnegative
- v_lo = (v2 > 0) ? v2 : 0; // force to be 0 if negative
- }
-
- sum_lo += pow(v_lo, (double) t_pow);
- sum_hi += pow(v_hi, (double) t_pow);
- }
-
- return Range(pow(sum_lo, 2.0 / (double) t_pow),
- pow(sum_hi, 2.0 / (double) t_pow));
-}
-
-/**
- * Calculates minimum and maximum bound-to-point squared distance.
- */
-template<int t_pow>
-Range HRectBound<t_pow>::RangeDistance(const arma::vec& point) const {
- double sum_lo = 0;
- double sum_hi = 0;
-
- Log::Assert(point.n_elem == dim_);
-
- double v1, v2, v_lo, v_hi;
- for(size_t d = 0; d < dim_; d++) {
- v1 = bounds_[d].lo - point[d]; // Negative if point[d] > lo.
- v2 = point[d] - bounds_[d].hi; // Negative if point[d] < hi.
- // One of v1 or v2 (or both) is negative.
- if(v1 >= 0) { // point[d] <= bounds_[d].lo.
- v_hi = -v2; // v2 will be larger but must be negated.
- v_lo = v1;
- } else { // point[d] is between lo and hi, or greater than hi.
- if (v2 >= 0) {
- v_hi = -v1; // v1 will be larger, but must be negated.
- v_lo = v2;
- } else {
- v_hi = -std::min(v1, v2); // Both are negative, but we need the larger.
- v_lo = 0;
- }
- }
-
- sum_lo += pow(v_lo, (double) t_pow);
- sum_hi += pow(v_hi, (double) t_pow);
- }
-
- return Range(pow(sum_lo, 2.0 / (double) t_pow),
- pow(sum_hi, 2.0 / (double) t_pow));
-}
-
-/**
- * Expands this region to include a new point.
- */
-template<int t_pow>
-HRectBound<t_pow>& HRectBound<t_pow>::operator|=(const arma::vec& vector) {
- Log::Assert(vector.n_elem == dim_);
-
- for (size_t i = 0; i < dim_; i++) {
- bounds_[i] |= vector[i];
- }
-
- return *this;
-}
-
-/**
- * Expands this region to encompass another bound.
- */
-template<int t_pow>
-HRectBound<t_pow>& HRectBound<t_pow>::operator|=(const HRectBound& other) {
- assert(other.dim_ == dim_);
-
- for (size_t i = 0; i < dim_; i++) {
- bounds_[i] |= other.bounds_[i];
- }
-
- return *this;
-}
-
-/**
- * Determines if a point is within this bound.
- */
-template<int t_pow>
-bool HRectBound<t_pow>::Contains(const arma::vec& point) const {
- for (size_t i = 0; i < point.n_elem; i++) {
- if (!bounds_[i].Contains(point(i))) {
- return false;
- }
- }
-
- return true;
-}
-
-}; // namespace bound
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/core/tree/spacetree.h
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/spacetree.h 2011-10-27 00:08:59 UTC (rev 10046)
+++ mlpack/trunk/src/mlpack/core/tree/spacetree.h 2011-10-27 00:10:53 UTC (rev 10047)
@@ -1,202 +0,0 @@
-/**
- * @file spacetree.h
- *
- * Generalized space partitioning tree.
- *
- * @experimental
- */
-
-#ifndef TREE_SPACETREE_H
-#define TREE_SPACETREE_H
-
-#include "statistic.h"
-
-#include <mlpack/core.h>
-#include <armadillo>
-
-namespace mlpack {
-namespace tree {
-
-PARAM_MODULE("tree", "Parameters for the binary space partitioning tree.");
-PARAM_INT("leaf_size", "Leaf size used during tree construction.", "tree", 20);
-
-/**
- * A binary space partitioning tree, such as a KD-tree or a ball tree. Once the
- * bound and type of dataset is defined, the tree will construct itself. Call
- * the constructor with the dataset to build the tree on, and the entire tree
- * will be built.
- *
- * This particular tree does not allow growth, so you cannot add or delete nodes
- * from it. If you need to add or delete a node, the better procedure is to
- * rebuild the tree entirely.
- *
- * This tree does take one command line parameter, which is the leaf size to be
- * used. You can set this at runtime with --tree/leaf_size [leaf_size]. You
- * can also set it in your program using CLI:
- *
- * @code
- * CLI::GetParam<int>("tree/leaf_size") = target_leaf_size;
- * @endcode
- *
- * @tparam TBound The bound used for each node. The valid types of bounds and
- * the necessary skeleton interface for this class can be found in bounds/.
- * @tparam TDataset The type of dataset (forced to be arma::mat for now).
- * @tparam TStatistic Extra data contained in the node. See statistic.h for
- * the necessary skeleton interface.
- */
-template<typename Bound,
- typename Statistic = EmptyStatistic>
-class BinarySpaceTree {
- private:
- BinarySpaceTree *left_; //< The left child node.
- BinarySpaceTree *right_; //< The right child node.
- size_t begin_; //< The first point in the dataset contained in this node.
- size_t count_; //< The count of points in the dataset contained in this node.
- Bound bound_; //< The bound object for this node.
- Statistic stat_; //< The extra data contained in the node.
-
- public:
- /***
- * Construct this as the head node of a binary space tree using the given
- * dataset. This will modify the ordering of the points in the dataset!
- *
- * Optionally, pass in vectors which represent a mapping from the old
- * dataset's point ordering to the new ordering, and vice versa.
- *
- * @param data Dataset to create tree from.
- * @param leaf_size Leaf size of the tree.
- * @param old_from_new Vector which will be filled with the old positions for
- * each new point.
- * @param new_from_old Vector which will be filled with the new positions for
- * each old point.
- */
- BinarySpaceTree(arma::mat& data);
- BinarySpaceTree(arma::mat& data, std::vector<size_t>& old_from_new);
- BinarySpaceTree(arma::mat& data,
- std::vector<size_t>& old_from_new,
- std::vector<size_t>& new_from_old);
-
- BinarySpaceTree(arma::mat& data,
- size_t begin_in,
- size_t count_in);
- BinarySpaceTree(arma::mat& data,
- size_t begin_in,
- size_t count_in,
- std::vector<size_t>& old_from_new);
- BinarySpaceTree(arma::mat& data,
- size_t begin_in,
- size_t count_in,
- std::vector<size_t>& old_from_new,
- std::vector<size_t>& new_from_old);
-
- BinarySpaceTree();
-
- /***
- * Deletes this node, deallocating the memory for the children and calling
- * their destructors in turn. This will invalidate any pointers or references
- * to any nodes which are children of this one.
- */
- ~BinarySpaceTree();
-
- /**
- * Find a node in this tree by its begin and count.
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param begin_q the begin() of the node to find
- * @param count_q the count() of the node to find
- * @return the found node, or NULL
- */
- const BinarySpaceTree* FindByBeginCount(size_t begin_q,
- size_t count_q) const;
-
- /**
- * Find a node in this tree by its begin and count (const).
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param begin_q the begin() of the node to find
- * @param count_q the count() of the node to find
- * @return the found node, or NULL
- */
- BinarySpaceTree* FindByBeginCount(size_t begin_q, size_t count_q);
-
- // TODO: Not const correct
-
- const Bound& bound() const;
- Bound& bound();
-
- const Statistic& stat() const;
- Statistic& stat();
-
- bool is_leaf() const;
-
- /**
- * Gets the left branch of the tree.
- */
- BinarySpaceTree *left() const;
-
- /**
- * Gets the right branch.
- */
- BinarySpaceTree *right() const;
-
- /**
- * Gets the index of the begin point of this subset.
- */
- size_t begin() const;
-
- /**
- * Gets the index one beyond the last index in the series.
- */
- size_t end() const;
-
- /**
- * Gets the number of points in this subset.
- */
- size_t count() const;
-
- void Print() const;
-
- private:
-
- /***
- * Splits the current node, assigning its left and right children recursively.
- *
- * Optionally, return a list of the changed indices.
- *
- * @param data Dataset which we are using.
- * @param leaf_size Leaf size to split with.
- * @param old_from_new Vector holding permuted indices.
- */
- void SplitNode(arma::mat& data);
- void SplitNode(arma::mat& data, std::vector<size_t>& old_from_new);
-
- /***
- * Find the index to split on for this node, given that we are splitting in
- * the given split dimension on the specified split value.
- *
- * Optionally, return a list of the changed indices.
- *
- * @param data Dataset which we are using.
- * @param split_dim Dimension of dataset to split on.
- * @param split_val Value to split on, in the given split dimension.
- * @param old_from_new Vector holding permuted indices.
- */
- size_t GetSplitIndex(arma::mat& data, int split_dim, double split_val);
- size_t GetSplitIndex(arma::mat& data, int split_dim, double split_val,
- std::vector<size_t>& old_from_new);
-
-};
-
-}; // namespace tree
-}; // namespace mlpack
-
-// Include implementation.
-#include "spacetree_impl.h"
-
-#endif
Deleted: mlpack/trunk/src/mlpack/core/tree/spacetree_impl.h
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/spacetree_impl.h 2011-10-27 00:08:59 UTC (rev 10046)
+++ mlpack/trunk/src/mlpack/core/tree/spacetree_impl.h 2011-10-27 00:10:53 UTC (rev 10047)
@@ -1,486 +0,0 @@
-/**
- * @file spacetree_impl.h
- *
- * Implementation of generalized space partitioning tree.
- *
- * @experimental
- */
-
-#ifndef TREE_SPACETREE_IMPL_H
-#define TREE_SPACETREE_IMPL_H
-
-// Try to prevent direct inclusion
-#ifndef TREE_SPACETREE_H
-#error "Do not include this header directly."
-#endif
-
-#include <mlpack/core/io/io.h>
-#include "../io/log.h"
-
-namespace mlpack {
-namespace tree {
-
-// Each of these overloads is kept as a separate function to keep the overhead
-// from the two std::vectors out, if possible.
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(arma::mat& data) :
- left_(NULL),
- right_(NULL),
- begin_(0), /* This root node starts at index 0, */
- count_(data.n_cols), /* and spans all of the dataset. */
- bound_(data.n_rows),
- stat_() {
- // Do the actual splitting of this node.
- SplitNode(data);
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(
- arma::mat& data,
- std::vector<size_t>& old_from_new) :
- left_(NULL),
- right_(NULL),
- begin_(0),
- count_(data.n_cols),
- bound_(data.n_rows),
- stat_() {
- // Initialize old_from_new correctly.
- old_from_new.resize(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
- old_from_new[i] = i; // Fill with unharmed indices.
-
- // Now do the actual splitting.
- SplitNode(data, old_from_new);
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(
- arma::mat& data,
- std::vector<size_t>& old_from_new,
- std::vector<size_t>& new_from_old) :
- left_(NULL),
- right_(NULL),
- begin_(0),
- count_(data.n_cols),
- bound_(data.n_rows),
- stat_() {
- // Initialize the old_from_new vector correctly.
- old_from_new.resize(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
- old_from_new[i] = i; // Fill with unharmed indices.
-
- // Now do the actual splitting.
- SplitNode(data, old_from_new);
-
- // Map the new_from_old indices correctly.
- new_from_old.resize(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
- new_from_old[old_from_new[i]] = i;
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(
- arma::mat& data,
- size_t begin_in,
- size_t count_in) :
- left_(NULL),
- right_(NULL),
- begin_(begin_in),
- count_(count_in),
- bound_(data.n_rows),
- stat_() {
- // Perform the actual splitting.
- SplitNode(data);
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(
- arma::mat& data,
- size_t begin_in,
- size_t count_in,
- std::vector<size_t>& old_from_new) :
- left_(NULL),
- right_(NULL),
- begin_(begin_in),
- count_(count_in),
- bound_(data.n_rows),
- stat_() {
- // Hopefully the vector is initialized correctly! We can't check that
- // entirely but we can do a minor sanity check.
- assert(old_from_new.size() == data.n_cols);
-
- // Perform the actual splitting.
- SplitNode(data, old_from_new);
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree(
- arma::mat& data,
- size_t begin_in,
- size_t count_in,
- std::vector<size_t>& old_from_new,
- std::vector<size_t>& new_from_old) :
- left_(NULL),
- right_(NULL),
- begin_(begin_in),
- count_(count_in),
- bound_(data.n_rows),
- stat_() {
- // Hopefully the vector is initialized correctly! We can't check that
- // entirely but we can do a minor sanity check.
- assert(old_from_new.size() == data.n_cols);
-
- // Perform the actual splitting.
- SplitNode(data, old_from_new);
-
- // Map the new_from_old indices correctly.
- new_from_old.resize(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
- new_from_old[old_from_new[i]] = i;
-}
-
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::BinarySpaceTree() :
- left_(NULL),
- right_(NULL),
- begin_(0),
- count_(0),
- bound_(),
- stat_() {
- // Nothing to do.
-}
-
-/***
- * Deletes this node, deallocating the memory for the children and calling their
- * destructors in turn. This will invalidate any pointers or references to any
- * nodes which are children of this one.
- */
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>::~BinarySpaceTree() {
- if (left_)
- delete left_;
- if (right_)
- delete right_;
-}
-
-/**
- * Find a node in this tree by its begin and count.
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param begin_q the begin() of the node to find
- * @param count_q the count() of the node to find
- * @return the found node, or NULL
- */
-template<typename Bound, typename Statistic>
-const BinarySpaceTree<Bound, Statistic>*
-BinarySpaceTree<Bound, Statistic>::FindByBeginCount(size_t begin_q,
- size_t count_q) const {
-
- mlpack::Log::Assert(begin_q >= begin_);
- mlpack::Log::Assert(count_q <= count_);
- if (begin_ == begin_q && count_ == count_q)
- return this;
- else if (is_leaf())
- return NULL;
- else if (begin_q < right_->begin_)
- return left_->FindByBeginCount(begin_q, count_q);
- else
- return right_->FindByBeginCount(begin_q, count_q);
-}
-
-/**
- * Find a node in this tree by its begin and count (const).
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param begin_q the begin() of the node to find
- * @param count_q the count() of the node to find
- * @return the found node, or NULL
- */
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>*
-BinarySpaceTree<Bound, Statistic>::FindByBeginCount(size_t begin_q,
- size_t count_q) {
-
- mlpack::Log::Assert(begin_q >= begin_);
- mlpack::Log::Assert(count_q <= count_);
- if (begin_ == begin_q && count_ == count_q)
- return this;
- else if (is_leaf())
- return NULL;
- else if (begin_q < right_->begin_)
- return left_->FindByBeginCount(begin_q, count_q);
- else
- return right_->FindByBeginCount(begin_q, count_q);
-}
-
-template<typename Bound, typename Statistic>
-const Bound& BinarySpaceTree<Bound, Statistic>::bound() const {
- return bound_;
-}
-
-template<typename Bound, typename Statistic>
-Bound& BinarySpaceTree<Bound, Statistic>::bound() {
- return bound_;
-}
-
-template<typename Bound, typename Statistic>
-const Statistic& BinarySpaceTree<Bound, Statistic>::stat() const {
- return stat_;
-}
-
-template<typename Bound, typename Statistic>
-Statistic& BinarySpaceTree<Bound, Statistic>::stat() {
- return stat_;
-}
-
-template<typename Bound, typename Statistic>
-bool BinarySpaceTree<Bound, Statistic>::is_leaf() const {
- return !left_;
-}
-
-/**
- * Gets the left branch of the tree.
- */
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>*
-BinarySpaceTree<Bound, Statistic>::left() const {
- // TODO: Const correctness
- return left_;
-}
-
-/**
- * Gets the right branch.
- */
-template<typename Bound, typename Statistic>
-BinarySpaceTree<Bound, Statistic>*
-BinarySpaceTree<Bound, Statistic>::right() const {
- // TODO: Const correctness
- return right_;
-}
-
-/**
- * Gets the index of the begin point of this subset.
- */
-template<typename Bound, typename Statistic>
-size_t BinarySpaceTree<Bound, Statistic>::begin() const {
- return begin_;
-}
-
-/**
- * Gets the index one beyond the last index in the series.
- */
-template<typename Bound, typename Statistic>
-size_t BinarySpaceTree<Bound, Statistic>::end() const {
- return begin_ + count_;
-}
-
-/**
- * Gets the number of points in this subset.
- */
-template<typename Bound, typename Statistic>
-size_t BinarySpaceTree<Bound, Statistic>::count() const {
- return count_;
-}
-
-template<typename Bound, typename Statistic>
-void BinarySpaceTree<Bound, Statistic>::Print() const {
- printf("node: %d to %d: %d points total\n",
- begin_, begin_ + count_ - 1, count_);
- if (!is_leaf()) {
- left_->Print();
- right_->Print();
- }
-}
-
-template<typename Bound, typename Statistic>
-void BinarySpaceTree<Bound, Statistic>::SplitNode(arma::mat& data) {
- // This should be a single function for Bound.
- // We need to expand the bounds of this node properly.
- for (size_t i = begin_; i < (begin_ + count_); i++)
- bound_ |= data.unsafe_col(i);
-
- // Now, check if we need to split at all.
- if (count_ <= (size_t) CLI::GetParam<int>("tree/leaf_size"))
- return; // We can't split this.
-
- // Figure out which dimension to split on.
- size_t split_dim = data.n_rows; // Indicate invalid by max_dim + 1.
- double max_width = -1;
-
- // Find the split dimension.
- for (size_t d = 0; d < data.n_rows; d++) {
- double width = bound_[d].width();
-
- if (width > max_width) {
- max_width = width;
- split_dim = d;
- }
- }
-
- // Split in the middle of that dimension.
- double split_val = bound_[split_dim].mid();
-
- if (max_width == 0) // All these points are the same. We can't split.
- return;
-
- // Perform the actual splitting. This will order the dataset such that points
- // with value in dimension split_dim less than or equal to split_val are on
- // the left of split_col, and points with value in dimension split_dim greater
- // than split_val are on the right side of split_col.
- size_t split_col = GetSplitIndex(data, split_dim, split_val);
-
- // Now that we know the split column, we will recursively split the children
- // by calling their constructors (which perform this splitting process).
- left_ = new BinarySpaceTree<Bound, Statistic>(data, begin_,
- split_col - begin_);
- right_ = new BinarySpaceTree<Bound, Statistic>(data, split_col,
- begin_ + count_ - split_col);
-}
-
-template<typename Bound, typename Statistic>
-void BinarySpaceTree<Bound, Statistic>::SplitNode(
- arma::mat& data,
- std::vector<size_t>& old_from_new) {
- // This should be a single function for Bound.
- // We need to expand the bounds of this node properly.
- for (size_t i = begin_; i < (begin_ + count_); i++)
- bound_ |= data.unsafe_col(i);
-
- // First, check if we need to split at all.
- if (count_ <= (size_t) CLI::GetParam<int>("tree/leaf_size"))
- return; // We can't split this.
-
- // Figure out which dimension to split on.
- size_t split_dim = data.n_rows; // Indicate invalid by max_dim + 1.
- double max_width = -1;
-
- // Find the split dimension.
- for (size_t d = 0; d < data.n_rows; d++) {
- double width = bound_[d].width();
-
- if (width > max_width) {
- max_width = width;
- split_dim = d;
- }
- }
-
- // Split in the middle of that dimension.
- double split_val = bound_[split_dim].mid();
-
- if (max_width == 0) // All these points are the same. We can't split.
- return;
-
- // Perform the actual splitting. This will order the dataset such that points
- // with value in dimension split_dim less than or equal to split_val are on
- // the left of split_col, and points with value in dimension split_dim greater
- // than split_val are on the right side of split_col.
- size_t split_col = GetSplitIndex(data, split_dim, split_val, old_from_new);
-
- // Now that we know the split column, we will recursively split the children
- // by calling their constructors (which perform this splitting process).
- left_ = new BinarySpaceTree<Bound, Statistic>(data, begin_,
- split_col - begin_, old_from_new);
- right_ = new BinarySpaceTree<Bound, Statistic>(data, split_col,
- begin_ + count_ - split_col, old_from_new);
-}
-
-template<typename Bound, typename Statistic>
-size_t BinarySpaceTree<Bound, Statistic>::GetSplitIndex(
- arma::mat& data,
- int split_dim,
- double split_val) {
- // This method modifies the input dataset. We loop both from the left and
- // right sides of the points contained in this node. The points less than
- // split_val should be on the left side of the matrix, and the points greater
- // than split_val should be on the right side of the matrix.
- size_t left = begin_;
- size_t right = begin_ + count_ - 1;
-
- // First half-iteration of the loop is out here because the termination
- // condition is in the middle.
- while ((data(split_dim, left) < split_val) && (left <= right))
- left++;
- while ((data(split_dim, right) >= split_val) && (left <= right))
- right--;
-
- while(left <= right) {
- // Swap columns.
- data.swap_cols(left, right);
-
- // See how many points on the left are correct. When they are correct,
- // increase the left counter accordingly. When we encounter one that isn't
- // correct, stop. We will switch it later.
- while ((data(split_dim, left) < split_val) && (left <= right))
- left++;
-
- // Now see how many points on the right are correct. When they are correct,
- // decrease the right counter accordingly. When we encounter one that isn't
- // correct, stop. We will switch it with the wrong point we found in the
- // previous loop.
- while ((data(split_dim, right) >= split_val) && (left <= right))
- right--;
- }
-
- assert(left == right + 1);
-
- return left;
-}
-
-template<typename Bound, typename Statistic>
-size_t BinarySpaceTree<Bound, Statistic>::GetSplitIndex(
- arma::mat& data,
- int split_dim,
- double split_val,
- std::vector<size_t>& old_from_new) {
- // This method modifies the input dataset. We loop both from the left and
- // right sides of the points contained in this node. The points less than
- // split_val should be on the left side of the matrix, and the points greater
- // than split_val should be on the right side of the matrix.
- size_t left = begin_;
- size_t right = begin_ + count_ -1;
-
- // First half-iteration of the loop is out here because the termination
- // condition is in the middle.
- while ((data(split_dim, left) < split_val) && (left <= right))
- left++;
- while ((data(split_dim, right) >= split_val) && (left <= right))
- right--;
-
- while(left <= right) {
- // Swap columns.
- data.swap_cols(left, right);
-
- // Update the indices for what we changed.
- size_t t = old_from_new[left];
- old_from_new[left] = old_from_new[right];
- old_from_new[right] = t;
-
- // See how many points on the left are correct. When they are correct,
- // increase the left counter accordingly. When we encounter one that isn't
- // correct, stop. We will switch it later.
- while ((data(split_dim, left) < split_val) && (left <= right))
- left++;
-
- // Now see how many points on the right are correct. When they are correct,
- // decrease the right counter accordingly. When we encounter one that isn't
- // correct, stop. We will switch it with the wrong point we found in the
- // previous loop.
- while ((data(split_dim, right) >= split_val) && (left <= right))
- right--;
- }
-
- assert(left == right + 1);
-
- return left;
-}
-
-}; // namespace tree
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/core/tree/statistic.h
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/statistic.h 2011-10-27 00:08:59 UTC (rev 10046)
+++ mlpack/trunk/src/mlpack/core/tree/statistic.h 2011-10-27 00:10:53 UTC (rev 10047)
@@ -1,41 +0,0 @@
-/**
- * @file statistic.h
- *
- * Home for the concept of tree statistics.
- *
- * You should define your own statistic that looks like EmptyStatistic.
- *
- * @experimental
- */
-
-#ifndef TREE_STATISTIC_H
-#define TREE_STATISTIC_H
-
-#include <armadillo>
-
-/**
- * Empty statistic if you are not interested in storing statistics in your
- * tree. Use this as a template for your own.
- *
- * @experimental
- */
-class EmptyStatistic {
- public:
- EmptyStatistic() {}
- ~EmptyStatistic() {}
-
- /**
- * Initializes by taking statistics on raw data.
- */
- void Init(const arma::mat& dataset, size_t start, size_t count) { }
-
- /**
- * Initializes by combining statistics of two partitions.
- *
- * This lets you build fast bottom-up statistics when building trees.
- */
- void Init(const arma::mat& dataset, size_t start, size_t count,
- const EmptyStatistic& left_stat, const EmptyStatistic& right_stat) { }
-};
-
-#endif
More information about the mlpack-svn
mailing list