[mlpack-git] master: Various rp-tree fixes. Fixed debug build. (ea66e5d)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Aug 16 13:24:05 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/a7794bde8082c691553152393e1e230098f5e920...87776e52cf9ead63fa458118a0cfd2fe46b23466
>---------------------------------------------------------------
commit ea66e5d17914460289974cc61f0669941edc2524
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date: Tue Aug 16 20:24:05 2016 +0300
Various rp-tree fixes. Fixed debug build.
>---------------------------------------------------------------
ea66e5d17914460289974cc61f0669941edc2524
.../binary_space_tree/binary_space_tree_impl.hpp | 8 ++++-
.../tree/binary_space_tree/rp_tree_max_split.hpp | 14 --------
.../binary_space_tree/rp_tree_max_split_impl.hpp | 38 +++++++---------------
.../tree/binary_space_tree/rp_tree_mean_split.hpp | 14 --------
.../binary_space_tree/rp_tree_mean_split_impl.hpp | 37 ++++++++-------------
.../binary_space_tree/vantage_point_split_impl.hpp | 2 --
6 files changed, 32 insertions(+), 81 deletions(-)
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 95403c2..6059a3c 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@ -667,7 +667,10 @@ void BinarySpaceTree<MetricType, StatisticType, MatType, BoundType, SplitType>::
// Perform the actual splitting. This will order the dataset such that points
// that belong to the left subtree are on the left of splitCol, and points
// from the right subtree are on the right side of splitCol.
- splitCol = PerformSplit(*dataset, begin, count, splitInfo);
+ splitCol = PerformSplit(*dataset, begin, count, splitInfo);
+
+ assert(splitCol > begin);
+ assert(splitCol < begin + count);
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
@@ -732,6 +735,9 @@ SplitNode(std::vector<size_t>& oldFromNew,
// from the right subtree are on the right side of splitCol.
splitCol = PerformSplit(*dataset, begin, count, splitInfo, oldFromNew);
+ assert(splitCol > begin);
+ assert(splitCol < begin + count);
+
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
left = new BinarySpaceTree(this, begin, splitCol - begin, oldFromNew,
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
index 5c31e84..52b2be8 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
@@ -83,20 +83,6 @@ class RPTreeMaxSplit
const arma::Col<ElemType>& direction);
/**
- * Obtain a number of random distinct samples from the dataset. All samples
- * belong to [begin, begin + count).
- *
- * @param distinctSamples The indices of the samples.
- * @param begin The lower bound of indices.
- * @param count The number of point candidates.
- * @param numSamples The maximum number of samples.
- */
- static void GetDistinctSamples(arma::uvec& distinctSamples,
- const size_t begin,
- const size_t count,
- const size_t numSamples);
-
- /**
* This method finds the position of the hyperplane that will split the node.
*
* @param data The dataset used by the binary space tree.
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
index d5cd714..50f6a89 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split_impl.hpp
@@ -69,25 +69,6 @@ GetRandomDeviation(const MatType& data,
}
template<typename BoundType, typename MatType>
-void RPTreeMaxSplit<BoundType, MatType>::GetDistinctSamples(
- arma::uvec& distinctSamples,
- const size_t begin,
- const size_t count,
- const size_t numSamples)
-{
- arma::Col<size_t> samples;
-
- samples.zeros(count);
-
- for (size_t i = 0; i < numSamples; i++)
- samples [ (size_t) math::RandInt(count) ]++;
-
- distinctSamples = arma::find(samples > 0);
-
- distinctSamples += begin;
-}
-
-template<typename BoundType, typename MatType>
bool RPTreeMaxSplit<BoundType, MatType>::GetSplitVal(
const MatType& data,
const size_t begin,
@@ -100,26 +81,29 @@ bool RPTreeMaxSplit<BoundType, MatType>::GetSplitVal(
arma::uvec samples;
// Get no more than numSamples distinct samples.
- GetDistinctSamples(samples, begin, count, numSamples);
+ math::ObtainDistinctSamples(begin, begin + count, numSamples, samples);
- std::vector<ElemType> values(samples.n_elem);
+ arma::Col<ElemType> values(samples.n_elem);
// Find the median of scalar products of the samples and the normal vector.
for (size_t k = 0; k < samples.n_elem; k++)
values[k] = arma::dot(data.col(samples[k]), direction);
- std::sort(values.begin(), values.end());
-
- if (values[0] == values[values.size() - 1])
+ const ElemType maximum = arma::max(values);
+ const ElemType minimum = arma::min(values);
+ if (minimum == maximum)
return false;
- splitVal = values[values.size() / 2];
+ splitVal = arma::median(values);
// Add a random deviation to the median.
// This algorithm differs from the method suggested in the
// random projection tree paper.
- splitVal += math::Random((values[values.size() / 2] - values[0]) * 0.75,
- (values[values.size() - 1] - values[values.size() / 2]) * 0.75);
+ splitVal += math::Random((minimum - splitVal) * 0.75,
+ (maximum - splitVal) * 0.75);
+
+ if (splitVal == maximum)
+ splitVal = minimum;
return true;
}
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
index ad198a2..93708ed 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
@@ -85,20 +85,6 @@ class RPTreeMeanSplit
static void GetRandomDirection(arma::Col<ElemType>& direction);
/**
- * Obtain a number of random distinct samples from the dataset. All samples
- * belong to [begin, begin + count).
- *
- * @param distinctSamples The indices of the samples.
- * @param begin The lower bound of indices.
- * @param count The number of point candidates.
- * @param numSamples The maximum number of samples.
- */
- static void GetDistinctSamples(arma::uvec& distinctSamples,
- const size_t begin,
- const size_t count,
- const size_t numSamples);
-
- /**
* Get the average distance between points in the dataset.
*
* @param data The dataset used by the binary space tree.
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
index ec5ad1d..14ebb53 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp
@@ -25,7 +25,7 @@ bool RPTreeMeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
arma::uvec samples;
// Get no more than numSamples distinct samples.
- GetDistinctSamples(samples, begin, count, numSamples);
+ math::ObtainDistinctSamples(begin, begin + count, numSamples, samples);
// Find the average distance between points.
ElemType averageDistanceSq = GetAveragePointDistance(data, samples);
@@ -62,25 +62,6 @@ bool RPTreeMeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
}
template<typename BoundType, typename MatType>
-void RPTreeMeanSplit<BoundType, MatType>::GetDistinctSamples(
- arma::uvec& distinctSamples,
- const size_t begin,
- const size_t count,
- const size_t numSamples)
-{
- arma::Col<size_t> samples;
-
- samples.zeros(count);
-
- for (size_t i = 0; i < numSamples; i++)
- samples [ (size_t) math::RandInt(count) ]++;
-
- distinctSamples = arma::find(samples > 0);
-
- distinctSamples += begin;
-}
-
-template<typename BoundType, typename MatType>
typename MatType::elem_type RPTreeMeanSplit<BoundType, MatType>::
GetAveragePointDistance(
MatType& data,
@@ -131,11 +112,16 @@ bool RPTreeMeanSplit<BoundType, MatType>::GetDotMedian(
for (size_t k = 0; k < samples.n_elem; k++)
values[k] = arma::dot(data.col(samples[k]), direction);
- if (arma::min(values) == arma::max(values))
+ const ElemType maximum = arma::max(values);
+ const ElemType minimum = arma::min(values);
+ if (minimum == maximum)
return false;
splitVal = arma::median(values);
+ if (splitVal == maximum)
+ splitVal = minimum;
+
return true;
}
@@ -148,7 +134,7 @@ bool RPTreeMeanSplit<BoundType, MatType>::GetMeanMedian(
{
arma::Col<ElemType> values(samples.n_elem);
- mean = arma::mean(data.cols(samples));
+ mean = arma::mean(data.cols(samples), 1);
arma::Col<ElemType> tmp(data.n_rows);
@@ -160,11 +146,16 @@ bool RPTreeMeanSplit<BoundType, MatType>::GetMeanMedian(
values[k] = arma::dot(tmp, tmp);
}
- if (arma::min(values) == arma::max(values))
+ const ElemType maximum = arma::max(values);
+ const ElemType minimum = arma::min(values);
+ if (minimum == maximum)
return false;
splitVal = arma::median(values);
+ if (splitVal == maximum)
+ splitVal = minimum;
+
return true;
}
diff --git a/src/mlpack/core/tree/binary_space_tree/vantage_point_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/vantage_point_split_impl.hpp
index 94ec63b..3028d53 100644
--- a/src/mlpack/core/tree/binary_space_tree/vantage_point_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/vantage_point_split_impl.hpp
@@ -31,8 +31,6 @@ SplitNode(const BoundType& bound, MatType& data, const size_t begin,
splitInfo = SplitInfo(bound.Metric(), data.col(vantagePointIndex), mu);
- assert(splitCol > begin);
- assert(splitCol < begin + count);
return true;
}
More information about the mlpack-git
mailing list