[mlpack-svn] r14962 - mlpack/trunk/src/mlpack/core/tree/cover_tree
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Apr 25 17:51:26 EDT 2013
Author: rcurtin
Date: 2013-04-25 17:51:26 -0400 (Thu, 25 Apr 2013)
New Revision: 14962
Modified:
mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp
mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp
Log:
Refactor cover tree constructors into one CreateChildren() function, which saves
a lot of space. A little bit of changing needed to be done to set the scale
right in the root node, and as a result the assert that checks that the furthest
descendant distance is within the scale has been removed (since that has never
been a problem). This is for #274, but the other constructor will not be
changed until mlpack 1.1.0.
Modified: mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp 2013-04-25 21:43:08 UTC (rev 14961)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp 2013-04-25 21:51:26 UTC (rev 14962)
@@ -95,6 +95,8 @@
* The dataset will not be modified during the building procedure (unlike
* BinarySpaceTree).
*
+ * The last argument will be removed in mlpack 1.1.0 (see #274 and #273).
+ *
* @param dataset Reference to the dataset to build a tree on.
* @param base Base to use during tree building (default 2.0).
*/
@@ -103,6 +105,19 @@
MetricType* metric = NULL);
/**
+ * Create the cover tree with the given dataset and the given instantiated
+ * metric. Optionally, set the base. The dataset will not be modified during
+ * the building procedure (unlike BinarySpaceTree).
+ *
+ * @param dataset Reference to the dataset to build a tree on.
+ * @param metric Instantiated metric to use during tree building.
+ * @param base Base to use during tree building (default 2.0).
+ */
+ CoverTree(const arma::mat& dataset,
+ MetricType& metric,
+ const double base = 2.0);
+
+ /**
* Construct a child cover tree node. This constructor is not meant to be
* used externally, but it could be used to insert another node into a tree.
* This procedure uses only one vector for the near set, the far set, and the
@@ -326,6 +341,15 @@
MetricType* metric;
/**
+ * Create the children for this node.
+ */
+ void CreateChildren(arma::Col<size_t>& indices,
+ arma::vec& distances,
+ size_t nearSetSize,
+ size_t& farSetSize,
+ size_t& usedSetSize);
+
+ /**
* Fill the vector of distances with the distances between the point specified
* by pointIndex and each point in the indices array. The distances of the
* first pointSetSize points in indices are calculated (so, this does not
Modified: mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp 2013-04-25 21:43:08 UTC (rev 14961)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp 2013-04-25 21:51:26 UTC (rev 14962)
@@ -24,6 +24,7 @@
MetricType* metric) :
dataset(dataset),
point(RootPointPolicy::ChooseRoot(dataset)),
+ scale(INT_MAX),
base(base),
parent(NULL),
parentDistance(0),
@@ -35,6 +36,10 @@
if (localMetric)
this->metric = new MetricType();
+ // If there is only one point in the dataset... uh, we're done.
+ if (dataset.n_cols == 1)
+ return;
+
// Kick off the building. Create the indices array and the distances array.
arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
dataset.n_cols - 1, dataset.n_cols - 1);
@@ -48,147 +53,62 @@
// Build the initial distances.
ComputeDistances(point, indices, distances, dataset.n_cols - 1);
- // Now determine the scale factor of the root node.
- const double maxDistance = max(distances);
- scale = (int) ceil(log(maxDistance) / log(base));
- const double bound = pow(base, scale - 1);
-
- // Unfortunately, we can't call out to other constructors, so we have to copy
- // a little bit of code from the other constructor. First we build the self
- // child.
- size_t childNearSetSize = SplitNearFar(indices, distances, bound,
- dataset.n_cols - 1);
- size_t childFarSetSize = (dataset.n_cols - 1) - childNearSetSize;
+ // Create the children.
+ size_t farSetSize = 0;
size_t usedSetSize = 0;
- children.push_back(new CoverTree(dataset, base, point, scale - 1,
- this, 0, indices, distances, childNearSetSize, childFarSetSize,
- usedSetSize, *metric));
+ CreateChildren(indices, distances, dataset.n_cols - 1, farSetSize,
+ usedSetSize);
- furthestDescendantDistance = children[0]->FurthestDescendantDistance();
+ // Use the furthest descendant distance to determine the scale of the root
+ // node.
+ scale = (int) ceil(log(furthestDescendantDistance) / log(base));
- // If we created an implicit node, take its self-child instead (this could
- // happen multiple times).
- while (children[children.size() - 1]->NumChildren() == 1)
- {
- CoverTree* old = children[children.size() - 1];
- children.erase(children.begin() + children.size() - 1);
+ // Initialize statistic.
+ stat = StatisticType(*this);
+}
- // Now take its child.
- children.push_back(&(old->Child(0)));
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::CoverTree(
+ const arma::mat& dataset,
+ MetricType& metric,
+ const double base) :
+ dataset(dataset),
+ point(RootPointPolicy::ChooseRoot(dataset)),
+ scale(INT_MAX),
+ base(base),
+ parent(NULL),
+ parentDistance(0),
+ furthestDescendantDistance(0),
+ localMetric(false),
+ metric(&metric)
+{
+ // If there is only one point in the dataset, uh, we're done.
+ if (dataset.n_cols == 1)
+ return;
- // Set its parent correctly.
- old->Child(0).Parent() = this;
+ // Kick off the building. Create the indices array and the distances array.
+ arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
+ dataset.n_cols - 1, dataset.n_cols - 1);
+ // This is now [1 2 3 4 ... n]. We must be sure that our point does not
+ // occur.
+ if (point != 0)
+ indices[point - 1] = 0; // Put 0 back into the set; remove what was there.
- // Remove its child (so it doesn't delete it).
- old->Children().erase(old->Children().begin() + old->Children().size() - 1);
+ arma::vec distances(dataset.n_cols - 1);
- // Now delete it.
- delete old;
- }
+ // Build the initial distances.
+ ComputeDistances(point, indices, distances, dataset.n_cols - 1);
- size_t nearSetSize = (dataset.n_cols - 1) - usedSetSize;
+ // Create the children.
+ size_t farSetSize = 0;
+ size_t usedSetSize = 0;
+ CreateChildren(indices, distances, dataset.n_cols - 1, farSetSize,
+ usedSetSize);
- // We have no far set, so the array is organized thusly:
- // [ near | used ]. No resorting is necessary.
- // Therefore, go ahead and build the children.
- while (nearSetSize > 0)
- {
- // We want to select the furthest point in the near set as the next child.
- size_t newPointIndex = nearSetSize - 1;
+ // Use the furthest descendant distance to determine the scale of the root
+ // node.
+ scale = (int) ceil(log(furthestDescendantDistance) / log(base));
- // Swap to front if necessary.
- if (newPointIndex != 0)
- {
- const size_t tempIndex = indices[newPointIndex];
- const double tempDist = distances[newPointIndex];
-
- indices[newPointIndex] = indices[0];
- distances[newPointIndex] = distances[0];
-
- indices[0] = tempIndex;
- distances[0] = tempDist;
- }
-
- if (distances[0] > furthestDescendantDistance)
- furthestDescendantDistance = distances[0];
-
- size_t childUsedSetSize = 0;
-
- // If there's only one point left, we don't need this crap.
- if (nearSetSize == 1)
- {
- size_t childNearSetSize = 0;
- size_t childFarSetSize = 0;
- children.push_back(new CoverTree(dataset, base,
- indices[0], scale - 1, this, distances[0], indices, distances,
- childNearSetSize, childFarSetSize, usedSetSize, *metric));
-
- // And we're done.
- break;
- }
-
- // Create the near and far set indices and distance vectors.
- arma::Col<size_t> childIndices(nearSetSize);
- childIndices.rows(0, (nearSetSize - 2)) = indices.rows(1, nearSetSize - 1);
- // Put the current point into the used set, so when we move our indices to
- // the used set, this will be done for us.
- childIndices(nearSetSize - 1) = indices[0];
- arma::vec childDistances(nearSetSize);
-
- // Build distances for the child.
- ComputeDistances(indices[0], childIndices, childDistances,
- nearSetSize - 1);
- childDistances(nearSetSize - 1) = 0;
-
- // Split into near and far sets for this point.
- childNearSetSize = SplitNearFar(childIndices, childDistances, bound,
- nearSetSize - 1);
-
- // Build this child (recursively).
- childUsedSetSize = 1; // Mark self point as used.
- childFarSetSize = ((nearSetSize - 1) - childNearSetSize);
- children.push_back(new CoverTree(dataset, base, indices[0],
- scale - 1, this, distances[0], childIndices, childDistances,
- childNearSetSize, childFarSetSize, childUsedSetSize, *metric));
-
- // If we created an implicit node, take its self-child instead (this could
- // happen multiple times).
- while (children[children.size() - 1]->NumChildren() == 1)
- {
- CoverTree* old = children[children.size() - 1];
- children.erase(children.begin() + children.size() - 1);
-
- // Now take its child.
- children.push_back(&(old->Child(0)));
-
- // Set its parent correctly.
- old->Child(0).Parent() = this;
-
- // Remove its child (so it doesn't delete it).
- old->Children().erase(old->Children().begin() + old->Children().size()
- - 1);
-
- // Now delete it.
- delete old;
- }
-
- // Now with the child created, it returns the childIndices and
- // childDistances vectors in this form:
- // [ childFar | childUsed ]
- // For each point in the childUsed set, we must move that point to the used
- // set in our own vector.
- size_t farSetSize = 0;
- MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
- childIndices, childFarSetSize, childUsedSetSize);
- }
-
- // Calculate furthest descendant.
- for (size_t i = 0; i < usedSetSize; ++i)
- if (distances[i] > furthestDescendantDistance)
- furthestDescendantDistance = distances[i];
-
- Log::Assert(furthestDescendantDistance <= pow(base, scale + 1));
-
// Initialize statistic.
stat = StatisticType(*this);
}
@@ -225,10 +145,26 @@
return;
}
+ // Otherwise, create the children.
+ CreateChildren(indices, distances, nearSetSize, farSetSize, usedSetSize);
+
+ // Initialize statistic.
+ stat = StatisticType(*this);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+inline void
+CoverTree<MetricType, RootPointPolicy, StatisticType>::CreateChildren(
+ arma::Col<size_t>& indices,
+ arma::vec& distances,
+ size_t nearSetSize,
+ size_t& farSetSize,
+ size_t& usedSetSize)
+{
// Determine the next scale level. This should be the first level where there
// are any points in the far set. So, if we know the maximum distance in the
// distances array, this will be the largest i such that
- // maxDistance > pow(ec, i)
+ // maxDistance > pow(base, i)
// and using this for the scale factor should guarantee we are not creating an
// implicit node. If the maximum distance is 0, every point in the near set
// will be created as a leaf, and a child to this node. We also do not need
@@ -240,9 +176,8 @@
// Make the self child at the lowest possible level.
// This should not modify farSetSize or usedSetSize.
size_t tempSize = 0;
- children.push_back(new CoverTree(dataset, base, pointIndex,
- INT_MIN, this, 0, indices, distances, 0, tempSize, usedSetSize,
- metric));
+ children.push_back(new CoverTree(dataset, base, point, INT_MIN, this, 0,
+ indices, distances, 0, tempSize, usedSetSize, *metric));
// Every point in the near set should be a leaf.
for (size_t i = 0; i < nearSetSize; ++i)
@@ -250,7 +185,7 @@
// farSetSize and usedSetSize will not be modified.
children.push_back(new CoverTree(dataset, base, indices[i],
INT_MIN, this, distances[i], indices, distances, 0, tempSize,
- usedSetSize, metric));
+ usedSetSize, *metric));
usedSetSize++;
}
@@ -270,9 +205,6 @@
(int) ceil(log(maxDistance) / log(base))) - 1;
const double bound = pow(base, nextScale);
- // This needs to be taken out. It's a sanity check for now.
- Log::Assert(nextScale < scale);
-
// First, make the self child. We must split the given near set into the near
// set and far set for the self child.
size_t childNearSetSize =
@@ -281,9 +213,9 @@
// Build the self child (recursively).
size_t childFarSetSize = nearSetSize - childNearSetSize;
size_t childUsedSetSize = 0;
- children.push_back(new CoverTree(dataset, base, pointIndex,
- nextScale, this, 0, indices, distances, childNearSetSize, childFarSetSize,
- childUsedSetSize, metric));
+ children.push_back(new CoverTree(dataset, base, point, nextScale, this, 0,
+ indices, distances, childNearSetSize, childFarSetSize, childUsedSetSize,
+ *metric));
// The self-child can't modify the furthestChildDistance away from 0, but it
// can modify the furthestDescendantDistance.
@@ -352,9 +284,9 @@
if ((nearSetSize == 1) && (farSetSize == 0))
{
size_t childNearSetSize = 0;
- children.push_back(new CoverTree(dataset, base,
- indices[0], nextScale, this, distances[0], indices, distances,
- childNearSetSize, farSetSize, usedSetSize, metric));
+ children.push_back(new CoverTree(dataset, base, indices[0], nextScale,
+ this, distances[0], indices, distances, childNearSetSize, farSetSize,
+ usedSetSize, *metric));
// Because the far set size is 0, we don't have to do any swapping to
// move the point into the used set.
@@ -392,9 +324,9 @@
// Build this child (recursively).
childUsedSetSize = 1; // Mark self point as used.
- children.push_back(new CoverTree(dataset, base, indices[0],
- nextScale, this, distances[0], childIndices, childDistances,
- childNearSetSize, childFarSetSize, childUsedSetSize, metric));
+ children.push_back(new CoverTree(dataset, base, indices[0], nextScale,
+ this, distances[0], childIndices, childDistances, childNearSetSize,
+ childFarSetSize, childUsedSetSize, *metric));
// If we created an implicit node, take its self-child instead (this could
// happen multiple times).
@@ -431,11 +363,6 @@
usedSetSize); ++i)
if (distances[i] > furthestDescendantDistance)
furthestDescendantDistance = distances[i];
-
- Log::Assert(furthestDescendantDistance <= pow(base, scale + 1));
-
- // Lastly, initialize the statistic.
- stat = StatisticType(*this);
}
// Manually create a cover tree node.
More information about the mlpack-svn
mailing list