[mlpack-git] master: A start at implementing pointwise Hamerly bounds. (cf56ad4)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:07 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit cf56ad4a2a051ca0abeadfe59cc139a4d62bf057
Author: Ryan Curtin <ryan at ratml.org>
Date: Thu Jan 29 17:32:07 2015 -0500
A start at implementing pointwise Hamerly bounds.
>---------------------------------------------------------------
cf56ad4a2a051ca0abeadfe59cc139a4d62bf057
src/mlpack/methods/kmeans/dual_tree_kmeans.hpp | 1 +
src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 2 ++
src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp | 2 ++
src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 11 +++++++----
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index 68714cd..ab036fa 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -52,6 +52,7 @@ class DualTreeKMeans
arma::vec distances;
arma::Col<size_t> visited;
arma::Col<size_t> distanceIteration;
+ arma::vec hamerlyBounds;
//! The current iteration.
size_t iteration;
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 9d31609..8887acb 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -32,6 +32,8 @@ DualTreeKMeans<MetricType, MatType, TreeType>::DualTreeKMeans(
assignments.zeros(dataset.n_cols);
visited.zeros(dataset.n_cols);
distanceIteration.zeros(dataset.n_cols);
+ hamerlyBounds.set_size(dataset.n_cols);
+ hamerlyBounds.fill(DBL_MAX);
Timer::Start("tree_building");
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
index 43d88d8..4c960be 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
@@ -25,6 +25,7 @@ class DualTreeKMeansRules
arma::Col<size_t>& assignments,
arma::Col<size_t>& visited,
arma::Col<size_t>& distanceIteration,
+ arma::vec& hamerlyBounds,
const arma::mat& interclusterDistances,
MetricType& metric);
@@ -62,6 +63,7 @@ class DualTreeKMeansRules
arma::Col<size_t>& assignments;
arma::Col<size_t>& visited;
arma::Col<size_t>& distanceIteration;
+ arma::vec& hamerlyBounds;
const arma::mat& interclusterDistances;
MetricType& metric;
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 04b82ae..21d526d 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -26,6 +26,7 @@ DualTreeKMeansRules<MetricType, TreeType>::DualTreeKMeansRules(
arma::Col<size_t>& assignments,
arma::Col<size_t>& visited,
arma::Col<size_t>& distanceIteration,
+ arma::vec& hamerlyBounds,
const arma::mat& interclusterDistances,
MetricType& metric) :
dataset(dataset),
@@ -39,6 +40,7 @@ DualTreeKMeansRules<MetricType, TreeType>::DualTreeKMeansRules(
assignments(assignments),
visited(visited),
distanceIteration(distanceIteration),
+ hamerlyBounds(hamerlyBounds),
interclusterDistances(interclusterDistances),
metric(metric),
distanceCalculations(0)
@@ -56,10 +58,6 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
// It's possible that the reference node has been pruned before we got to the
// base case. In that case, don't do the base case, and just return.
-// if (referenceIndex == 37447)
-// Log::Warn << "Visit " << referenceIndex << ", q" << queryIndex << ". " <<
-//traversalInfo.LastReferenceNode()->Stat().ClustersPruned() +
-//visited[referenceIndex] << ".\n";
if (traversalInfo.LastReferenceNode()->Stat().ClustersPruned() +
visited[referenceIndex] == centroids.n_cols)
return 0.0;
@@ -75,12 +73,17 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
distanceIteration[referenceIndex] = iteration;
distances[referenceIndex] = distance;
assignments[referenceIndex] = mappings[queryIndex];
+ hamerlyBounds[referenceIndex] = DBL_MAX; // Not sure about this one.
}
else if (distance < distances[referenceIndex])
{
distances[referenceIndex] = distance;
assignments[referenceIndex] = mappings[queryIndex];
}
+ else if (distance < hamerlyBounds[referenceIndex])
+ {
+ hamerlyBounds[referenceIndex] = distance; // Not yet done.
+ }
++visited[referenceIndex];
More information about the mlpack-git mailing list