[mlpack-git] master: Set FirstBound() after the allknn. Now, checking if FirstBound() == DBL_MAX is unnecessary, although this doesn't (yet) seem to give any additional prunes. (ee7eb43)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:59 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit ee7eb43f0197e50ed6aab9720daaff0b61f9807d
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Jan 13 17:35:03 2015 -0500
Set FirstBound() after the allknn. Now, checking if FirstBound() == DBL_MAX is unnecessary, although this doesn't (yet) seem to give any additional prunes.
>---------------------------------------------------------------
ee7eb43f0197e50ed6aab9720daaff0b61f9807d
src/mlpack/methods/kmeans/dual_tree_kmeans.hpp | 2 ++
.../methods/kmeans/dual_tree_kmeans_impl.hpp | 35 +++++-----------------
.../methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 4 ---
3 files changed, 10 insertions(+), 31 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index c1e6070..ebeb0ed 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -58,6 +58,8 @@ class DualTreeKMeans
//! Track distance calculations.
size_t distanceCalculations;
+ void ClusterTreeUpdate(TreeType* node);
+
void TreeUpdate(TreeType* node,
const size_t clusters,
const arma::vec& clusterDistances);
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 0065a61..88c4e96 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -80,6 +80,9 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
distanceCalculations += nns.BaseCases();
distanceCalculations += nns.Scores();
+ // Update FirstBound().
+ ClusterTreeUpdate(centroidTree);
+
// Now run the dual-tree algorithm.
typedef DualTreeKMeansRules<MetricType, TreeType> RulesType;
RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids,
@@ -128,43 +131,21 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
return std::sqrt(residual);
}
-/*
template<typename MetricType, typename MatType, typename TreeType>
void DualTreeKMeans<MetricType, MatType, TreeType>::ClusterTreeUpdate(
TreeType* node)
{
- // We will abuse stat.owner to hold the cluster with the most change.
- // stat.minQueryNodeDistance will hold the distance.
- double maxChange = 0.0;
- size_t maxChangeCluster = 0;
-
+ // Just update the first bound, after recursing to the bottom.
+ double firstBound = 0.0;
for (size_t i = 0; i < node->NumChildren(); ++i)
{
ClusterTreeUpdate(&node->Child(i));
-
- const double nodeChange = node->Child(i).Stat().MinQueryNodeDistance();
- if (nodeChange > maxChange)
- {
- maxChange = nodeChange;
- maxChangeCluster = node->Child(i).Stat().Owner();
- }
+ if (node->Child(i).Stat().FirstBound() >= firstBound)
+ firstBound = node->Child(i).Stat().FirstBound();
}
- for (size_t i = 0; i < node->NumPoints(); ++i)
- {
- const size_t cluster = oldFromNewCentroids[node->Point(i)];
- const double pointChange = clusterDistances[cluster];
- if (pointChange > maxChange)
- {
- maxChange = pointChange;
- maxChangeCluster = cluster;
- }
- }
-
- node->Stat().Owner() = maxChangeCluster;
- node->Stat().MinQueryNodeDistance() = maxChange;
+ node->Stat().FirstBound() = firstBound;
}
-*/
template<typename MetricType, typename MatType, typename TreeType>
void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 53382eb..d456c83 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -215,10 +215,6 @@ double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
if (queryNode.NumDescendants() > 1)
{
const double minQueryDistance = queryNode.Stat().FirstBound();
- if (minQueryDistance == DBL_MAX)
- return 0.0;
- else
- Log::Warn << "Not DBL_MAX!\n";
const double score = ElkanTypeScore(queryNode, referenceNode,
minQueryDistance);
return score;
More information about the mlpack-git
mailing list