[mlpack-git] master: Replace SortStruct by std::pair (640f3a7)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Jul 12 08:45:38 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/e434aee1eab9e3ced97cccd840dd383ef8089acf...81c14d9ec8250f8a2caaf1ac06c4be12fc5fd31f
>---------------------------------------------------------------
commit 640f3a7859ea8d400bc6fed6b4395c2291ba0259
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date: Tue Jul 12 15:45:38 2016 +0300
Replace SortStruct by std::pair
>---------------------------------------------------------------
640f3a7859ea8d400bc6fed6b4395c2291ba0259
.../core/tree/rectangle_tree/r_star_tree_split.hpp | 20 --
.../tree/rectangle_tree/r_star_tree_split_impl.hpp | 266 ++++++++-----------
.../core/tree/rectangle_tree/x_tree_split.hpp | 21 --
.../core/tree/rectangle_tree/x_tree_split_impl.hpp | 295 +++++++++------------
4 files changed, 238 insertions(+), 364 deletions(-)
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
index b87389f..99bef1e 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
@@ -38,26 +38,6 @@ class RStarTreeSplit
private:
/**
- * Class to allow for faster sorting.
- */
- template<typename ElemType>
- struct SortStruct
- {
- ElemType d;
- int n;
- };
-
- /**
- * Comparator for sorting with SortStruct.
- */
- template<typename ElemType>
- static bool StructComp(const SortStruct<ElemType>& s1,
- const SortStruct<ElemType>& s2)
- {
- return s1.d < s2.d;
- }
-
- /**
* Insert a node into another node.
*/
template <typename TreeType>
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
index 795170d..77bedba 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
@@ -26,6 +26,7 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
// Convenience typedef.
typedef typename TreeType::ElemType ElemType;
+ typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
if (tree->Count() <= tree->MaxLeafSize())
return;
@@ -65,25 +66,30 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
return;
}
- std::vector<SortStruct<ElemType>> sorted(tree->Count());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
arma::Col<ElemType> center;
tree->Bound().Center(center); // Modifies centroid.
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Metric().Evaluate(center,
+ sorted[i].first = tree->Metric().Evaluate(center,
tree->Dataset().col(tree->Point(i)));
- sorted[i].n = i;
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
std::vector<size_t> pointIndices(p);
for (size_t i = 0; i < p; i++)
{
// We start from the end of sorted.
- pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].n);
+ pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].second);
- root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].n),
+ root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].second),
relevels);
}
@@ -106,14 +112,19 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
ElemType axisScore = 0.0;
// Since we only have points in the leaf nodes, we only need to sort once.
- std::vector<SortStruct<ElemType>> sorted(tree->Count());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Dataset().col(tree->Point(i))[j];
- sorted[i].n = i;
+ sorted[i].first = tree->Dataset().col(tree->Point(i))[j];
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
@@ -138,46 +149,21 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
size_t cutOff = tree->MinLeafSize() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = maxG1[k] = tree->Dataset().col(tree->Point(sorted[0].n))[k];
- minG2[k] = maxG2[k] =
- tree->Dataset().col(tree->Point(sorted[sorted.size() - 1].n))[k];
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
- for (size_t l = 1; l < tree->Count() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG1[k])
- minG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG1[k])
- maxG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- }
- else
- {
- if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG2[k])
- minG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG2[k])
- maxG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- }
- }
- }
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Dataset().col(tree->Point(sorted[l].second));
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
- (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
- }
+ for (size_t l = cutOff; l < tree->Count(); l++)
+ bound2 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
@@ -210,14 +196,19 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
}
}
- std::vector<SortStruct<ElemType>> sorted(tree->Count());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Dataset().col(tree->Point(i))[bestAxis];
- sorted[i].n = i;
+ sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
TreeType* treeOne = new TreeType(tree->Parent());
TreeType* treeTwo = new TreeType(tree->Parent());
@@ -227,9 +218,9 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->Count(); i++)
{
if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].n));
+ treeOne->InsertPoint(tree->Point(sorted[i].second));
else
- treeTwo->InsertPoint(tree->Point(sorted[i].n));
+ treeTwo->InsertPoint(tree->Point(sorted[i].second));
}
}
else
@@ -237,9 +228,9 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->Count(); i++)
{
if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].n));
+ treeOne->InsertPoint(tree->Point(sorted[i].second));
else
- treeTwo->InsertPoint(tree->Point(sorted[i].n));
+ treeTwo->InsertPoint(tree->Point(sorted[i].second));
}
}
@@ -278,6 +269,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
{
// Convenience typedef.
typedef typename TreeType::ElemType ElemType;
+ typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
@@ -369,14 +361,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
ElemType axisScore = 0.0;
// We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[j].Lo();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[j].Lo();
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -401,49 +398,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
size_t cutOff = tree->MinNumChildren() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
- maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
- minG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
- maxG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-
- for (size_t l = 1; l < tree->NumChildren() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
- minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
- maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- else
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
- minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
- maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- }
- }
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
- (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
- }
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Child(sorted[l].second).Bound();
+
+ for (size_t l = cutOff; l < tree->NumChildren(); l++)
+ bound2 |= tree->Child(sorted[l].second).Bound();
+
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
@@ -480,15 +449,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
{
ElemType axisScore = 0.0;
- // We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[j].Hi();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[j].Hi();
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -513,51 +486,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
size_t cutOff = tree->MinNumChildren() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
- maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
- minG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
- maxG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-
- for (size_t l = 1; l < tree->NumChildren() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
- minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
- maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- else
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
- minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
- maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- }
- }
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Child(sorted[l].second).Bound();
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
+ for (size_t l = cutOff; l < tree->NumChildren(); l++)
+ bound2 |= tree->Child(sorted[l].second).Bound();
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
- (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
- }
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
@@ -591,25 +534,30 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
}
}
- std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
if (lowIsBest)
{
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[bestAxis].Lo();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+ sorted[i].second = i;
}
}
else
{
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[bestAxis].Hi();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+ sorted[i].second = i;
}
}
- std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
TreeType* treeOne = new TreeType(tree->Parent());
TreeType* treeTwo = new TreeType(tree->Parent());
@@ -619,9 +567,9 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
for (size_t i = 0; i < tree->NumChildren(); i++)
{
if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].n)));
+ InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
else
- InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].n)));
+ InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
}
}
else
@@ -629,9 +577,9 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
for (size_t i = 0; i < tree->NumChildren(); i++)
{
if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].n)));
+ InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
else
- InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].n)));
+ InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
}
}
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
index 24ece3c..efa6242 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
@@ -48,27 +48,6 @@ class XTreeSplit
private:
/**
- * Class to allow for faster sorting.
- */
- template<typename ElemType>
- class sortStruct
- {
- public:
- ElemType d;
- int n;
- };
-
- /**
- * Comparator for sorting with sortStruct.
- */
- template<typename ElemType>
- static bool structComp(const sortStruct<ElemType>& s1,
- const sortStruct<ElemType>& s2)
- {
- return s1.d < s2.d;
- }
-
- /**
* Insert a node into another node.
*/
template<typename TreeType>
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
index 073d771..7b1f62d 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
@@ -25,6 +25,7 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
// Convenience typedef.
typedef typename TreeType::ElemType ElemType;
+ typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
if (tree->Count() <= tree->MaxLeafSize())
return;
@@ -64,25 +65,30 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
return;
}
- std::vector<sortStruct<ElemType>> sorted(tree->Count());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
arma::Col<ElemType> center;
tree->Bound().Center(center); // Modifies centroid.
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Metric().Evaluate(center,
+ sorted[i].first = tree->Metric().Evaluate(center,
tree->Dataset().col(tree->Point(i)));
- sorted[i].n = i;
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
std::vector<size_t> pointIndices(p);
for (size_t i = 0; i < p; i++)
{
// We start from the end of sorted.
- pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].n);
+ pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].second);
- root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].n),
+ root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].second),
relevels);
}
@@ -116,13 +122,19 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
ElemType axisScore = 0.0;
// Since we only have points in the leaf nodes, we only need to sort once.
- std::vector<sortStruct<ElemType>> sorted(tree->Count());
- for (size_t i = 0; i < sorted.size(); i++) {
- sorted[i].d = tree->Dataset().col(tree->Point(i))[j];
- sorted[i].n = i;
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
+ for (size_t i = 0; i < sorted.size(); i++)
+ {
+ sorted[i].first = tree->Dataset().col(tree->Point(i))[j];
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxLeafSize() -
@@ -144,46 +156,23 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
// another. Then we calculate the three scores for that distribution.
size_t cutOff = tree->MinLeafSize() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = maxG1[k] = tree->Dataset().col(tree->Point(sorted[0].n))[k];
- minG2[k] = maxG2[k] = tree->Dataset().col(
- tree->Point(sorted[sorted.size() - 1].n))[k];
- for (size_t l = 1; l < tree->Count() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG1[k])
- minG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG1[k])
- maxG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- }
- else
- {
- if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG2[k])
- minG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG2[k])
- maxG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
- }
- }
- }
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
+
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+ for (size_t l = cutOff; l < tree->Count(); l++)
+ bound2 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
- std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
- }
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
axisScore += margins[i];
@@ -214,14 +203,19 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
}
}
- std::vector<sortStruct<ElemType>> sorted(tree->Count());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Dataset().col(tree->Point(i))[bestAxis];
- sorted[i].n = i;
+ sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
TreeType* treeOne = new TreeType(tree->Parent(),
tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
@@ -236,9 +230,9 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->Count(); i++)
{
if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].n));
+ treeOne->InsertPoint(tree->Point(sorted[i].second));
else
- treeTwo->InsertPoint(tree->Point(sorted[i].n));
+ treeTwo->InsertPoint(tree->Point(sorted[i].second));
}
}
else
@@ -246,9 +240,9 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->Count(); i++)
{
if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].n));
+ treeOne->InsertPoint(tree->Point(sorted[i].second));
else
- treeTwo->InsertPoint(tree->Point(sorted[i].n));
+ treeTwo->InsertPoint(tree->Point(sorted[i].second));
}
}
@@ -303,6 +297,7 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
// Convenience typedef.
typedef typename TreeType::ElemType ElemType;
+ typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
@@ -388,14 +383,19 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
ElemType axisScore = 0.0;
// We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[j].Lo();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[j].Lo();
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -418,48 +418,23 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
// another. Then we calculate the three scores for that distribution.
size_t cutOff = tree->MinNumChildren() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
- maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
- minG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
- maxG2[k] =
- tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
- for (size_t l = 1; l < tree->NumChildren() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
- minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
- maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- else
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
- minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
- maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- }
- }
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
- std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
- }
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
+
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Child(sorted[l].second).Bound();
+
+ for (size_t l = cutOff; l < tree->NumChildren(); l++)
+ bound2 |= tree->Child(sorted[l].second).Bound();
+
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
+
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
axisScore += margins[i];
@@ -518,15 +493,19 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
ElemType axisScore = 0.0;
- // We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[j].Hi();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[j].Hi();
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -549,46 +528,23 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
// another. Then we calculate the three scores for that distribution.
size_t cutOff = tree->MinNumChildren() + i;
- // We'll calculate the max and min in each dimension by hand to save time.
- std::vector<ElemType> maxG1(tree->Bound().Dim());
- std::vector<ElemType> minG1(maxG1.size());
- std::vector<ElemType> maxG2(maxG1.size());
- std::vector<ElemType> minG2(maxG1.size());
- for (size_t k = 0; k < tree->Bound().Dim(); k++)
- {
- minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
- maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
- minG2[k] = tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
- maxG2[k] = tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
- for (size_t l = 1; l < tree->NumChildren() - 1; l++)
- {
- if (l < cutOff)
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
- minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
- maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- else
- {
- if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
- minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
- else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
- maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
- }
- }
- }
- ElemType area1 = 1.0, area2 = 1.0;
- ElemType oArea = 1.0;
- for (size_t k = 0; k < maxG1.size(); k++)
- {
- margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
- area1 *= maxG1[k] - minG1[k];
- area2 *= maxG2[k] - minG2[k];
- oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
- std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
- }
+ BoundType bound1(tree->Bound().Dim());
+ BoundType bound2(tree->Bound().Dim());
+
+ for (size_t l = 0; l < cutOff; l++)
+ bound1 |= tree->Child(sorted[l].second).Bound();
+
+ for (size_t l = cutOff; l < tree->NumChildren(); l++)
+ bound2 |= tree->Child(sorted[l].second).Bound();
+
+ ElemType area1 = bound1.Volume();
+ ElemType area2 = bound2.Volume();
+ ElemType oArea = bound1.Overlap(bound2);
+
+ for (size_t k = 0; k < bound1.Dim(); k++)
+ margins[i] += bound1[k].Width() + bound2[k].Width();
+
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
@@ -645,25 +601,30 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
}
}
- std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
if (lowIsBest)
{
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[bestAxis].Lo();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+ sorted[i].second = i;
}
}
else
{
for (size_t i = 0; i < sorted.size(); i++)
{
- sorted[i].d = tree->Child(i).Bound()[bestAxis].Hi();
- sorted[i].n = i;
+ sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+ sorted[i].second = i;
}
}
- std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
@@ -677,9 +638,9 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->NumChildren(); i++)
{
if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
else
- InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
}
}
else
@@ -692,9 +653,9 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < tree->NumChildren(); i++)
{
if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
else
- InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
}
}
else
@@ -710,30 +671,36 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
(bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
{
- std::vector<sortStruct<ElemType>> sorted2(tree->NumChildren());
+ std::vector<std::pair<ElemType, size_t>> sorted2(tree->NumChildren());
if (minOverlapSplitUsesHi)
{
for (size_t i = 0; i < sorted2.size(); i++)
{
- sorted2[i].d = tree->Child(i).Bound()[bestAxis].Hi();
- sorted2[i].n = i;
+ sorted2[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+ sorted2[i].second = i;
}
}
else
{
for (size_t i = 0; i < sorted2.size(); i++)
{
- sorted2[i].d = tree->Child(i).Bound()[bestAxis].Lo();
- sorted2[i].n = i;
+ sorted2[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+ sorted2[i].second = i;
}
}
- std::sort(sorted2.begin(), sorted2.end(), structComp<ElemType>);
+ std::sort(sorted2.begin(), sorted2.end(),
+ [] (const std::pair<ElemType, size_t>& p1,
+ const std::pair<ElemType, size_t>& p2)
+ {
+ return p1.first < p2.first;
+ });
+
for (size_t i = 0; i < tree->NumChildren(); i++)
{
if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeOne, tree->children[sorted2[i].second]);
else
- InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+ InsertNodeIntoTree(treeTwo, tree->children[sorted2[i].second]);
}
}
else
More information about the mlpack-git
mailing list