[mlpack-git] master: Replace SortStruct by std::pair (640f3a7)

gitdub at mlpack.org gitdub at mlpack.org
Tue Jul 12 08:45:38 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/e434aee1eab9e3ced97cccd840dd383ef8089acf...81c14d9ec8250f8a2caaf1ac06c4be12fc5fd31f

>---------------------------------------------------------------

commit 640f3a7859ea8d400bc6fed6b4395c2291ba0259
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date:   Tue Jul 12 15:45:38 2016 +0300

    Replace SortStruct by std::pair


>---------------------------------------------------------------

640f3a7859ea8d400bc6fed6b4395c2291ba0259
 .../core/tree/rectangle_tree/r_star_tree_split.hpp |  20 --
 .../tree/rectangle_tree/r_star_tree_split_impl.hpp | 266 ++++++++-----------
 .../core/tree/rectangle_tree/x_tree_split.hpp      |  21 --
 .../core/tree/rectangle_tree/x_tree_split_impl.hpp | 295 +++++++++------------
 4 files changed, 238 insertions(+), 364 deletions(-)

diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
index b87389f..99bef1e 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
@@ -38,26 +38,6 @@ class RStarTreeSplit
 
  private:
   /**
-   * Class to allow for faster sorting.
-   */
-  template<typename ElemType>
-  struct SortStruct
-  {
-    ElemType d;
-    int n;
-  };
-
-  /**
-   * Comparator for sorting with SortStruct.
-   */
-  template<typename ElemType>
-  static bool StructComp(const SortStruct<ElemType>& s1,
-                         const SortStruct<ElemType>& s2)
-  {
-    return s1.d < s2.d;
-  }
-
-  /**
    * Insert a node into another node.
    */
   template <typename TreeType>
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
index 795170d..77bedba 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
@@ -26,6 +26,7 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
 {
   // Convenience typedef.
   typedef typename TreeType::ElemType ElemType;
+  typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
 
   if (tree->Count() <= tree->MaxLeafSize())
     return;
@@ -65,25 +66,30 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       return;
     }
 
-    std::vector<SortStruct<ElemType>> sorted(tree->Count());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
     arma::Col<ElemType> center;
     tree->Bound().Center(center); // Modifies centroid.
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Metric().Evaluate(center,
+      sorted[i].first = tree->Metric().Evaluate(center,
           tree->Dataset().col(tree->Point(i)));
-      sorted[i].n = i;
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
     std::vector<size_t> pointIndices(p);
 
     for (size_t i = 0; i < p; i++)
     {
       // We start from the end of sorted.
-      pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].n);
+      pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].second);
 
-      root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].n),
+      root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].second),
           relevels);
     }
 
@@ -106,14 +112,19 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
   {
     ElemType axisScore = 0.0;
     // Since we only have points in the leaf nodes, we only need to sort once.
-    std::vector<SortStruct<ElemType>> sorted(tree->Count());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Dataset().col(tree->Point(i))[j];
-      sorted[i].n = i;
+      sorted[i].first = tree->Dataset().col(tree->Point(i))[j];
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
@@ -138,46 +149,21 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
 
       size_t cutOff = tree->MinLeafSize() + i;
 
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = maxG1[k] = tree->Dataset().col(tree->Point(sorted[0].n))[k];
-        minG2[k] = maxG2[k] =
-            tree->Dataset().col(tree->Point(sorted[sorted.size() - 1].n))[k];
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
 
-        for (size_t l = 1; l < tree->Count() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG1[k])
-              minG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-            else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG1[k])
-              maxG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-          }
-          else
-          {
-            if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG2[k])
-              minG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-            else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG2[k])
-              maxG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-          }
-        }
-      }
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Dataset().col(tree->Point(sorted[l].second));
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
-            (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
-      }
+      for (size_t l = cutOff; l < tree->Count(); l++)
+        bound2 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
 
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
@@ -210,14 +196,19 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     }
   }
 
-  std::vector<SortStruct<ElemType>> sorted(tree->Count());
+  std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
   for (size_t i = 0; i < sorted.size(); i++)
   {
-    sorted[i].d = tree->Dataset().col(tree->Point(i))[bestAxis];
-    sorted[i].n = i;
+    sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
+    sorted[i].second = i;
   }
 
-  std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+  std::sort(sorted.begin(), sorted.end(),
+      [] (const std::pair<ElemType, size_t>& p1,
+          const std::pair<ElemType, size_t>& p2)
+      {
+        return p1.first < p2.first;
+      });
 
   TreeType* treeOne = new TreeType(tree->Parent());
   TreeType* treeTwo = new TreeType(tree->Parent());
@@ -227,9 +218,9 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     for (size_t i = 0; i < tree->Count(); i++)
     {
       if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].n));
+        treeOne->InsertPoint(tree->Point(sorted[i].second));
       else
-        treeTwo->InsertPoint(tree->Point(sorted[i].n));
+        treeTwo->InsertPoint(tree->Point(sorted[i].second));
     }
   }
   else
@@ -237,9 +228,9 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     for (size_t i = 0; i < tree->Count(); i++)
     {
       if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].n));
+        treeOne->InsertPoint(tree->Point(sorted[i].second));
       else
-        treeTwo->InsertPoint(tree->Point(sorted[i].n));
+        treeTwo->InsertPoint(tree->Point(sorted[i].second));
     }
   }
 
@@ -278,6 +269,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
 {
   // Convenience typedef.
   typedef typename TreeType::ElemType ElemType;
+  typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
 
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
@@ -369,14 +361,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     ElemType axisScore = 0.0;
 
     // We'll do Bound().Lo() now and use Bound().Hi() later.
-    std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[j].Lo();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[j].Lo();
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -401,49 +398,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
 
       size_t cutOff = tree->MinNumChildren() + i;
 
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
-        maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
-        minG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
-        maxG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-
-        for (size_t l = 1; l < tree->NumChildren() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
-              minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
-              maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-          else
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
-              minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
-              maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-        }
-      }
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
-            (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
-      }
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Child(sorted[l].second).Bound();
+
+      for (size_t l = cutOff; l < tree->NumChildren(); l++)
+        bound2 |= tree->Child(sorted[l].second).Bound();
+
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
 
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
@@ -480,15 +449,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
   {
     ElemType axisScore = 0.0;
 
-    // We'll do Bound().Lo() now and use Bound().Hi() later.
-    std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[j].Hi();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[j].Hi();
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -513,51 +486,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
 
       size_t cutOff = tree->MinNumChildren() + i;
 
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
 
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
-        maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
-        minG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
-        maxG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-
-        for (size_t l = 1; l < tree->NumChildren() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
-              minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
-              maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-          else
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
-              minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
-              maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-        }
-      }
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Child(sorted[l].second).Bound();
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
+      for (size_t l = cutOff; l < tree->NumChildren(); l++)
+        bound2 |= tree->Child(sorted[l].second).Bound();
 
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
-            (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
-      }
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
 
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
@@ -591,25 +534,30 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     }
   }
 
-  std::vector<SortStruct<ElemType>> sorted(tree->NumChildren());
+  std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
   if (lowIsBest)
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[bestAxis].Lo();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+      sorted[i].second = i;
     }
   }
   else
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[bestAxis].Hi();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+      sorted[i].second = i;
     }
   }
 
-  std::sort(sorted.begin(), sorted.end(), StructComp<ElemType>);
+  std::sort(sorted.begin(), sorted.end(),
+      [] (const std::pair<ElemType, size_t>& p1,
+          const std::pair<ElemType, size_t>& p2)
+      {
+        return p1.first < p2.first;
+      });
 
   TreeType* treeOne = new TreeType(tree->Parent());
   TreeType* treeTwo = new TreeType(tree->Parent());
@@ -619,9 +567,9 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     for (size_t i = 0; i < tree->NumChildren(); i++)
     {
       if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
-        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].n)));
+        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
       else
-        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].n)));
+        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
     }
   }
   else
@@ -629,9 +577,9 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     for (size_t i = 0; i < tree->NumChildren(); i++)
     {
       if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
-        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].n)));
+        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
       else
-        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].n)));
+        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
     }
   }
 
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
index 24ece3c..efa6242 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
@@ -48,27 +48,6 @@ class XTreeSplit
 
  private:
   /**
-   * Class to allow for faster sorting.
-   */
-  template<typename ElemType>
-  class sortStruct
-  {
-   public:
-    ElemType d;
-    int n;
-  };
-
-  /**
-   * Comparator for sorting with sortStruct.
-   */
-  template<typename ElemType>
-  static bool structComp(const sortStruct<ElemType>& s1,
-                         const sortStruct<ElemType>& s2)
-  {
-    return s1.d < s2.d;
-  }
-
-  /**
    * Insert a node into another node.
    */
   template<typename TreeType>
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
index 073d771..7b1f62d 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
@@ -25,6 +25,7 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
 {
   // Convenience typedef.
   typedef typename TreeType::ElemType ElemType;
+  typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
 
   if (tree->Count() <= tree->MaxLeafSize())
     return;
@@ -64,25 +65,30 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       return;
     }
 
-    std::vector<sortStruct<ElemType>> sorted(tree->Count());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
     arma::Col<ElemType> center;
     tree->Bound().Center(center); // Modifies centroid.
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Metric().Evaluate(center,
+      sorted[i].first = tree->Metric().Evaluate(center,
           tree->Dataset().col(tree->Point(i)));
-       sorted[i].n = i;
+       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
     std::vector<size_t> pointIndices(p);
 
     for (size_t i = 0; i < p; i++)
     {
       // We start from the end of sorted.
-      pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].n);
+      pointIndices[i] = tree->Point(sorted[sorted.size() - 1 - i].second);
 
-      root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].n),
+      root->DeletePoint(tree->Point(sorted[sorted.size() - 1 - i].second),
           relevels);
     }
 
@@ -116,13 +122,19 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
   {
     ElemType axisScore = 0.0;
     // Since we only have points in the leaf nodes, we only need to sort once.
-    std::vector<sortStruct<ElemType>> sorted(tree->Count());
-    for (size_t i = 0; i < sorted.size(); i++) {
-      sorted[i].d = tree->Dataset().col(tree->Point(i))[j];
-      sorted[i].n = i;
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
+    for (size_t i = 0; i < sorted.size(); i++)
+    {
+      sorted[i].first = tree->Dataset().col(tree->Point(i))[j];
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxLeafSize() -
@@ -144,46 +156,23 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       // another.  Then we calculate the three scores for that distribution.
 
       size_t cutOff = tree->MinLeafSize() + i;
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = maxG1[k] = tree->Dataset().col(tree->Point(sorted[0].n))[k];
-        minG2[k] = maxG2[k] = tree->Dataset().col(
-            tree->Point(sorted[sorted.size() - 1].n))[k];
 
-        for (size_t l = 1; l < tree->Count() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG1[k])
-              minG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-            else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG1[k])
-              maxG1[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-          }
-          else
-          {
-            if (tree->Dataset().col(tree->Point(sorted[l].n))[k] < minG2[k])
-              minG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-            else if (tree->Dataset().col(tree->Point(sorted[l].n))[k] > maxG2[k])
-              maxG2[k] = tree->Dataset().col(tree->Point(sorted[l].n))[k];
-          }
-        }
-      }
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
+
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+      for (size_t l = cutOff; l < tree->Count(); l++)
+        bound2 |= tree->Dataset().col(tree->Point(sorted[l].second));
+
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
-            std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
-      }
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
       axisScore += margins[i];
@@ -214,14 +203,19 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     }
   }
 
-  std::vector<sortStruct<ElemType>> sorted(tree->Count());
+  std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
   for (size_t i = 0; i < sorted.size(); i++)
   {
-    sorted[i].d = tree->Dataset().col(tree->Point(i))[bestAxis];
-    sorted[i].n = i;
+    sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
+    sorted[i].second = i;
   }
 
-  std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+  std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
   TreeType* treeOne = new TreeType(tree->Parent(),
                             tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
@@ -236,9 +230,9 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     for (size_t i = 0; i < tree->Count(); i++)
     {
       if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].n));
+        treeOne->InsertPoint(tree->Point(sorted[i].second));
       else
-        treeTwo->InsertPoint(tree->Point(sorted[i].n));
+        treeTwo->InsertPoint(tree->Point(sorted[i].second));
     }
   }
   else
@@ -246,9 +240,9 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     for (size_t i = 0; i < tree->Count(); i++)
     {
       if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].n));
+        treeOne->InsertPoint(tree->Point(sorted[i].second));
       else
-        treeTwo->InsertPoint(tree->Point(sorted[i].n));
+        treeTwo->InsertPoint(tree->Point(sorted[i].second));
     }
   }
 
@@ -303,6 +297,7 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
 {
   // Convenience typedef.
   typedef typename TreeType::ElemType ElemType;
+  typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
 
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
@@ -388,14 +383,19 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     ElemType axisScore = 0.0;
 
     // We'll do Bound().Lo() now and use Bound().Hi() later.
-    std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[j].Lo();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[j].Lo();
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -418,48 +418,23 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       // another.  Then we calculate the three scores for that distribution.
 
       size_t cutOff = tree->MinNumChildren() + i;
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
-        maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
-        minG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
-        maxG2[k] =
-            tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-        for (size_t l = 1; l < tree->NumChildren() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
-              minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
-              maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-          else
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
-              minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
-              maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-        }
-      }
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
-            std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
-      }
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
+
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Child(sorted[l].second).Bound();
+
+      for (size_t l = cutOff; l < tree->NumChildren(); l++)
+        bound2 |= tree->Child(sorted[l].second).Bound();
+
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
+
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
       axisScore += margins[i];
@@ -518,15 +493,19 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
   {
     ElemType axisScore = 0.0;
 
-    // We'll do Bound().Lo() now and use Bound().Hi() later.
-    std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[j].Hi();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[j].Hi();
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(),
+        [] (const std::pair<ElemType, size_t>& p1,
+            const std::pair<ElemType, size_t>& p2)
+        {
+          return p1.first < p2.first;
+        });
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -549,46 +528,23 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       // another.  Then we calculate the three scores for that distribution.
 
       size_t cutOff = tree->MinNumChildren() + i;
-      // We'll calculate the max and min in each dimension by hand to save time.
-      std::vector<ElemType> maxG1(tree->Bound().Dim());
-      std::vector<ElemType> minG1(maxG1.size());
-      std::vector<ElemType> maxG2(maxG1.size());
-      std::vector<ElemType> minG2(maxG1.size());
-      for (size_t k = 0; k < tree->Bound().Dim(); k++)
-      {
-        minG1[k] = tree->Child(sorted[0].n).Bound()[k].Lo();
-        maxG1[k] = tree->Child(sorted[0].n).Bound()[k].Hi();
-        minG2[k] = tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Lo();
-        maxG2[k] = tree->Child(sorted[sorted.size() - 1].n).Bound()[k].Hi();
-        for (size_t l = 1; l < tree->NumChildren() - 1; l++)
-        {
-          if (l < cutOff)
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG1[k])
-              minG1[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG1[k])
-              maxG1[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-          else
-          {
-            if (tree->Child(sorted[l].n).Bound()[k].Lo() < minG2[k])
-              minG2[k] = tree->Child(sorted[l].n).Bound()[k].Lo();
-            else if (tree->Child(sorted[l].n).Bound()[k].Hi() > maxG2[k])
-              maxG2[k] = tree->Child(sorted[l].n).Bound()[k].Hi();
-          }
-        }
-      }
 
-      ElemType area1 = 1.0, area2 = 1.0;
-      ElemType oArea = 1.0;
-      for (size_t k = 0; k < maxG1.size(); k++)
-      {
-        margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
-        area1 *= maxG1[k] - minG1[k];
-        area2 *= maxG2[k] - minG2[k];
-        oArea *= ((maxG1[k] < minG2[k]) || (maxG2[k] < minG1[k])) ? 0.0 :
-            std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
-      }
+      BoundType bound1(tree->Bound().Dim());
+      BoundType bound2(tree->Bound().Dim());
+
+      for (size_t l = 0; l < cutOff; l++)
+        bound1 |= tree->Child(sorted[l].second).Bound();
+
+      for (size_t l = cutOff; l < tree->NumChildren(); l++)
+        bound2 |= tree->Child(sorted[l].second).Bound();
+
+      ElemType area1 = bound1.Volume();
+      ElemType area2 = bound2.Volume();
+      ElemType oArea = bound1.Overlap(bound2);
+
+      for (size_t k = 0; k < bound1.Dim(); k++)
+        margins[i] += bound1[k].Width() + bound2[k].Width();
+
 
       areas[i] += area1 + area2;
       overlapedAreas[i] += oArea;
@@ -645,25 +601,30 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     }
   }
 
-  std::vector<sortStruct<ElemType>> sorted(tree->NumChildren());
+  std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
   if (lowIsBest)
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[bestAxis].Lo();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+      sorted[i].second = i;
     }
   }
   else
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
-      sorted[i].d = tree->Child(i).Bound()[bestAxis].Hi();
-      sorted[i].n = i;
+      sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+      sorted[i].second = i;
     }
   }
 
-  std::sort(sorted.begin(), sorted.end(), structComp<ElemType>);
+  std::sort(sorted.begin(), sorted.end(),
+      [] (const std::pair<ElemType, size_t>& p1,
+          const std::pair<ElemType, size_t>& p2)
+      {
+        return p1.first < p2.first;
+      });
 
   TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
   TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
@@ -677,9 +638,9 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       for (size_t i = 0; i < tree->NumChildren(); i++)
       {
         if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
         else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
       }
     }
     else
@@ -692,9 +653,9 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       for (size_t i = 0; i < tree->NumChildren(); i++)
       {
         if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
         else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
       }
     }
     else
@@ -710,30 +671,36 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
         (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
     {
-      std::vector<sortStruct<ElemType>> sorted2(tree->NumChildren());
+      std::vector<std::pair<ElemType, size_t>> sorted2(tree->NumChildren());
       if (minOverlapSplitUsesHi)
       {
         for (size_t i = 0; i < sorted2.size(); i++)
         {
-          sorted2[i].d = tree->Child(i).Bound()[bestAxis].Hi();
-          sorted2[i].n = i;
+          sorted2[i].first = tree->Child(i).Bound()[bestAxis].Hi();
+          sorted2[i].second = i;
         }
       }
       else
       {
         for (size_t i = 0; i < sorted2.size(); i++)
         {
-          sorted2[i].d = tree->Child(i).Bound()[bestAxis].Lo();
-          sorted2[i].n = i;
+          sorted2[i].first = tree->Child(i).Bound()[bestAxis].Lo();
+          sorted2[i].second = i;
         }
       }
-      std::sort(sorted2.begin(), sorted2.end(), structComp<ElemType>);
+      std::sort(sorted2.begin(), sorted2.end(),
+          [] (const std::pair<ElemType, size_t>& p1,
+              const std::pair<ElemType, size_t>& p2)
+          {
+            return p1.first < p2.first;
+          });
+
       for (size_t i = 0; i < tree->NumChildren(); i++)
       {
         if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeOne, tree->children[sorted2[i].second]);
         else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].n]);
+          InsertNodeIntoTree(treeTwo, tree->children[sorted2[i].second]);
       }
     }
     else




More information about the mlpack-git mailing list