[mlpack-svn] r15311 - mlpack/trunk/src/mlpack/methods/rann

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jun 25 12:19:47 EDT 2013


Author: rcurtin
Date: 2013-06-25 12:19:47 -0400 (Tue, 25 Jun 2013)
New Revision: 15311

Modified:
   mlpack/trunk/src/mlpack/methods/rann/ra_search_rules.hpp
   mlpack/trunk/src/mlpack/methods/rann/ra_search_rules_impl.hpp
Log:
Fix dangling spaces and use Log::Assert() not assert().


Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search_rules.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search_rules.hpp	2013-06-25 14:53:04 UTC (rev 15310)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search_rules.hpp	2013-06-25 16:19:47 UTC (rev 15311)
@@ -3,8 +3,8 @@
  * @author Parikshit Ram
  *
  * Defines the pruning rules and base case rules necessary to perform a
- * tree-based rank-approximate search (with an arbitrary tree) 
- * for the RASearch class. 
+ * tree-based rank-approximate search (with an arbitrary tree) for the RASearch
+ * class.
  */
 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_RULES_HPP
 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_RULES_HPP
@@ -33,17 +33,17 @@
   double BaseCase(const size_t queryIndex, const size_t referenceIndex);
 
   /**
-   * TOFIX: This function is specified for the cover tree (usually) so 
-   *   I need to think about it more algorithmically and keep its 
+   * TOFIX: This function is specified for the cover tree (usually) so
+   *   I need to think about it more algorithmically and keep its
    *   implementation mostly empty.
    *   Also, since the access to the points in the subtree of a cover tree
    *   is non-trivial, we might have to re-work this.
-   *   FOR NOW: I am just using as for a BSP-tree, I will fix it when 
+   *   FOR NOW: I am just using as for a BSP-tree, I will fix it when
    *   we figure out cover trees.
    *
    */
 
-  double Prescore(TreeType& queryNode, 
+  double Prescore(TreeType& queryNode,
                   TreeType& referenceNode,
                   TreeType& referenceChildNode,
                   const double baseCaseResult) const;
@@ -58,17 +58,17 @@
    * Get the score for recursion order.  A low score indicates priority for
    * recursion, while DBL_MAX indicates that the node should not be recursed
    * into at all (it should be pruned).
-   * 
-   * For rank-approximation, the scoring function first checks if pruning 
-   * by distance is possible. 
-   *   If yes, then the node is given the score of 
-   *   'DBL_MAX' and the expected number of samples from that node are 
-   *   added to the number of samples made for the query. 
    *
-   *   If no, then the function tries to see if the node can be pruned by 
+   * For rank-approximation, the scoring function first checks if pruning
+   * by distance is possible.
+   *   If yes, then the node is given the score of
+   *   'DBL_MAX' and the expected number of samples from that node are
+   *   added to the number of samples made for the query.
+   *
+   *   If no, then the function tries to see if the node can be pruned by
    *   approximation. If number of samples required from this node is small
    *   enough, then that number of samples are acquired from this node
-   *   and the score is set to be 'DBL_MAX'. 
+   *   and the score is set to be 'DBL_MAX'.
    *
    *   If the pruning by approximation is not possible either, the algorithm
    *   continues with the usual tree-traversal.
@@ -82,17 +82,17 @@
    * Get the score for recursion order.  A low score indicates priority for
    * recursion, while DBL_MAX indicates that the node should not be recursed
    * into at all (it should be pruned).
-   * 
-   * For rank-approximation, the scoring function first checks if pruning 
-   * by distance is possible. 
-   *   If yes, then the node is given the score of 
-   *   'DBL_MAX' and the expected number of samples from that node are 
-   *   added to the number of samples made for the query. 
    *
-   *   If no, then the function tries to see if the node can be pruned by 
+   * For rank-approximation, the scoring function first checks if pruning
+   * by distance is possible.
+   *   If yes, then the node is given the score of
+   *   'DBL_MAX' and the expected number of samples from that node are
+   *   added to the number of samples made for the query.
+   *
+   *   If no, then the function tries to see if the node can be pruned by
    *   approximation. If number of samples required from this node is small
    *   enough, then that number of samples are acquired from this node
-   *   and the score is set to be 'DBL_MAX'. 
+   *   and the score is set to be 'DBL_MAX'.
    *
    *   If the pruning by approximation is not possible either, the algorithm
    *   continues with the usual tree-traversal.
@@ -114,10 +114,10 @@
    * bound.
    *
    * For rank-approximation, it also checks if the number of samples left
-   * for a query to satisfy the rank constraint is small enough at this 
-   * point of the algorithm, then this node is approximated by sampling 
+   * for a query to satisfy the rank constraint is small enough at this
+   * point of the algorithm, then this node is approximated by sampling
    * and given a new score of 'DBL_MAX'.
-   * 
+   *
    * @param queryIndex Index of query point.
    * @param referenceNode Candidate node to be recursed into.
    * @param oldScore Old score produced by Score() (or Rescore()).
@@ -130,16 +130,16 @@
    * Get the score for recursion order.  A low score indicates priority for
    * recursionm while DBL_MAX indicates that the node should not be recursed
    * into at all (it should be pruned).
-   * 
-   * For the rank-approximation, we check if the referenceNode can be 
-   * approximated by sampling. If it can be, enough samples are made for 
+   *
+   * For the rank-approximation, we check if the referenceNode can be
+   * approximated by sampling. If it can be, enough samples are made for
    * every query in the queryNode. No further query-tree traversal is
-   * performed. 
-   * 
-   * The 'NumSamplesMade' query stat is propagated up the tree. And then 
+   * performed.
+   *
+   * The 'NumSamplesMade' query stat is propagated up the tree. And then
    * if pruning occurs (by distance or by sampling), the 'NumSamplesMade'
    * stat is not propagated down the tree. If no pruning occurs, the
-   * stat is propagated down the tree. 
+   * stat is propagated down the tree.
    *
    * @param queryNode Candidate query node to recurse into.
    * @param referenceNode Candidate reference node to recurse into.
@@ -151,16 +151,16 @@
    * situation where it may be needed to calculate the recursion order).  A low
    * score indicates priority for recursion, while DBL_MAX indicates that the
    * node should not be recursed into at all (it should be pruned).
-   * 
-   * For the rank-approximation, we check if the referenceNode can be 
-   * approximated by sampling. If it can be, enough samples are made for 
+   *
+   * For the rank-approximation, we check if the referenceNode can be
+   * approximated by sampling. If it can be, enough samples are made for
    * every query in the queryNode. No further query-tree traversal is
-   * performed. 
-   * 
-   * The 'NumSamplesMade' query stat is propagated up the tree. And then 
+   * performed.
+   *
+   * The 'NumSamplesMade' query stat is propagated up the tree. And then
    * if pruning occurs (by distance or by sampling), the 'NumSamplesMade'
    * stat is not propagated down the tree. If no pruning occurs, the
-   * stat is propagated down the tree. 
+   * stat is propagated down the tree.
    *
    * @param queryNode Candidate query node to recurse into.
    * @param referenceNode Candidate reference node to recurse into.
@@ -172,21 +172,21 @@
 
   /**
    * Re-evaluate the score for recursion order.  A low score indicates priority
-   * for recursion, while DBL_MAX indicates that the node should not be 
+   * for recursion, while DBL_MAX indicates that the node should not be
    * recursed into at all (it should be pruned).  This is used when the score
-   * has already been calculated, but another recursion may have modified the 
+   * has already been calculated, but another recursion may have modified the
    * bounds for pruning.  So the old score is checked against the new pruning
    * bound.
-   * 
-   * For the rank-approximation, we check if the referenceNode can be 
-   * approximated by sampling. If it can be, enough samples are made for 
+   *
+   * For the rank-approximation, we check if the referenceNode can be
+   * approximated by sampling. If it can be, enough samples are made for
    * every query in the queryNode. No further query-tree traversal is
-   * performed. 
-   * 
-   * The 'NumSamplesMade' query stat is propagated up the tree. And then 
+   * performed.
+   *
+   * The 'NumSamplesMade' query stat is propagated up the tree. And then
    * if pruning occurs (by distance or by sampling), the 'NumSamplesMade'
    * stat is not propagated down the tree. If no pruning occurs, the
-   * stat is propagated down the tree. 
+   * stat is propagated down the tree.
    *
    * @param queryNode Candidate query node to recurse into.
    * @param referenceNode Candidate reference node to recurse into.
@@ -198,12 +198,12 @@
 
 
   size_t NumDistComputations() { return numDistComputations; }
-  size_t NumEffectiveSamples() 
-  { 
+  size_t NumEffectiveSamples()
+  {
     if (numSamplesMade.n_elem == 0)
       return 0;
-    else 
-      return arma::sum(numSamplesMade); 
+    else
+      return arma::sum(numSamplesMade);
   }
 
  private:
@@ -258,10 +258,10 @@
                       const size_t neighbor,
                       const double distance);
 
-  /** 
+  /**
    * Compute the minimum number of samples required to guarantee
    * the given rank-approximation and success probability.
-   * 
+   *
    * @param n Size of the set to be sampled from.
    * @param k The number of neighbors required within the rank-approximation.
    * @param tau The rank-approximation in percentile of the data.
@@ -273,7 +273,7 @@
                             const double alpha) const;
 
   /**
-   * Compute the success probability of obtaining 'k'-neighbors from a 
+   * Compute the success probability of obtaining 'k'-neighbors from a
    * set of size 'n' within the top 't' neighbors if 'm' samples are made.
    *
    * @param n Size of the set being sampled from.
@@ -287,8 +287,8 @@
                             const size_t t) const;
 
   /**
-   * Pick up desired number of samples (with replacement) from a given range 
-   * of integers so that only the distinct samples are returned from 
+   * Pick up desired number of samples (with replacement) from a given range
+   * of integers so that only the distinct samples are returned from
    * the range [0 - specified upper bound)
    *
    * @param numSamples Number of random samples.

Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search_rules_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search_rules_impl.hpp	2013-06-25 14:53:04 UTC (rev 15310)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search_rules_impl.hpp	2013-06-25 16:19:47 UTC (rev 15311)
@@ -34,7 +34,7 @@
   sampleAtLeaves(sampleAtLeaves),
   firstLeafExact(firstLeafExact),
   singleSampleLimit(singleSampleLimit)
-{ 
+{
   Timer::Start("computing_number_of_samples_reqd");
   numSamplesReqd = MinimumSamplesReqd(referenceSet.n_cols, neighbors.n_rows,
                                       tau, alpha);
@@ -45,7 +45,7 @@
   numDistComputations = 0;
   samplingRatio = (double) numSamplesReqd / (double) referenceSet.n_cols;
 
-  Log::Info << "Minimum Samples Required per-query: " << numSamplesReqd << 
+  Log::Info << "Minimum Samples Required per-query: " << numSamplesReqd <<
     ", Sampling Ratio: " << samplingRatio << std::endl;
 
   if (naive) // no tree traversal going to happen, just do naive sampling here.
@@ -54,7 +54,7 @@
     for (size_t i = 0; i < querySet.n_cols; ++i)
     {
       arma::uvec distinctSamples;
-      ObtainDistinctSamples(numSamplesReqd, referenceSet.n_cols, 
+      ObtainDistinctSamples(numSamplesReqd, referenceSet.n_cols,
                             distinctSamples);
       for (size_t j = 0; j < distinctSamples.n_elem; j++)
         BaseCase(i, (size_t) distinctSamples[j]);
@@ -98,21 +98,21 @@
   size_t t = (size_t) std::ceil(tau * (double) n / 100.0);
 
   double prob;
-  assert(alpha <= 1.0);
+  Log::Assert(alpha <= 1.0);
 
   // going through all values of sample sizes
   // to find the minimum samples required to satisfy the
   // desired bound
   bool done = false;
 
-  // This performs a binary search on the integer values between 'lb = k' 
+  // This performs a binary search on the integer values between 'lb = k'
   // and 'ub = n' to find the minimum number of samples 'm' required to obtain
   // the desired success probability 'alpha'.
-  do 
+  do
   {
     prob = SuccessProbability(n, k, m, t);
 
-    if (prob > alpha) 
+    if (prob > alpha)
     {
       if (prob - alpha < 0.001 || ub < lb + 2) {
         done = true;
@@ -120,20 +120,20 @@
       }
       else
         ub = m;
-    } 
-    else 
+    }
+    else
     {
-      if (prob < alpha) 
+      if (prob < alpha)
       {
-        if (m == lb) 
+        if (m == lb)
         {
           m++;
           continue;
         }
-        else 
+        else
           lb = m;
-      } 
-      else 
+      }
+      else
       {
         done = true;
         break;
@@ -154,7 +154,7 @@
                    const size_t m,
                    const size_t t) const
 {
-  if (k == 1) 
+  if (k == 1)
   {
     if (m > n - t)
       return 1.0;
@@ -175,13 +175,13 @@
     double eps = (double) t / (double) n;
     double sum = 0.0;
 
-    // The probability that 'k' of the 'm' samples lie within the top 't' 
+    // The probability that 'k' of the 'm' samples lie within the top 't'
     // of the neighbors is given by:
     // sum_{j = k}^m Choose(m, j) (t/n)^j (1 - t/n)^{m - j}
-    // which is also equal to 
+    // which is also equal to
     // 1 - sum_{j = 0}^{k - 1} Choose(m, j) (t/n)^j (1 - t/n)^{m - j}
     //
-    // So this is a m - k term summation or a k term summation. So if 
+    // So this is a m - k term summation or a k term summation. So if
     // m > 2k, do the k term summation, otherwise do the m term summation.
 
     size_t lb;
@@ -215,7 +215,7 @@
 
     for (size_t j = lb; j < ub; j++)
     {
-      // compute Choose(m, j) 
+      // compute Choose(m, j)
       double mCj = (double) m;
       size_t jTrans;
 
@@ -226,13 +226,13 @@
       else
         jTrans = m - j;
 
-      for(size_t i = 2; i <= jTrans; i++) 
+      for(size_t i = 2; i <= jTrans; i++)
       {
         mCj *= (double) (m - (i - 1));
         mCj /= (double) i;
       }
 
-      sum += (mCj * std::pow(eps, (double) j) 
+      sum += (mCj * std::pow(eps, (double) j)
               * std::pow(1.0 - eps, (double) (m - j)));
     }
 
@@ -245,7 +245,7 @@
 
 
 template<typename SortPolicy, typename MetricType, typename TreeType>
-inline force_inline 
+inline force_inline
 double RASearchRules<SortPolicy, MetricType, TreeType>::
 BaseCase(const size_t queryIndex, const size_t referenceIndex)
 {
@@ -281,7 +281,7 @@
          TreeType& referenceNode,
          TreeType& referenceChildNode,
          const double baseCaseResult) const
-{ 
+{
   return 0.0;
 }
 
@@ -292,7 +292,7 @@
           TreeType& queryChildNode,
           TreeType& referenceNode,
           const double baseCaseResult) const
-{ 
+{
   return 0.0;
 }
 
@@ -307,8 +307,8 @@
   const double bestDistance = distances(distances.n_rows - 1, queryIndex);
 
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this query.
   if (SortPolicy::IsBetter(distance, bestDistance)
       && numSamplesMade[queryIndex] < numSamplesReqd)
@@ -316,14 +316,14 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // If you are required to visit the first leaf (to find possible 
+    // If you are required to visit the first leaf (to find possible
     // duplicates), make sure you do not approximate.
     if (numSamplesMade[queryIndex] > 0 || !firstLeafExact)
     {
       // check if this node can be approximated by sampling
-      size_t samplesReqd = 
+      size_t samplesReqd =
         (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-      samplesReqd = std::min(samplesReqd, 
+      samplesReqd = std::min(samplesReqd,
                              numSamplesReqd - numSamplesMade[queryIndex]);
 
       if (samplesReqd > singleSampleLimit && !referenceNode.IsLeaf())
@@ -345,7 +345,7 @@
             // so no book-keeping is required here.
             BaseCase(queryIndex,
                      referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
           // Node approximated so we can prune it
           return DBL_MAX;
         }
@@ -362,11 +362,11 @@
               // function so no book-keeping is required here.
               BaseCase(queryIndex,
                        referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
             // (Leaf) Node approximated so can prune it
             return DBL_MAX;
           }
-          else 
+          else
           {
             // not allowed to sample from leaves, so cannot prune.
             return distance;
@@ -383,19 +383,19 @@
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
+    // If enough samples already made, this step does not change the
     // result of the search.
-    numSamplesMade[queryIndex] += 
+    numSamplesMade[queryIndex] +=
       (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 } // Score(point, node)
 
@@ -403,7 +403,7 @@
 template<typename SortPolicy, typename MetricType, typename TreeType>
 inline double RASearchRules<SortPolicy, MetricType, TreeType>::
 Score(const size_t queryIndex,
-      TreeType& referenceNode, 
+      TreeType& referenceNode,
       const double baseCaseResult)
 {
 
@@ -412,11 +412,11 @@
       &referenceNode, baseCaseResult);
   const double bestDistance = distances(distances.n_rows - 1, queryIndex);
 
-  // Hereon, this 'Score' function is exactly same as the previous 
+  // Hereon, this 'Score' function is exactly same as the previous
   // 'Score' function.
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this query.
   if (SortPolicy::IsBetter(distance, bestDistance)
       && numSamplesMade[queryIndex] < numSamplesReqd)
@@ -424,16 +424,16 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // If you are required to visit the first leaf (to find possible 
+    // If you are required to visit the first leaf (to find possible
     // duplicates), make sure you do not approximate.
     if (numSamplesMade[queryIndex] > 0 || !firstLeafExact)
     {
       // Check if this node can be approximated by sampling:
       // 'referenceNode.Count() should correspond to the number of points
       // present in this subtree.
-      size_t samplesReqd = 
+      size_t samplesReqd =
         (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-      samplesReqd = std::min(samplesReqd, 
+      samplesReqd = std::min(samplesReqd,
                              numSamplesReqd - numSamplesMade[queryIndex]);
 
       if (samplesReqd > singleSampleLimit && !referenceNode.IsLeaf())
@@ -447,7 +447,7 @@
         {
           // Then samplesReqd <= singleSampleLimit.
           // Hence approximate node by sampling enough number of points
-          // from this subtree. 
+          // from this subtree.
           arma::uvec distinctSamples;
           ObtainDistinctSamples(samplesReqd, referenceNode.Count(),
                                 distinctSamples);
@@ -456,7 +456,7 @@
             // function so no book-keeping is required here.
             BaseCase(queryIndex,
                      referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
           // Node approximated so we can prune it
           return DBL_MAX;
         }
@@ -473,11 +473,11 @@
               // function so no book-keeping is required here.
               BaseCase(queryIndex,
                        referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
             // (Leaf) Node approximated so can prune it
             return DBL_MAX;
           }
-          else 
+          else
           {
             // not allowed to sample from leaves, so cannot prune.
             return distance;
@@ -493,22 +493,22 @@
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
+    // If enough samples already made, this step does not change the
     // result of the search.
     if (numSamplesMade[queryIndex] < numSamplesReqd)
-      // add 'fake' samples from this node; fake because the distances to 
+      // add 'fake' samples from this node; fake because the distances to
       // these samples need not be computed.
-      numSamplesMade[queryIndex] += 
+      numSamplesMade[queryIndex] +=
         (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 
   return (SortPolicy::IsBetter(distance, bestDistance)) ? distance : DBL_MAX;
@@ -519,7 +519,7 @@
 template<typename SortPolicy, typename MetricType, typename TreeType>
 inline double RASearchRules<SortPolicy, MetricType, TreeType>::
 Rescore(const size_t queryIndex,
-        TreeType& referenceNode, 
+        TreeType& referenceNode,
         const double oldScore)
 {
   // If we are already pruning, still prune.
@@ -529,8 +529,8 @@
   // Just check the score again against the distances.
   const double bestDistance = distances(distances.n_rows - 1, queryIndex);
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this query.
   if (SortPolicy::IsBetter(oldScore, bestDistance)
       && numSamplesMade[queryIndex] < numSamplesReqd)
@@ -538,15 +538,15 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // Here we assume that since we are re-scoring, the algorithm 
-    // has already sampled some candidates, and if specified, also 
-    // traversed to the first leaf. 
-    // So no checks regarding that is made any more. 
+    // Here we assume that since we are re-scoring, the algorithm
+    // has already sampled some candidates, and if specified, also
+    // traversed to the first leaf.
+    // So no checks regarding that is made any more.
     //
     // check if this node can be approximated by sampling
-    size_t samplesReqd = 
+    size_t samplesReqd =
       (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-    samplesReqd = std::min(samplesReqd, 
+    samplesReqd = std::min(samplesReqd,
                            numSamplesReqd - numSamplesMade[queryIndex]);
 
     if (samplesReqd > singleSampleLimit && !referenceNode.IsLeaf())
@@ -568,7 +568,7 @@
           // function so no book-keeping is required here.
           BaseCase(queryIndex,
                    referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
         // Node approximated so we can prune it
         return DBL_MAX;
       }
@@ -585,11 +585,11 @@
             // function so no book-keeping is required here.
             BaseCase(queryIndex,
                      referenceNode.Begin() + (size_t) distinctSamples[i]);
-          
+
           // (Leaf) Node approximated so can prune it
           return DBL_MAX;
         }
-        else 
+        else
         {
           // not allowed to sample from leaves, so cannot prune.
           return oldScore;
@@ -599,19 +599,19 @@
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
+    // If enough samples already made, this step does not change the
     // result of the search.
-    numSamplesMade[queryIndex] += 
+    numSamplesMade[queryIndex] +=
       (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 } // Rescore(point, node, oldScore)
 
@@ -620,7 +620,7 @@
 inline double RASearchRules<SortPolicy, MetricType, TreeType>::
 Score(TreeType& queryNode, TreeType& referenceNode)
 {
-  // First try to find the distance bound to check if we can prune 
+  // First try to find the distance bound to check if we can prune
   // by distance.
 
   // finding the best node-to-node distance
@@ -653,7 +653,7 @@
   // update the number of samples made for that node
   //  -- propagate up from child nodes if child nodes have made samples
   //     that the parent node is not aware of.
-  //     REMEMBER to propagate down samples made to the child nodes 
+  //     REMEMBER to propagate down samples made to the child nodes
   //     if 'queryNode' descend is deemed necessary.
 
   // only update from children if a non-leaf node obviously
@@ -669,18 +669,18 @@
         numSamplesMadeInChildNodes = numSamples;
     }
 
-    // The number of samples made for a node is propagated up from the 
+    // The number of samples made for a node is propagated up from the
     // child nodes if the child nodes have made samples that the parent
     // (which is the current 'queryNode') is not aware of.
-    queryNode.Stat().NumSamplesMade() 
-      = std::max(queryNode.Stat().NumSamplesMade(), 
+    queryNode.Stat().NumSamplesMade()
+      = std::max(queryNode.Stat().NumSamplesMade(),
                  numSamplesMadeInChildNodes);
   }
 
   // Now check if the node-pair interaction can be pruned
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this 'queryNode'.
   if (SortPolicy::IsBetter(distance, bestDistance)
       && queryNode.Stat().NumSamplesMade() < numSamplesReqd)
@@ -688,14 +688,14 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // If you are required to visit the first leaf (to find possible 
+    // If you are required to visit the first leaf (to find possible
     // duplicates), make sure you do not approximate.
     if (queryNode.Stat().NumSamplesMade() > 0 || !firstLeafExact)
     {
       // check if this node can be approximated by sampling
-      size_t samplesReqd = 
+      size_t samplesReqd =
         (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-      samplesReqd 
+      samplesReqd
         = std::min(samplesReqd,
                    numSamplesReqd - queryNode.Stat().NumSamplesMade());
 
@@ -703,15 +703,15 @@
       {
         // if too many samples required and not at a leaf, then can't prune
 
-        // Since query tree descend is necessary now, 
+        // Since query tree descend is necessary now,
         // propagate the number of samples made down to the children
 
-        // Iterate through all children and propagate the number of 
+        // Iterate through all children and propagate the number of
         // samples made to the children.
         // Only update if the parent node has made samples the children
         // have not seen
-        for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-          queryNode.Child(i).Stat().NumSamplesMade() 
+        for (size_t i = 0; i < queryNode.NumChildren(); i++)
+          queryNode.Child(i).Stat().NumSamplesMade()
             = std::max(queryNode.Stat().NumSamplesMade(),
                        queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -724,7 +724,7 @@
           // Then samplesReqd <= singleSampleLimit.
           // Hence approximate node by sampling enough number of points
           // for every query in the 'queryNode'
-          for (size_t queryIndex = queryNode.Begin(); 
+          for (size_t queryIndex = queryNode.Begin();
                queryIndex < queryNode.End(); queryIndex++)
           {
             arma::uvec distinctSamples;
@@ -741,8 +741,8 @@
           // also update the number of sample made for the child nodes.
           queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-          // since you are not going to descend down the query tree for this 
-          // referenceNode, there is no point updating the number of 
+          // since you are not going to descend down the query tree for this
+          // referenceNode, there is no point updating the number of
           // samples made for the child nodes of this queryNode.
 
           // Node approximated so we can prune it
@@ -754,7 +754,7 @@
           {
             // Approximate node by sampling enough number of points
             // for every query in the 'queryNode'.
-            for (size_t queryIndex = queryNode.Begin(); 
+            for (size_t queryIndex = queryNode.Begin();
                  queryIndex < queryNode.End(); queryIndex++)
             {
               arma::uvec distinctSamples;
@@ -771,22 +771,22 @@
             // also update the number of sample made for the child nodes.
             queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-            // since you are not going to descend down the query tree for this 
-            // referenceNode, there is no point updating the number of 
+            // since you are not going to descend down the query tree for this
+            // referenceNode, there is no point updating the number of
             // samples made for the child nodes of this queryNode.
 
             // (Leaf) Node approximated so can prune it
             return DBL_MAX;
           }
-          else 
+          else
           {
             // Not allowed to sample from leaves, so cannot prune.
             // Propagate the number of samples made down to the children
-            
-            // Go through all children and propagate the number of 
+
+            // Go through all children and propagate the number of
             // samples made to the children.
-            for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-              queryNode.Child(i).Stat().NumSamplesMade() 
+            for (size_t i = 0; i < queryNode.NumChildren(); i++)
+              queryNode.Child(i).Stat().NumSamplesMade()
                 = std::max(queryNode.Stat().NumSamplesMade(),
                            queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -801,36 +801,36 @@
 
       // Propagate the number of samples made down to the children
 
-      // Go through all children and propagate the number of 
+      // Go through all children and propagate the number of
       // samples made to the children.
-      for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-        queryNode.Child(i).Stat().NumSamplesMade() 
+      for (size_t i = 0; i < queryNode.NumChildren(); i++)
+        queryNode.Child(i).Stat().NumSamplesMade()
           = std::max(queryNode.Stat().NumSamplesMade(),
                      queryNode.Child(i).Stat().NumSamplesMade());
-      
+
       return distance;
     } // if first-leaf exact visit required
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
-    // result of the search since this queryNode will never be 
+    // If enough samples already made, this step does not change the
+    // result of the search since this queryNode will never be
     // descended anymore.
-    queryNode.Stat().NumSamplesMade() += 
+    queryNode.Stat().NumSamplesMade() +=
       (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    // since you are not going to descend down the query tree for this 
-    // reference node, there is no point updating the number of samples 
+    // since you are not going to descend down the query tree for this
+    // reference node, there is no point updating the number of samples
     // made for the child nodes of this queryNode.
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 } // Score(node, node)
 
@@ -838,10 +838,10 @@
 template<typename SortPolicy, typename MetricType, typename TreeType>
 inline double RASearchRules<SortPolicy, MetricType, TreeType>::
 Score(TreeType& queryNode,
-      TreeType& referenceNode, 
+      TreeType& referenceNode,
       const double baseCaseResult)
 {
-  // First try to find the distance bound to check if we can prune 
+  // First try to find the distance bound to check if we can prune
   // by distance.
 
   // find the best node-to-node distance
@@ -874,7 +874,7 @@
   // update the number of samples made for that node
   //  -- propagate up from child nodes if child nodes have made samples
   //     that the parent node is not aware of.
-  //     REMEMBER to propagate down samples made to the child nodes 
+  //     REMEMBER to propagate down samples made to the child nodes
   //     if 'queryNode' descend is deemed necessary.
 
   // only update from children if a non-leaf node obviously
@@ -890,18 +890,18 @@
         numSamplesMadeInChildNodes = numSamples;
     }
 
-    // The number of samples made for a node is propagated up from the 
+    // The number of samples made for a node is propagated up from the
     // child nodes if the child nodes have made samples that the parent
     // (which is the current 'queryNode') is not aware of.
-    queryNode.Stat().NumSamplesMade() 
-      = std::max(queryNode.Stat().NumSamplesMade(), 
+    queryNode.Stat().NumSamplesMade()
+      = std::max(queryNode.Stat().NumSamplesMade(),
                  numSamplesMadeInChildNodes);
   }
 
   // Now check if the node-pair interaction can be pruned
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this 'queryNode'.
   if (SortPolicy::IsBetter(distance, bestDistance)
       && queryNode.Stat().NumSamplesMade() < numSamplesReqd)
@@ -909,14 +909,14 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // If you are required to visit the first leaf (to find possible 
+    // If you are required to visit the first leaf (to find possible
     // duplicates), make sure you do not approximate.
     if (queryNode.Stat().NumSamplesMade() > 0 || !firstLeafExact)
     {
       // check if this node can be approximated by sampling
-      size_t samplesReqd = 
+      size_t samplesReqd =
         (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-      samplesReqd 
+      samplesReqd
         = std::min(samplesReqd,
                    numSamplesReqd - queryNode.Stat().NumSamplesMade());
 
@@ -924,15 +924,15 @@
       {
         // if too many samples required and not at a leaf, then can't prune
 
-        // Since query tree descend is necessary now, 
+        // Since query tree descend is necessary now,
         // propagate the number of samples made down to the children
 
-        // Iterate through all children and propagate the number of 
+        // Iterate through all children and propagate the number of
         // samples made to the children.
         // Only update if the parent node has made samples the children
         // have not seen
-        for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-          queryNode.Child(i).Stat().NumSamplesMade() 
+        for (size_t i = 0; i < queryNode.NumChildren(); i++)
+          queryNode.Child(i).Stat().NumSamplesMade()
             = std::max(queryNode.Stat().NumSamplesMade(),
                        queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -945,7 +945,7 @@
           // Then samplesReqd <= singleSampleLimit.
           // Hence approximate node by sampling enough number of points
           // for every query in the 'queryNode'
-          for (size_t queryIndex = queryNode.Begin(); 
+          for (size_t queryIndex = queryNode.Begin();
                queryIndex < queryNode.End(); queryIndex++)
           {
             arma::uvec distinctSamples;
@@ -962,8 +962,8 @@
           // also update the number of sample made for the child nodes.
           queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-          // since you are not going to descend down the query tree for this 
-          // referenceNode, there is no point updating the number of 
+          // since you are not going to descend down the query tree for this
+          // referenceNode, there is no point updating the number of
           // samples made for the child nodes of this queryNode.
 
           // Node approximated so we can prune it
@@ -975,7 +975,7 @@
           {
             // Approximate node by sampling enough number of points
             // for every query in the 'queryNode'.
-            for (size_t queryIndex = queryNode.Begin(); 
+            for (size_t queryIndex = queryNode.Begin();
                  queryIndex < queryNode.End(); queryIndex++)
             {
               arma::uvec distinctSamples;
@@ -992,22 +992,22 @@
             // also update the number of sample made for the child nodes.
             queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-            // since you are not going to descend down the query tree for this 
-            // referenceNode, there is no point updating the number of 
+            // since you are not going to descend down the query tree for this
+            // referenceNode, there is no point updating the number of
             // samples made for the child nodes of this queryNode.
 
             // (Leaf) Node approximated so can prune it
             return DBL_MAX;
           }
-          else 
+          else
           {
             // Not allowed to sample from leaves, so cannot prune.
             // Propagate the number of samples made down to the children
-            
-            // Go through all children and propagate the number of 
+
+            // Go through all children and propagate the number of
             // samples made to the children.
-            for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-              queryNode.Child(i).Stat().NumSamplesMade() 
+            for (size_t i = 0; i < queryNode.NumChildren(); i++)
+              queryNode.Child(i).Stat().NumSamplesMade()
                 = std::max(queryNode.Stat().NumSamplesMade(),
                            queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -1022,36 +1022,36 @@
 
       // Propagate the number of samples made down to the children
 
-      // Go through all children and propagate the number of 
+      // Go through all children and propagate the number of
       // samples made to the children.
-      for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-        queryNode.Child(i).Stat().NumSamplesMade() 
+      for (size_t i = 0; i < queryNode.NumChildren(); i++)
+        queryNode.Child(i).Stat().NumSamplesMade()
           = std::max(queryNode.Stat().NumSamplesMade(),
                      queryNode.Child(i).Stat().NumSamplesMade());
-      
+
       return distance;
     } // if first-leaf exact visit required
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
-    // result of the search since this queryNode will never be 
+    // If enough samples already made, this step does not change the
+    // result of the search since this queryNode will never be
     // descended anymore.
-    queryNode.Stat().NumSamplesMade() += 
+    queryNode.Stat().NumSamplesMade() +=
       (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    // since you are not going to descend down the query tree for this 
-    // reference node, there is no point updating the number of samples 
+    // since you are not going to descend down the query tree for this
+    // reference node, there is no point updating the number of samples
     // made for the child nodes of this queryNode.
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 } // Score(node, node, baseCaseResult)
 
@@ -1059,13 +1059,13 @@
 template<typename SortPolicy, typename MetricType, typename TreeType>
 inline double RASearchRules<SortPolicy, MetricType, TreeType>::
 Rescore(TreeType& queryNode,
-        TreeType& referenceNode, 
-        const double oldScore) 
+        TreeType& referenceNode,
+        const double oldScore)
 {
   if (oldScore == DBL_MAX)
     return oldScore;
 
-  // First try to find the distance bound to check if we can prune 
+  // First try to find the distance bound to check if we can prune
   // by distance.
   double pointBound = DBL_MAX;
   double childBound = DBL_MAX;
@@ -1094,8 +1094,8 @@
   // update the number of samples made for that node
   //  -- propagate up from child nodes if child nodes have made samples
   //     that the parent node is not aware of.
-  //     REMEMBER to propagate down samples made to the child nodes 
-  //     if the parent samples. 
+  //     REMEMBER to propagate down samples made to the child nodes
+  //     if the parent samples.
 
   // only update from children if a non-leaf node obviously
   if (!queryNode.IsLeaf())
@@ -1110,18 +1110,18 @@
         numSamplesMadeInChildNodes = numSamples;
     }
 
-    // The number of samples made for a node is propagated up from the 
+    // The number of samples made for a node is propagated up from the
     // child nodes if the child nodes have made samples that the parent
     // (which is the current 'queryNode') is not aware of.
-    queryNode.Stat().NumSamplesMade() 
-      = std::max(queryNode.Stat().NumSamplesMade(), 
+    queryNode.Stat().NumSamplesMade()
+      = std::max(queryNode.Stat().NumSamplesMade(),
                  numSamplesMadeInChildNodes);
   }
 
   // Now check if the node-pair interaction can be pruned by sampling
 
-  // If this is better than the best distance we've seen so far, 
-  // maybe there will be something down this node. 
+  // If this is better than the best distance we've seen so far,
+  // maybe there will be something down this node.
   // Also check if enough samples are already made for this query.
   if (SortPolicy::IsBetter(oldScore, bestDistance)
       && queryNode.Stat().NumSamplesMade() < numSamplesReqd)
@@ -1129,15 +1129,15 @@
     // We cannot prune this node
     // Try approximating this node by sampling
 
-    // Here we assume that since we are re-scoring, the algorithm 
-    // has already sampled some candidates, and if specified, also 
-    // traversed to the first leaf. 
-    // So no checks regarding that is made any more. 
+    // Here we assume that since we are re-scoring, the algorithm
+    // has already sampled some candidates, and if specified, also
+    // traversed to the first leaf.
+    // So no checks regarding that is made any more.
     //
     // check if this node can be approximated by sampling
-    size_t samplesReqd = 
+    size_t samplesReqd =
       (size_t) std::ceil(samplingRatio * (double) referenceNode.Count());
-    samplesReqd 
+    samplesReqd
       = std::min(samplesReqd,
                  numSamplesReqd - queryNode.Stat().NumSamplesMade());
 
@@ -1145,15 +1145,15 @@
     {
       // if too many samples required and not at a leaf, then can't prune
 
-      // Since query tree descend is necessary now, 
+      // Since query tree descend is necessary now,
       // propagate the number of samples made down to the children
 
-      // Go through all children and propagate the number of 
+      // Go through all children and propagate the number of
       // samples made to the children.
       // Only update if the parent node has made samples the children
       // have not seen
-      for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-        queryNode.Child(i).Stat().NumSamplesMade() 
+      for (size_t i = 0; i < queryNode.NumChildren(); i++)
+        queryNode.Child(i).Stat().NumSamplesMade()
           = std::max(queryNode.Stat().NumSamplesMade(),
                      queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -1166,7 +1166,7 @@
         // Then samplesReqd <= singleSampleLimit.
         // Hence approximate node by sampling enough number of points
         // for every query in the 'queryNode'
-        for (size_t queryIndex = queryNode.Begin(); 
+        for (size_t queryIndex = queryNode.Begin();
              queryIndex < queryNode.End(); queryIndex++)
         {
           arma::uvec distinctSamples;
@@ -1178,13 +1178,13 @@
             BaseCase(queryIndex,
                      referenceNode.Begin() + (size_t) distinctSamples[i]);
         }
-        
+
         // update the number of samples made for the queryNode and
         // also update the number of sample made for the child nodes.
         queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-        // since you are not going to descend down the query tree for this 
-        // referenceNode, there is no point updating the number of 
+        // since you are not going to descend down the query tree for this
+        // referenceNode, there is no point updating the number of
         // samples made for the child nodes of this queryNode.
 
         // Node approximated so we can prune it
@@ -1196,7 +1196,7 @@
         {
           // Approximate node by sampling enough number of points
           // for every query in the 'queryNode'.
-          for (size_t queryIndex = queryNode.Begin(); 
+          for (size_t queryIndex = queryNode.Begin();
                queryIndex < queryNode.End(); queryIndex++)
           {
             arma::uvec distinctSamples;
@@ -1213,22 +1213,22 @@
           // also update the number of sample made for the child nodes.
           queryNode.Stat().NumSamplesMade() += samplesReqd;
 
-          // since you are not going to descend down the query tree for this 
-          // referenceNode, there is no point updating the number of 
+          // since you are not going to descend down the query tree for this
+          // referenceNode, there is no point updating the number of
           // samples made for the child nodes of this queryNode.
-          
+
           // (Leaf) Node approximated so can prune it
           return DBL_MAX;
         }
-        else 
+        else
         {
           // not allowed to sample from leaves, so cannot prune.
           // propagate the number of samples made down to the children
-            
-          // going through all children and propagate the number of 
+
+          // going through all children and propagate the number of
           // samples made to the children.
-          for (size_t i = 0; i < queryNode.NumChildren(); i++) 
-            queryNode.Child(i).Stat().NumSamplesMade() 
+          for (size_t i = 0; i < queryNode.NumChildren(); i++)
+            queryNode.Child(i).Stat().NumSamplesMade()
               = std::max(queryNode.Stat().NumSamplesMade(),
                          queryNode.Child(i).Stat().NumSamplesMade());
 
@@ -1239,24 +1239,24 @@
   }
   else
   {
-    // Either there cannot be anything better in this node. 
-    // Or enough number of samples are already made. 
+    // Either there cannot be anything better in this node.
+    // Or enough number of samples are already made.
     // So prune it.
 
-    // add 'fake' samples from this node; fake because the distances to 
+    // add 'fake' samples from this node; fake because the distances to
     // these samples need not be computed.
 
-    // If enough samples already made, this step does not change the 
-    // result of the search since this queryNode will never be 
+    // If enough samples already made, this step does not change the
+    // result of the search since this queryNode will never be
     // descended anymore.
-    queryNode.Stat().NumSamplesMade() += 
+    queryNode.Stat().NumSamplesMade() +=
       (size_t) std::floor(samplingRatio * (double) referenceNode.Count());
 
-    // since you are not going to descend down the query tree for this 
-    // reference node, there is no point updating the number of samples 
+    // since you are not going to descend down the query tree for this
+    // reference node, there is no point updating the number of samples
     // made for the child nodes of this queryNode.
 
-    return DBL_MAX; 
+    return DBL_MAX;
   } // if can-prune
 } // Rescore(node, node, oldScore)
 




More information about the mlpack-svn mailing list