[mlpack-svn] r13320 - mlpack/trunk/src/mlpack/methods/det

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Aug 2 17:40:35 EDT 2012


Author: rcurtin
Date: 2012-08-02 17:40:35 -0400 (Thu, 02 Aug 2012)
New Revision: 13320

Modified:
   mlpack/trunk/src/mlpack/methods/det/det_main.cpp
   mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp
   mlpack/trunk/src/mlpack/methods/det/dtree.cpp
Log:
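Fix DET so it actually works.  The number of classes is now found with an explicit loop over the labels, the cross-validation regularization terms are accumulated outside of log-space, the alpha comparisons used while pruning are reversed, and the recursive PruneAndUpdate() calls now pass `points`.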


Modified: mlpack/trunk/src/mlpack/methods/det/det_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/det_main.cpp	2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/det_main.cpp	2012-08-02 21:40:35 UTC (rev 13320)
@@ -171,7 +171,13 @@
 
     data::Load(labelsFile, labels, true);
 
-    size_t numClasses = max(max(labels));
+    size_t numClasses = 0;
+    for (size_t i = 0; i < labels.n_elem; ++i)
+    {
+      if (labels[i] > numClasses)
+        numClasses = labels[i];
+    }
+
     Log::Info << numClasses << " found in labels file '" << labelsFile << "'."
         << std::endl;
 

Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp	2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp	2012-08-02 21:40:35 UTC (rev 13320)
@@ -150,7 +150,6 @@
   {
     std::pair<double, double> treeSeq(oldAlpha,
         dtree->SubtreeLeavesLogNegError());
-    Log::Debug << "sllne " << dtree->SubtreeLeavesLogNegError() << std::endl;
     prunedSequence.push_back(treeSeq);
     oldAlpha = alpha;
     alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
@@ -174,6 +173,9 @@
   arma::mat cvData(dataset);
   size_t testSize = dataset.n_cols / folds;
 
+  std::vector<double> regularizationConstants;
+  regularizationConstants.resize(prunedSequence.size(), 0);
+
   // Go through each fold.
   for (size_t fold = 0; fold < folds; fold++)
   {
@@ -213,25 +215,22 @@
 
     // Sequentially prune with all the values of available alphas and adding
     // values for test values.
-    std::vector<std::pair<double, double> >::iterator it;
-    for (it = prunedSequence.begin(); it < prunedSequence.end() - 2; ++it)
+    for (size_t i = 0; i < prunedSequence.size() - 2; ++i)
     {
       // Compute test values for this state of the tree.
       double cvVal = 0.0;
-      for (size_t i = 0; i < test.n_cols; i++)
+      for (size_t j = 0; j < test.n_cols; j++)
       {
-        arma::vec testPoint = test.unsafe_col(i);
+        arma::vec testPoint = test.unsafe_col(j);
         cvVal += cvDTree->ComputeValue(testPoint);
       }
 
-      // Update the cv error value by mapping out of log-space then back into
-      // it, using long doubles.
-      long double notLogVal = -std::exp((long double) it->second) -
-          2.0 * cvVal / (double) dataset.n_cols;
-      it->second = (double) std::log(-notLogVal);
+      // Update the cv regularization constant.
+      regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols;
 
       // Determine the new alpha value and prune accordingly.
-      oldAlpha = sqrt(((it + 1)->first) * ((it + 2)->first));
+      oldAlpha = 0.5 * (prunedSequence[i + 1].first +
+          prunedSequence[i + 2].first);
       alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg);
     }
 
@@ -243,25 +242,27 @@
       cvVal += cvDTree->ComputeValue(testPoint);
     }
 
-    // Update the cv error value.
-    long double notLogVal = -std::exp((long double) it->second) -
-        2.0 * cvVal / (double) dataset.n_cols;
-    it->second -= (double) std::log(-notLogVal);
+    regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal /
+        (double) dataset.n_cols;
 
     test.reset();
     delete cvDTree;
   }
 
   double optimalAlpha = -1.0;
-  double cvBestError = std::numeric_limits<double>::max();
-  std::vector<std::pair<double, double> >::iterator it;
+  long double cvBestError = -std::numeric_limits<long double>::max();
 
-  for (it = prunedSequence.begin(); it < prunedSequence.end() -1; ++it)
+  for (size_t i = 0; i < prunedSequence.size() - 1; ++i)
   {
-    if (it->second < cvBestError)
+    // We can no longer work in the log-space for this because we have no
+    // guarantee the quantity will be positive.
+    long double thisError = -std::exp((long double) prunedSequence[i].second) +
+        (long double) regularizationConstants[i];
+
+    if (thisError > cvBestError)
     {
-      cvBestError = it->second;
-      optimalAlpha = it->first;
+      cvBestError = thisError;
+      optimalAlpha = prunedSequence[i].first;
     }
   }
 
@@ -278,12 +279,12 @@
   newDataset = dataset;
 
   // Grow the tree.
-  oldAlpha = 0.0;
+  oldAlpha = -DBL_MAX;
   alpha = dtreeOpt->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
       minLeafSize);
 
   // Prune with optimal alpha.
-  while ((oldAlpha > optimalAlpha) && (dtreeOpt->SubtreeLeaves() > 1))
+  while ((oldAlpha < optimalAlpha) && (dtreeOpt->SubtreeLeaves() > 1))
   {
     oldAlpha = alpha;
     alpha = dtreeOpt->PruneAndUpdate(oldAlpha, newDataset.n_cols, useVolumeReg);
@@ -291,7 +292,7 @@
     // Some sanity checks.
     Log::Assert((alpha < std::numeric_limits<double>::max()) ||
         (dtreeOpt->SubtreeLeaves() == 1));
-    Log::Assert(alpha < oldAlpha);
+    Log::Assert(alpha > oldAlpha);
   }
 
   Log::Info << dtreeOpt->SubtreeLeaves() << " leaf nodes in the optimally "

Modified: mlpack/trunk/src/mlpack/methods/det/dtree.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.cpp	2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.cpp	2012-08-02 21:40:35 UTC (rev 13320)
@@ -436,11 +436,13 @@
     else
       gT = alphaUpper - std::log(subtreeLeaves - 1);
 
-    if (gT < oldAlpha)
+//    Log::Debug << "gT is " << gT << " oldAlpha is " << oldAlpha << std::endl;
+
+    if (gT > oldAlpha)
     {
       // Go down the tree and update accordingly.  Traverse the children.
-      double leftG = left->PruneAndUpdate(oldAlpha, useVolReg);
-      double rightG = right->PruneAndUpdate(oldAlpha, useVolReg);
+      double leftG = left->PruneAndUpdate(oldAlpha, points, useVolReg);
+      double rightG = right->PruneAndUpdate(oldAlpha, points, useVolReg);
 
       // Update values.
       subtreeLeaves = left->SubtreeLeaves() + right->SubtreeLeaves();
@@ -472,6 +474,8 @@
       double tmpAlphaSum = leftPow / leftRatio + rightPow / rightRatio -
           thisPow;
 
+//      Log::Debug << "tmpAlphaSum is " << tmpAlphaSum;
+
       if (left->SubtreeLeaves() > 1)
       {
         const double exponent = 2 * std::log(points) + logVolume +
@@ -482,6 +486,8 @@
         tmpAlphaSum += std::exp(exponent);
       }
 
+//      Log::Debug << " then " << tmpAlphaSum;
+
       if (right->SubtreeLeaves() > 1)
       {
         const double exponent = 2 * std::log(points) + logVolume +
@@ -490,8 +496,12 @@
         tmpAlphaSum += std::exp(exponent);
       }
 
+//      Log::Debug << " then " << tmpAlphaSum << std::endl;
+
       alphaUpper = std::log(tmpAlphaSum) - 2 * std::log(points) - logVolume;
 
+//      Log::Debug << "alphaUpper is " << alphaUpper << std::endl;
+
       // Update gT value.
       if (useVolReg)
       {
@@ -503,8 +513,12 @@
         gT = alphaUpper - std::log(subtreeLeaves - 1);
       }
 
-      assert(gT < std::numeric_limits<double>::max());
+//      Log::Debug << "and gT is " << gT << std::endl;
 
+      Log::Assert(gT < std::numeric_limits<double>::max());
+
+//      Log::Debug << "gT " << gT << " leftG " << leftG << " rightG " << rightG
+//          << std::endl;
       return std::min(gT, std::min(leftG, rightG));
     }
     else




More information about the mlpack-svn mailing list