[mlpack-svn] r13320 - mlpack/trunk/src/mlpack/methods/det
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Aug 2 17:40:35 EDT 2012
Author: rcurtin
Date: 2012-08-02 17:40:35 -0400 (Thu, 02 Aug 2012)
New Revision: 13320
Modified:
mlpack/trunk/src/mlpack/methods/det/det_main.cpp
mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp
mlpack/trunk/src/mlpack/methods/det/dtree.cpp
Log:
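Fix DET so it actually works. A few things here and there needed to be changed:
the maximum label is found with an explicit loop over the labels; cross-validation
accumulates per-alpha regularization constants and compares errors outside of
log-space (selecting the largest value); the intermediate pruning alpha is the
arithmetic mean of consecutive alphas instead of the geometric mean; the alpha
comparisons used while pruning are reversed; the recursive PruneAndUpdate() calls
now pass the number of points; and leftover debug output is removed or commented out.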
Modified: mlpack/trunk/src/mlpack/methods/det/det_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/det_main.cpp 2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/det_main.cpp 2012-08-02 21:40:35 UTC (rev 13320)
@@ -171,7 +171,13 @@
data::Load(labelsFile, labels, true);
- size_t numClasses = max(max(labels));
+ size_t numClasses = 0;
+ for (size_t i = 0; i < labels.n_elem; ++i)
+ {
+ if (labels[i] > numClasses)
+ numClasses = labels[i];
+ }
+
Log::Info << numClasses << " found in labels file '" << labelsFile << "'."
<< std::endl;
Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp 2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.cpp 2012-08-02 21:40:35 UTC (rev 13320)
@@ -150,7 +150,6 @@
{
std::pair<double, double> treeSeq(oldAlpha,
dtree->SubtreeLeavesLogNegError());
- Log::Debug << "sllne " << dtree->SubtreeLeavesLogNegError() << std::endl;
prunedSequence.push_back(treeSeq);
oldAlpha = alpha;
alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
@@ -174,6 +173,9 @@
arma::mat cvData(dataset);
size_t testSize = dataset.n_cols / folds;
+ std::vector<double> regularizationConstants;
+ regularizationConstants.resize(prunedSequence.size(), 0);
+
// Go through each fold.
for (size_t fold = 0; fold < folds; fold++)
{
@@ -213,25 +215,22 @@
// Sequentially prune with all the values of available alphas and adding
// values for test values.
- std::vector<std::pair<double, double> >::iterator it;
- for (it = prunedSequence.begin(); it < prunedSequence.end() - 2; ++it)
+ for (size_t i = 0; i < prunedSequence.size() - 2; ++i)
{
// Compute test values for this state of the tree.
double cvVal = 0.0;
- for (size_t i = 0; i < test.n_cols; i++)
+ for (size_t j = 0; j < test.n_cols; j++)
{
- arma::vec testPoint = test.unsafe_col(i);
+ arma::vec testPoint = test.unsafe_col(j);
cvVal += cvDTree->ComputeValue(testPoint);
}
- // Update the cv error value by mapping out of log-space then back into
- // it, using long doubles.
- long double notLogVal = -std::exp((long double) it->second) -
- 2.0 * cvVal / (double) dataset.n_cols;
- it->second = (double) std::log(-notLogVal);
+ // Update the cv regularization constant.
+ regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols;
// Determine the new alpha value and prune accordingly.
- oldAlpha = sqrt(((it + 1)->first) * ((it + 2)->first));
+ oldAlpha = 0.5 * (prunedSequence[i + 1].first +
+ prunedSequence[i + 2].first);
alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg);
}
@@ -243,25 +242,27 @@
cvVal += cvDTree->ComputeValue(testPoint);
}
- // Update the cv error value.
- long double notLogVal = -std::exp((long double) it->second) -
- 2.0 * cvVal / (double) dataset.n_cols;
- it->second -= (double) std::log(-notLogVal);
+ regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal /
+ (double) dataset.n_cols;
test.reset();
delete cvDTree;
}
double optimalAlpha = -1.0;
- double cvBestError = std::numeric_limits<double>::max();
- std::vector<std::pair<double, double> >::iterator it;
+ long double cvBestError = -std::numeric_limits<long double>::max();
- for (it = prunedSequence.begin(); it < prunedSequence.end() -1; ++it)
+ for (size_t i = 0; i < prunedSequence.size() - 1; ++i)
{
- if (it->second < cvBestError)
+ // We can no longer work in the log-space for this because we have no
+ // guarantee the quantity will be positive.
+ long double thisError = -std::exp((long double) prunedSequence[i].second) +
+ (long double) regularizationConstants[i];
+
+ if (thisError > cvBestError)
{
- cvBestError = it->second;
- optimalAlpha = it->first;
+ cvBestError = thisError;
+ optimalAlpha = prunedSequence[i].first;
}
}
@@ -278,12 +279,12 @@
newDataset = dataset;
// Grow the tree.
- oldAlpha = 0.0;
+ oldAlpha = -DBL_MAX;
alpha = dtreeOpt->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
minLeafSize);
// Prune with optimal alpha.
- while ((oldAlpha > optimalAlpha) && (dtreeOpt->SubtreeLeaves() > 1))
+ while ((oldAlpha < optimalAlpha) && (dtreeOpt->SubtreeLeaves() > 1))
{
oldAlpha = alpha;
alpha = dtreeOpt->PruneAndUpdate(oldAlpha, newDataset.n_cols, useVolumeReg);
@@ -291,7 +292,7 @@
// Some sanity checks.
Log::Assert((alpha < std::numeric_limits<double>::max()) ||
(dtreeOpt->SubtreeLeaves() == 1));
- Log::Assert(alpha < oldAlpha);
+ Log::Assert(alpha > oldAlpha);
}
Log::Info << dtreeOpt->SubtreeLeaves() << " leaf nodes in the optimally "
Modified: mlpack/trunk/src/mlpack/methods/det/dtree.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.cpp 2012-08-02 16:57:42 UTC (rev 13319)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.cpp 2012-08-02 21:40:35 UTC (rev 13320)
@@ -436,11 +436,13 @@
else
gT = alphaUpper - std::log(subtreeLeaves - 1);
- if (gT < oldAlpha)
+// Log::Debug << "gT is " << gT << " oldAlpha is " << oldAlpha << std::endl;
+
+ if (gT > oldAlpha)
{
// Go down the tree and update accordingly. Traverse the children.
- double leftG = left->PruneAndUpdate(oldAlpha, useVolReg);
- double rightG = right->PruneAndUpdate(oldAlpha, useVolReg);
+ double leftG = left->PruneAndUpdate(oldAlpha, points, useVolReg);
+ double rightG = right->PruneAndUpdate(oldAlpha, points, useVolReg);
// Update values.
subtreeLeaves = left->SubtreeLeaves() + right->SubtreeLeaves();
@@ -472,6 +474,8 @@
double tmpAlphaSum = leftPow / leftRatio + rightPow / rightRatio -
thisPow;
+// Log::Debug << "tmpAlphaSum is " << tmpAlphaSum;
+
if (left->SubtreeLeaves() > 1)
{
const double exponent = 2 * std::log(points) + logVolume +
@@ -482,6 +486,8 @@
tmpAlphaSum += std::exp(exponent);
}
+// Log::Debug << " then " << tmpAlphaSum;
+
if (right->SubtreeLeaves() > 1)
{
const double exponent = 2 * std::log(points) + logVolume +
@@ -490,8 +496,12 @@
tmpAlphaSum += std::exp(exponent);
}
+// Log::Debug << " then " << tmpAlphaSum << std::endl;
+
alphaUpper = std::log(tmpAlphaSum) - 2 * std::log(points) - logVolume;
+// Log::Debug << "alphaUpper is " << alphaUpper << std::endl;
+
// Update gT value.
if (useVolReg)
{
@@ -503,8 +513,12 @@
gT = alphaUpper - std::log(subtreeLeaves - 1);
}
- assert(gT < std::numeric_limits<double>::max());
+// Log::Debug << "and gT is " << gT << std::endl;
+ Log::Assert(gT < std::numeric_limits<double>::max());
+
+// Log::Debug << "gT " << gT << " leftG " << leftG << " rightG " << rightG
+// << std::endl;
return std::min(gT, std::min(leftG, rightG));
}
else
More information about the mlpack-svn
mailing list