[mlpack-svn] r16796 - in mlpack/trunk/src/mlpack: methods/decision_stump tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 9 15:19:52 EDT 2014
Author: saxena.udit
Date: Wed Jul 9 15:19:52 2014
New Revision: 16796
Log:
Entropy calculation improved.
Modified:
mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
mlpack/trunk/src/mlpack/tests/decision_stump_test.cpp
Modified: mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp Wed Jul 9 15:19:52 2014
@@ -35,10 +35,12 @@
bucketSize = inpBucketSize;
// If classLabels are not all identical, proceed with training.
- int bestAtt = -1;
+ int bestAtt = 0;
double entropy;
- double bestEntropy = -DBL_MAX;
-
+ double rootEntropy = CalculateEntropy<size_t>(labels.subvec(0,labels.n_elem-1));
+ // std::cout<<"rootEntropy is: "<<rootEntropy<<"\n";
+ // double bestEntropy = DBL_MAX;
+ double gain, bestGain = 0.0;
for (int i = 0; i < data.n_rows; i++)
{
// Go through each attribute of the data.
@@ -49,13 +51,18 @@
entropy = SetupSplitAttribute(data.row(i), labels);
Log::Debug << "Entropy for attribute " << i << " is " << entropy << ".\n";
-
+ gain = rootEntropy - entropy;
// Find the attribute with the best entropy so that the gain is
// maximized.
- if (entropy > bestEntropy)
+
+ // if (entropy < bestEntropy)
+ // Instead of the above rule, we are maximizing gain, which is computed
+ // from the entropy returned by SetupSplitAttribute.
+ if (gain < bestGain)
{
bestAtt = i;
- bestEntropy = entropy;
+ // bestEntropy = entropy;
+ bestGain = gain;
}
}
}
@@ -380,6 +387,7 @@
entropy += (p1 == 0) ? 0 : p1 * log2(p1);
}
+
return entropy;
}
Modified: mlpack/trunk/src/mlpack/tests/decision_stump_test.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/tests/decision_stump_test.cpp (original)
+++ mlpack/trunk/src/mlpack/tests/decision_stump_test.cpp Wed Jul 9 15:19:52 2014
@@ -221,7 +221,7 @@
BOOST_AUTO_TEST_CASE(DimensionSelectionTest)
{
const size_t numClasses = 2;
- const size_t inpBucketSize = 25;
+ const size_t inpBucketSize = 2500;
arma::mat dataset(4, 5000);
@@ -294,17 +294,35 @@
DecisionStump<> ds(dataset, labels, numClasses, inpBucketSize);
// Make sure it split on the dimension that is most separable.
- BOOST_REQUIRE_EQUAL(ds.SplitAttribute(), 1);
+ BOOST_CHECK_EQUAL(ds.SplitAttribute(), 1);
// Make sure every bin below -1 classifies as label 0, and every bin above 1
// classifies as label 1 (What happens in [-1, 1] isn't that big a deal.).
for (size_t i = 0; i < ds.Split().n_elem; ++i)
{
if (ds.Split()[i] <= -3.0)
- BOOST_REQUIRE_EQUAL(ds.BinLabels()[i], 0);
+ BOOST_CHECK_EQUAL(ds.BinLabels()[i], 0);
else if (ds.Split()[i] >= 3.0)
- BOOST_REQUIRE_EQUAL(ds.BinLabels()[i], 1);
+ BOOST_CHECK_EQUAL(ds.BinLabels()[i], 1);
}
}
+// Checks which attribute the stump chooses to split on for a tiny,
+// hand-built dataset.
+BOOST_AUTO_TEST_CASE(TempAttributeSplit)
+{
+ const size_t numClasses = 2;
+ const size_t inpBucketSize = 3;
+
+ // Two attributes (rows) over six points (columns).
+ mat trainingData;
+ trainingData << 1 << 1 << 1 << 2 << 2 << 2 << endr
+ << 0.5 << 0.6 << 0.7 << 0.4 << 0.3 << 0.5 << endr;
+
+ // One label per column of trainingData (six, not seven). With these labels
+ // attribute 0 separates the two classes exactly: value 1 -> class 0 and
+ // value 2 -> class 1.
+ Mat<size_t> labelsIn;
+ labelsIn << 0 << 0 << 0 << 1 << 1 << 1;
+
+ DecisionStump<> ds(trainingData, labelsIn.row(0), numClasses, inpBucketSize);
+
+ // The stump is expected to pick attribute 0 as the splitting attribute.
+ BOOST_CHECK_EQUAL(ds.SplitAttribute(), 0);
+}
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn
mailing list