[mlpack-svn] r12196 - mlpack/trunk/src/mlpack/methods/det
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Apr 4 12:46:33 EDT 2012
Author: pram
Date: 2012-04-04 12:46:32 -0400 (Wed, 04 Apr 2012)
New Revision: 12196
Modified:
mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
mlpack/trunk/src/mlpack/methods/det/dtree.hpp
mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
Log:
DET naming schemes
Modified: mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-04-04 16:46:32 UTC (rev 12196)
@@ -9,6 +9,7 @@
#include "dt_utils.hpp"
using namespace mlpack;
+using namespace mlpack::det;
using namespace std;
PROGRAM_INFO("Density estimation with DET", "This program "
@@ -73,22 +74,10 @@
"variable importance of each feature "
"out on the command line.", "I");
-int main(int argc, char *argv[]) {
-
-
+int main(int argc, char *argv[])
+{
CLI::ParseCommandLine(argc, argv);
- DTree<>* test_pvt = new DTree<>();
- bool test_success = test_pvt->TestPrivateFunctions();
-
- if (test_success) {
- Log::Warn << "Private functions tests successful." << endl;
- } else {
- Log::Warn << "Private functions tests failed." << endl;
- }
-
- exit(0);
-
string train_set_file = CLI::GetParam<string>("S");
arma::Mat<float> training_data;
@@ -117,7 +106,7 @@
= CLI::GetParam<string>("u");
Timer::Start("DET/Training");
- DTree<float> *dtree_opt = dt_utils::Trainer<float>
+ DTree<float> *dtree_opt = Trainer<float>
(&training_data, folds, CLI::HasParam("R"), CLI::GetParam<int>("M"),
CLI::GetParam<int>("N"), unpruned_tree_estimate_file);
Timer::Stop("DET/Training");
@@ -221,14 +210,14 @@
assert(training_data.n_cols == labels.n_cols);
assert(labels.n_rows == 1);
- dt_utils::PrintLeafMembership<float>
+ PrintLeafMembership<float>
(dtree_opt, training_data, labels, num_classes,
(string) CLI::GetParam<string>("l"));
} // leaf class membership
if(CLI::HasParam("I")) {
- dt_utils::PrintVariableImportance<float>
+ PrintVariableImportance<float>
(dtree_opt, training_data.n_rows,
(string) CLI::GetParam<string>("i"));
} // print variable importance
Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-04-04 16:46:32 UTC (rev 12196)
@@ -6,20 +6,19 @@
* different tasks with the Density Tree class.
*/
-#ifndef DT_UTILS_HPP
-#define DT_UTILS_HPP
+#ifndef __MLPACK_METHODS_DET_DT_UTILS_HPP
+#define __MLPACK_METHODS_DET_DT_UTILS_HPP
#include <string>
#include <mlpack/core.hpp>
#include "dtree.hpp"
-using namespace mlpack;
using namespace std;
+namespace mlpack {
+namespace det {
-namespace dt_utils {
-
template<typename eT>
void PrintLeafMembership(DTree<eT> *dtree,
const arma::Mat<eT>& data,
@@ -328,6 +327,7 @@
return dtree_opt;
} // Trainer
-}; // namespace dt_utils
+}; // namespace det
+}; // namespace mlpack
-#endif
+#endif // __MLPACK_METHODS_DET_DT_UTILS_HPP
Modified: mlpack/trunk/src/mlpack/methods/det/dtree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-04-04 16:46:32 UTC (rev 12196)
@@ -2,15 +2,13 @@
* @file dtree.hpp
* @author Parikshit Ram (pram at cc.gatech.edu)
*
- * Density Tree class
- *
+ * Density Estimation Tree class
*/
-#ifndef DTREE_HPP
-#define DTREE_HPP
+#ifndef __MLPACK_METHODS_DET_DTREE_HPP
+#define __MLPACK_METHODS_DET_DTREE_HPP
#include <assert.h>
-#include <vector>
#include <mlpack/core.hpp>
@@ -18,6 +16,9 @@
using namespace std;
+namespace mlpack {
+namespace det /** Density Estimation Trees */ {
+
// This two types in the template are used
// for two purposes:
// eT - the type to store the data in (for most practical
@@ -407,6 +408,9 @@
}; // Class DTree
+}; // namespace det
+}; // namespace mlpack
+
#include "dtree_impl.hpp"
-#endif
+#endif // __MLPACK_METHODS_DET_DTREE_HPP
Modified: mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-04-04 16:46:32 UTC (rev 12196)
@@ -3,17 +3,18 @@
* @author Parikshit Ram (pram at cc.gatech.edu)
*
* Implementations of some declared functions in
- * the Density Tree class.
+ * the Density Estimation Tree class.
*
*/
-#ifndef DTREE_IMPL_HPP
-#define DTREE_IMPL_HPP
+#ifndef __MLPACK_METHODS_DET_DTREE_IMPL_HPP
+#define __MLPACK_METHODS_DET_DTREE_IMPL_HPP
#include "dtree.hpp"
+namespace mlpack{
+namespace det {
-
// This function computes the l2-error of a given node
// from the formula - R(t) = -|t|^2 / (N^2 V_t)
template<typename eT, typename cT>
@@ -68,8 +69,6 @@
bool some_split_found = false;
size_t point_mass_in_dim = 0;
- // printf("In FindSplit %Lg\n", error_);fflush(NULL);
-
// loop through each dimension
for (size_t dim = 0; dim < max_vals_->n_elem; dim++) {
// have to deal with REAL, INTEGER, NOMINAL data
@@ -96,7 +95,6 @@
assert(std::exp(log_range_all_not_dim) > 0);
// get the values for the dimension
- // NEED TO CHECK: if this works correctly
RowVecType dim_val_vec = data->row(dim).subvec(start_, end_ - 1);
// sort the values in ascending order
@@ -104,11 +102,10 @@
// get ready to go through the sorted list and compute error
assert(dim_val_vec.n_elem > maxLeafSize);
- // enforcing the leaves to have a minimum of MIN_LEAF_SIZE
+ // enforcing the leaves to have a minimum
// number of points to avoid spikes
-
// one way of doing it is only considering splits resulting
- // in sizes > MIN_LEAF_SIZE
+ // in sizes > some constant (minLeafSize)
size_t left_child_size = minLeafSize - 1, right_child_size;
// finding the best split for this dimension
@@ -145,11 +142,6 @@
cT temp_l_error = -1.0 * std::exp(temp_log_neg_l_error);
-// = -1.0 * ((cT)(i + 1) / (cT)total_n)
-// * ((cT)(i + 1) / (cT)total_n)
-// / (std::exp(log_range_all_not_dim
-// + (cT) std::log(split - min)));
-
assert(std::abs(temp_l_error)
< std::numeric_limits<cT>::max());
@@ -164,11 +156,6 @@
cT temp_r_error = -1.0 * std::exp(temp_log_neg_r_error);
-// = -1.0 * ((cT) (n_t - i - 1) / (cT)total_n)
-// * ((cT) (n_t - i - 1) / (cT)total_n)
-// / (std::exp(log_range_all_not_dim
-// + (cT) std::log(max - split)));
-
assert(std::abs(temp_r_error)
< std::numeric_limits<cT>::max());
@@ -308,9 +295,6 @@
right_(NULL)
{
error_ = ComputeNodeError_(total_points);
- // if this assert fails, this implies that you need
- // a higher precision (or higher range) 'eT'
- assert(std::abs(error_) < std::numeric_limits<cT>::max());
bucket_tag_ = -1;
root_ = true;
@@ -438,9 +422,9 @@
&left_error, &right_error,
maxLeafSize, minLeafSize)) {
- // printf("Split found\n");fflush(NULL);
- // Split the data for the children
- // MatType data_l, data_r;
+ // Move the data around for the children
+ // to have points in a node lie contiguously
+ // (to increase efficiency during the training).
eT split_val, lsplit_val, rsplit_val;
SplitData_(data, dim, split_ind,
old_from_new, &split_val,
@@ -452,8 +436,8 @@
VecType* min_vals_l = new VecType(*min_vals_);
VecType* min_vals_r = new VecType(*min_vals_);
- (*max_vals_l)[dim] = split_val; // changed from just lsplit_val
- (*min_vals_r)[dim] = split_val; // changed from just rsplit_val
+ (*max_vals_l)[dim] = split_val;
+ (*min_vals_r)[dim] = split_val;
// store split dim and split val in the node
split_value_ = split_val;
@@ -482,15 +466,6 @@
subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv()
+ right_->subtree_leaves_v_t_inv();
- // // storing the sum of the estimates (OF WHAT)
- // st_estimate_ = left_->st_estimate() + right_->st_estimate();
-
- // // storing del_f / del r(split_dim)
- // cT del_f = (ratio_ * v_t_inv_)
- // - (left_->ratio() * left_->v_t_inv());
- // cT del_r = max_vals_[split_dim_] - split_value_;
- // del_f_del_r_ = fabs(del_f / del_r);
-
// Forming T1 by removing leaves for which
// R(t) = R(t_L) + R(t_R)
if ((left_->subtree_leaves() == 1)
@@ -510,8 +485,6 @@
subtree_leaves_ = 1;
subtree_leaves_error_ = error_;
subtree_leaves_v_t_inv_ = v_t_inv_;
-// st_estimate_ = ratio_ * ratio_ * v_t_inv_;
-// del_f_del_r_ = 0.0;
} // end if-else
} else {
// We can make this a leaf node
@@ -520,11 +493,6 @@
subtree_leaves_error_ = error_;
subtree_leaves_v_t_inv_ = v_t_inv_;
- // // TO CHECK:
- // // if these are the density estimate
- // // it should be ratio_ * v_t_inv_
- // st_estimate_ = ratio_ * ratio_ * v_t_inv_;
- // del_f_del_r_ = 0.0;
} // end if-else
// if leaf do not compute g_k(t), else compute, store,
@@ -586,10 +554,6 @@
subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv()
+ right_->subtree_leaves_v_t_inv();
- // // updating values for the sum of density estimates
- // st_estimate_
- // = left_->st_estimate() + right_->st_estimate();
-
// update g_t value
if (useVolReg) {
g_t = (error_ - subtree_leaves_error_)
@@ -613,24 +577,10 @@
}
} else { // prune this subtree
- // otherwise this should be equal to the alpha
- // for this node. So we check that:
- // assert(g_t == old_alpha, "Alpha != g(t) but less than!!");
-
- // // compute \del f_hat(x) / \del r(split_dim)
- // cT st_change_in_estimate
- // = st_estimate_ - (ratio_ * ratio_ * v_t_inv_);
-
- // printf("%lg:%lg Pruned %lg\n",
- // old_alpha, del_f_del_r_, st_change_in_estimate);
-
-
// making this node a leaf node
subtree_leaves_ = 1;
subtree_leaves_error_ = error_;
subtree_leaves_v_t_inv_ = v_t_inv_;
-// st_estimate_ = ratio_ * ratio_ * v_t_inv_;
-// del_f_del_r_ = 0.0;
delete left_;
left_ = NULL;
delete right_;
@@ -646,8 +596,8 @@
// bounding box of this node (check generally done
// at the root, so its the bounding box of the data)
//
-// Improvement: To open up the range with epsilons on
-// both sides where epsilon on the density near the boundary.
+// Future improvement: To open up the range with epsilons on
+// both sides where epsilon depends on the density near the boundary.
template<typename eT, typename cT>
bool DTree<eT, cT>::
WithinRange_(VecType* query)
@@ -679,7 +629,7 @@
else
if ((*query)[split_dim_] <= split_value_) // if left subtree
// go to left child
- return left_->ComputeValue(query); //, printer);
+ return left_->ComputeValue(query);
else // if right subtree
// go to right child
return right_->ComputeValue(query);
@@ -766,5 +716,7 @@
}
} // ComputeVariableImportance
+}; // namespace det
+}; // namespace mlpack
#endif
More information about the mlpack-svn mailing list