[mlpack-svn] r12196 - mlpack/trunk/src/mlpack/methods/det

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Apr 4 12:46:33 EDT 2012


Author: pram
Date: 2012-04-04 12:46:32 -0400 (Wed, 04 Apr 2012)
New Revision: 12196

Modified:
   mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
   mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
   mlpack/trunk/src/mlpack/methods/det/dtree.hpp
   mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
Log:
DET naming schemes

Modified: mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_main.cpp	2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dt_main.cpp	2012-04-04 16:46:32 UTC (rev 12196)
@@ -9,6 +9,7 @@
 #include "dt_utils.hpp"
 
 using namespace mlpack;
+using namespace mlpack::det;
 using namespace std;
 
 PROGRAM_INFO("Density estimation with DET", "This program "
@@ -73,22 +74,10 @@
 	   "variable importance of each feature "
 	   "out on the command line.", "I");
 
-int main(int argc, char *argv[]) {
-
-
+int main(int argc, char *argv[]) 
+{
   CLI::ParseCommandLine(argc, argv);
 
-  DTree<>* test_pvt = new DTree<>();
-  bool test_success = test_pvt->TestPrivateFunctions();
-
-  if (test_success) {
-    Log::Warn << "Private functions tests successful." << endl;
-  } else {
-    Log::Warn << "Private functions tests failed." << endl;
-  }
-
-  exit(0);
-
   string train_set_file = CLI::GetParam<string>("S");
   arma::Mat<float> training_data;
 
@@ -117,7 +106,7 @@
     = CLI::GetParam<string>("u");
 
   Timer::Start("DET/Training");
-  DTree<float> *dtree_opt = dt_utils::Trainer<float>
+  DTree<float> *dtree_opt = Trainer<float>
     (&training_data, folds, CLI::HasParam("R"), CLI::GetParam<int>("M"),
      CLI::GetParam<int>("N"), unpruned_tree_estimate_file);
   Timer::Stop("DET/Training");
@@ -221,14 +210,14 @@
     assert(training_data.n_cols == labels.n_cols);
     assert(labels.n_rows == 1);
 
-    dt_utils::PrintLeafMembership<float>
+    PrintLeafMembership<float>
       (dtree_opt, training_data, labels, num_classes,
        (string) CLI::GetParam<string>("l"));
   } // leaf class membership
   
 
   if(CLI::HasParam("I")) {
-    dt_utils::PrintVariableImportance<float>
+    PrintVariableImportance<float>
       (dtree_opt, training_data.n_rows,
        (string) CLI::GetParam<string>("i"));
   } // print variable importance

Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp	2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp	2012-04-04 16:46:32 UTC (rev 12196)
@@ -6,20 +6,19 @@
  * different tasks with the Density Tree class.
  */
 
-#ifndef DT_UTILS_HPP
-#define DT_UTILS_HPP
+#ifndef __MLPACK_METHODS_DET_DT_UTILS_HPP
+#define __MLPACK_METHODS_DET_DT_UTILS_HPP
 
 #include <string>
 
 #include <mlpack/core.hpp>
 #include "dtree.hpp"
 
-using namespace mlpack;
 using namespace std;
 
+namespace mlpack {
+namespace det {
 
-namespace dt_utils {
-
   template<typename eT>
   void PrintLeafMembership(DTree<eT> *dtree,
 			   const arma::Mat<eT>& data,
@@ -328,6 +327,7 @@
     return dtree_opt;
   } // Trainer
 
-}; // namespace dt_utils
+}; // namespace det
+}; // namespace mlpack
 
-#endif
+#endif // __MLPACK_METHODS_DET_DT_UTILS_HPP

Modified: mlpack/trunk/src/mlpack/methods/det/dtree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.hpp	2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.hpp	2012-04-04 16:46:32 UTC (rev 12196)
@@ -2,15 +2,13 @@
  * @file dtree.hpp
  * @author Parikshit Ram (pram at cc.gatech.edu)
  *
- * Density Tree class
- *
+ * Density Estimation Tree class
  */
 
-#ifndef DTREE_HPP
-#define DTREE_HPP
+#ifndef __MLPACK_METHODS_DET_DTREE_HPP
+#define __MLPACK_METHODS_DET_DTREE_HPP
 
 #include <assert.h>
-#include <vector>
 
 #include <mlpack/core.hpp>
 
@@ -18,6 +16,9 @@
 using namespace std;
 
 
+namespace mlpack {
+namespace det /** Density Estimation Trees */ {
+
 // This two types in the template are used 
 // for two purposes:
 // eT - the type to store the data in (for most practical 
@@ -407,6 +408,9 @@
   
 }; // Class DTree
 
+}; // namespace det
+}; // namespace mlpack
+
 #include "dtree_impl.hpp"
 
-#endif
+#endif // __MLPACK_METHODS_DET_DTREE_HPP

Modified: mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp	2012-04-04 16:15:29 UTC (rev 12195)
+++ mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp	2012-04-04 16:46:32 UTC (rev 12196)
@@ -3,17 +3,18 @@
  * @author Parikshit Ram (pram at cc.gatech.edu)
  *
  * Implementations of some declared functions in 
- * the Density Tree class.
+ * the Density Estimation Tree class.
  *
  */
 
-#ifndef DTREE_IMPL_HPP
-#define DTREE_IMPL_HPP
+#ifndef __MLPACK_METHODS_DET_DTREE_IMPL_HPP
+#define __MLPACK_METHODS_DET_DTREE_IMPL_HPP
 
 #include "dtree.hpp"
 
+namespace mlpack{
+namespace det {
 
-
 // This function computes the l2-error of a given node
 // from the formula - R(t) = -|t|^2 / (N^2 V_t)
 template<typename eT, typename cT>
@@ -68,8 +69,6 @@
   bool some_split_found = false;
   size_t point_mass_in_dim = 0;
 
-  // printf("In FindSplit %Lg\n", error_);fflush(NULL);
-
   // loop through each dimension
   for (size_t dim = 0; dim < max_vals_->n_elem; dim++) {
     // have to deal with REAL, INTEGER, NOMINAL data
@@ -96,7 +95,6 @@
       assert(std::exp(log_range_all_not_dim) > 0);
 
       // get the values for the dimension
-      // NEED TO CHECK: if this works correctly
       RowVecType dim_val_vec = data->row(dim).subvec(start_, end_ - 1);
 
       // sort the values in ascending order
@@ -104,11 +102,10 @@
 
       // get ready to go through the sorted list and compute error
       assert(dim_val_vec.n_elem > maxLeafSize);
-      // enforcing the leaves to have a minimum of MIN_LEAF_SIZE 
+      // enforcing the leaves to have a minimum  
       // number of points to avoid spikes
-
       // one way of doing it is only considering splits resulting
-      // in sizes > MIN_LEAF_SIZE
+      // in sizes > some constant (minLeafSize)
       size_t left_child_size = minLeafSize - 1, right_child_size;
 
       // finding the best split for this dimension
@@ -145,11 +142,6 @@
 
 	    cT temp_l_error = -1.0 * std::exp(temp_log_neg_l_error);
 
-// 	      = -1.0 * ((cT)(i + 1) / (cT)total_n)
-// 	      * ((cT)(i + 1) / (cT)total_n)
-// 	      / (std::exp(log_range_all_not_dim 
-// 			  + (cT) std::log(split - min)));
-
 	    assert(std::abs(temp_l_error) 
 		   < std::numeric_limits<cT>::max());
 
@@ -164,11 +156,6 @@
 	    
 	    cT temp_r_error = -1.0 * std::exp(temp_log_neg_r_error);
 
-// 	      = -1.0 * ((cT) (n_t - i - 1) / (cT)total_n)
-// 	      * ((cT) (n_t - i - 1) / (cT)total_n)
-// 	      / (std::exp(log_range_all_not_dim 
-// 			  + (cT) std::log(max - split)));
-
 	    assert(std::abs(temp_r_error) 
 		   < std::numeric_limits<cT>::max());
 
@@ -308,9 +295,6 @@
   right_(NULL)
 {
   error_ = ComputeNodeError_(total_points);
-  // if this assert fails, this implies that you need 
-  // a higher precision (or higher range) 'eT'
-  assert(std::abs(error_) < std::numeric_limits<cT>::max());
 
   bucket_tag_ = -1;
   root_ = true;
@@ -438,9 +422,9 @@
 		   &left_error, &right_error,
 		   maxLeafSize, minLeafSize)) {
 
-      // printf("Split found\n");fflush(NULL);
-      // Split the data for the children
-      // MatType data_l, data_r;
+      // Reorder the data so that the points of each
+      // child node lie contiguously
+      // (to increase efficiency during the training).
       eT split_val, lsplit_val, rsplit_val;
       SplitData_(data, dim, split_ind,
 		 old_from_new, &split_val,
@@ -452,8 +436,8 @@
       VecType* min_vals_l = new VecType(*min_vals_);
       VecType* min_vals_r = new VecType(*min_vals_);
 
-      (*max_vals_l)[dim] = split_val; // changed from just lsplit_val
-      (*min_vals_r)[dim] = split_val; // changed from just rsplit_val
+      (*max_vals_l)[dim] = split_val;
+      (*min_vals_r)[dim] = split_val;
 
       // store split dim and split val in the node
       split_value_ = split_val;
@@ -482,15 +466,6 @@
       subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv()
 	+ right_->subtree_leaves_v_t_inv();
 
-      // 	// storing the sum of the estimates (OF WHAT)
-      // 	st_estimate_ = left_->st_estimate() + right_->st_estimate();
-
-      // 	// storing del_f / del r(split_dim)
-      // 	cT del_f = (ratio_ * v_t_inv_)
-      // 	  - (left_->ratio() * left_->v_t_inv());
-      // 	cT del_r = max_vals_[split_dim_] - split_value_;
-      // 	del_f_del_r_ = fabs(del_f / del_r);
-
       // Forming T1 by removing leaves for which
       // R(t) = R(t_L) + R(t_R)
       if ((left_->subtree_leaves() == 1)
@@ -510,8 +485,6 @@
       subtree_leaves_ = 1;
       subtree_leaves_error_ = error_;
       subtree_leaves_v_t_inv_ = v_t_inv_;
-//       st_estimate_ = ratio_ * ratio_ * v_t_inv_;
-//       del_f_del_r_ = 0.0;
     } // end if-else
   } else {
     // We can make this a leaf node
@@ -520,11 +493,6 @@
     subtree_leaves_error_ = error_;
     subtree_leaves_v_t_inv_ = v_t_inv_;
 
-    //       // TO CHECK: 
-    //       // if these are the density estimate 
-    //       // it should be ratio_ * v_t_inv_
-    //       st_estimate_ = ratio_ * ratio_ * v_t_inv_;
-    //       del_f_del_r_ = 0.0;
   } // end if-else 
     
     // if leaf do not compute g_k(t), else compute, store,
@@ -586,10 +554,6 @@
       subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv()
 	+ right_->subtree_leaves_v_t_inv();
 
-      // 	// updating values for the sum of density estimates 
-      // 	st_estimate_
-      // 	  = left_->st_estimate() + right_->st_estimate();
-
       // update g_t value
       if (useVolReg) {
 	g_t = (error_ - subtree_leaves_error_) 
@@ -613,24 +577,10 @@
       }
     } else { // prune this subtree
 
-      // otherwise this should be equal to the alpha
-      // for this node. So we check that:
-      // assert(g_t == old_alpha, "Alpha != g(t) but less than!!");
-
-      // 	// compute \del f_hat(x) / \del r(split_dim)
-      // 	cT st_change_in_estimate 
-      // 	  = st_estimate_ - (ratio_ * ratio_ * v_t_inv_);
-
-      // printf("%lg:%lg Pruned %lg\n",
-      //       old_alpha, del_f_del_r_, st_change_in_estimate);
-
-
       // making this node a leaf node
       subtree_leaves_ = 1;
       subtree_leaves_error_ = error_;
       subtree_leaves_v_t_inv_ = v_t_inv_;
-//       st_estimate_ = ratio_ * ratio_ * v_t_inv_;
-//       del_f_del_r_ = 0.0;
       delete left_;
       left_ = NULL;
       delete right_;
@@ -646,8 +596,8 @@
 // bounding box of this node (check generally done
 // at the root, so its the bounding box of the data)
 //
-// Improvement: To open up the range with epsilons on 
-// both sides where epsilon on the density near the boundary.
+// Future improvement: widen the range by an epsilon on each
+// side, where epsilon depends on the density near the boundary.
 template<typename eT, typename cT>
 bool DTree<eT, cT>::
 WithinRange_(VecType* query) 
@@ -679,7 +629,7 @@
   else
     if ((*query)[split_dim_] <= split_value_)  // if left subtree
       // go to left child
-      return left_->ComputeValue(query); //, printer);
+      return left_->ComputeValue(query);
     else  // if right subtree
       // go to right child
       return right_->ComputeValue(query);
@@ -766,5 +716,7 @@
   }
 } // ComputeVariableImportance
 
+}; // namespace det
+}; // namespace mlpack
 
 #endif




More information about the mlpack-svn mailing list