[mlpack-svn] r10146 - in mlpack/trunk/src/contrib/nslagle: . kdeDL myKDE

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 4 19:47:13 EDT 2011


Author: nslagle
Date: 2011-11-04 19:47:12 -0400 (Fri, 04 Nov 2011)
New Revision: 10146

Added:
   mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp
Modified:
   mlpack/trunk/src/contrib/nslagle/CMakeLists.txt
   mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h
   mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h
   mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h
   mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc
   mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h
   mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h
   mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h
   mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt
   mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
   mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
Log:
mlpack/contrib/nslagle: commit more changes

Modified: mlpack/trunk/src/contrib/nslagle/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/contrib/nslagle/CMakeLists.txt	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/CMakeLists.txt	2011-11-04 23:47:12 UTC (rev 10146)
@@ -3,9 +3,9 @@
 # we just want to recurse into the child directories here
 set(DIRS
    myKDE
-   kdeDL
-   nested_summation_template
-   proximity_project
+   #kdeDL
+   #nested_summation_template
+   #proximity_project
 )
 
 foreach(dir ${DIRS})

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -65,7 +65,7 @@
 #define INSIDE_DUALTREE_KDE_H
 
 #include "mlpack/core.h"
-#include "mlpack/core/tree/spacetree.hpp"
+#include "mlpack/core/tree/binary_space_tree.hpp"
 //#include "contrib/nslagle/series_expansion/farfield_expansion.h"
 //#include "contrib/nslagle/series_expansion/local_expansion.h"
 //#include "contrib/nslagle/series_expansion/mult_farfield_expansion.h"
@@ -248,7 +248,7 @@
   /** @brief The permutation mapping indices of references_ to
    *         original order.
    */
-  arma::Col<size_t> old_from_new_references_;
+  std::vector<size_t> old_from_new_references_;
 
   ////////// Private Member Functions //////////
 
@@ -342,13 +342,13 @@
 //    }
 //    else {
 //      NOTIFY("Using the default dimension of %d", qset_.n_rows);
-      mult_const_ = 1.0 / ka_.kernel_.CalcNormConstant(qset_.n_rows);
+      mult_const_ = 1.0;// / ka_.kernel_.CalcNormConstant(qset_.n_rows);
 //    }
 
     // Set accuracy parameters.
     relative_error_ = CLI::GetParam<double>("relative_error");
-    threshold_ = CLI::GetParam<double>("threshold") *
-      ka_.kernel_.CalcNormConstant(qset_.n_rows);
+    threshold_ = CLI::GetParam<double>("threshold");
+     // *      ka_.kernel_.CalcNormConstant(qset_.n_rows);
     
     // initialize the lower and upper bound densities
     densities_l_.zeros();
@@ -365,7 +365,7 @@
       num_local_prunes_ = 0;
 
     printf("\nStarting fast KDE on bandwidth value of %g...\n",
-	   sqrt(ka_.kernel_.bandwidth_sq()));
+	   sqrt(ka_.Bandwidth()));
     CLI::StartTimer ("fast_kde_compute");
 
     // Preprocessing step for initializing series expansion objects
@@ -417,7 +417,7 @@
       (&queries == &references);
 
     // Read in the number of points owned by a leaf.
-    int leaflen = CLI::GetParam<int>("leaflen");
+    // TODO :int leaflen = CLI::GetParam<int>("leaflen");
 
     // Copy reference dataset and reference weights and compute its
     // sum.
@@ -455,9 +455,7 @@
     // weights according to the permutation of the reference set in
     // the reference tree.
     CLI::StartTimer("tree_d");
-    rroot_ = proximity::MakeGenMetricTree<Tree>(rset_, leaflen,
-						&old_from_new_references_, 
-						NULL);
+    rroot_ = new Tree(rset_, /*leaflen,*/old_from_new_references_);
     DualtreeKdeCommon::ShuffleAccordingToPermutation
       (rset_weights_, old_from_new_references_);
 
@@ -466,12 +464,10 @@
       old_from_new_queries_ = old_from_new_references_;
     }
     else {
-      qroot_ = proximity::MakeGenMetricTree<Tree>(qset_, leaflen,
-						  &old_from_new_queries_, 
-						  NULL);
+      qroot_ = new Tree(qset_, /*leaflen,*/ old_from_new_queries_);
     }
     CLI::StopTimer("tree_d");
-    
+
     // Initialize the density lists
     densities_l_ = arma::vec(qset_.n_cols);
     densities_e_ = arma::vec(qset_.n_cols);
@@ -497,35 +493,35 @@
       {
         order = 7;
       }
-      ka_.Init(bandwidth, order, qset_.n_rows);
+      ka_ = kernel::GaussianKernel(bandwidth);// order, qset_.n_rows);
     }
     else if(qset_.n_rows <= 3) {
       if (!hasOrder)
       {
         order = 5;
       }
-      ka_.Init(bandwidth, order, qset_.n_rows);
+      ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
     }
     else if(qset_.n_rows <= 5) {
       if (!hasOrder)
       {
         order = 3;
       }
-      ka_.Init(bandwidth, order, qset_.n_rows);
+      ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
     }
     else if(qset_.n_rows <= 6) {
       if (!hasOrder)
       {
         order = 1;
       }
-      ka_.Init(bandwidth, order, qset_.n_rows);
+      ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
     }
     else {
       if (!hasOrder)
       {
         order = 0;
       }
-      ka_.Init(bandwidth, order, qset_.n_rows);
+      ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
     }
   }
 
@@ -534,7 +530,7 @@
     FILE *stream = stdout;
     const char *fname = NULL;
 
-    if((fname = CLI::GetParam<std::string>("fast_kde_output")) != NULL) {
+    if((fname = CLI::GetParam<std::string>("fast_kde_output").c_str()) != NULL) {
       stream = fopen(fname, "w+");
     }
     for(size_t q = 0; q < qset_.n_cols; q++) {

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -97,7 +97,7 @@
    *  @param permutation The permutation.
    */
   static void ShuffleAccordingToPermutation
-  (arma::vec &v, const arma::Col<size_t> &permutation) {
+  (arma::vec &v, const std::vector<size_t> &permutation) {
     
     arma::vec v_tmp(v.size());
     for(size_t i = 0; i < v_tmp.size(); i++) {

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -27,7 +27,7 @@
       
       // pairwise distance and kernel value
       double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
-      double kernel_value = ka_.kernel_.EvalUnnormOnSq(dsqd);
+      double kernel_value = ka_.kernel_.Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
       double weighted_kernel_value = rset_weights_[r] * kernel_value;
       
       densities_l_[q] += weighted_kernel_value;

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc	2011-11-04 23:47:12 UTC (rev 10146)
@@ -12,6 +12,7 @@
 #include "dualtree_vkde.h"
 #include "naive_kde.h"
 
+using namespace mlpack;
 using namespace mlpack::kernel;
 
 void VariableBandwidthKde(arma::mat &queries, arma::mat &references, 
@@ -267,18 +268,18 @@
     !strcmp(queries_file_name.c_str(), references_file_name.c_str());
 
   // data::Load inits a arma::mat with the contents of a .csv or .arff.
-  data::Load(references_file_name.c_str(), references);
+  references.load(references_file_name.c_str());
   if(queries_equal_references) {
     queries = references;
   }
   else {
-    data::Load(queries_file_name.c_str(), queries);
+    queries.load(queries_file_name.c_str());
   }
 
   // If the reference weight file name is specified, then read in,
   // otherwise, initialize to uniform weights.
   if(CLI::HasParam("dwgts")) {
-    data::Load(CLI::GetParam<std::string>("dwgts").c_str(), reference_weights);
+    reference_weights.load(CLI::GetParam<std::string>("dwgts").c_str());
   }
   else {
     reference_weights = arma::mat(1, references.n_cols);

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -61,9 +61,9 @@
 #define INSIDE_DUALTREE_VKDE_H
 
 #include "mlpack/core.h"
-#include "mlpack/core/tree/spacetree.hpp"
+#include "mlpack/core/tree/binary_space_tree.hpp"
 //#include "contrib/dongryel/proximity_project/gen_metric_tree.h"
-//#include "dualtree_kde_common.h"
+#include "dualtree_kde_common.h"
 //#include "kde_stat.h"
 #include "mlpack/methods/neighbor_search/neighbor_search.h"
 
@@ -196,12 +196,12 @@
   /** @brief The permutation mapping indices of queries_ to original
    *         order.
    */
-  arma::Col<size_t> old_from_new_queries_;
+  std::vector<size_t> old_from_new_queries_;
   
   /** @brief The permutation mapping indices of references_ to
    *         original order.
    */
-  arma::Col<size_t> old_from_new_references_;
+  std::vector<size_t> old_from_new_references_;
 
   ////////// Private Member Functions //////////
 
@@ -287,7 +287,7 @@
     // Set accuracy parameters.
     relative_error_ = CLI::GetParam<double>("relative_error");
     threshold_ = CLI::GetParam<double>("threshold") *
-      kernels_[0].CalcNormConstant(qset_.n_rows());
+      kernels_[0].Normalizer();//CalcNormConstant(qset_.n_rows);
     
     // initialize the lower and upper bound densities
     densities_l_.zeros();
@@ -352,7 +352,7 @@
       (&queries == &references);
 
     // read in the number of points owned by a leaf
-    int leaflen = CLI::GetParam<int>("leaflen");
+    //TODO int leaflen = CLI::GetParam<int>("leaflen");
 
     // Copy reference dataset and reference weights and compute its
     // sum. rset_weight_sum_ should be the raw sum of the reference
@@ -392,9 +392,7 @@
     // weights according to the permutation of the reference set in
     // the reference tree.
     CLI::StartTimer("tree_d");
-    rroot_ = proximity::MakeGenMetricTree<Tree>(rset_, leaflen,
-						old_from_new_references_,
-						NULL);
+    rroot_ = new Tree(rset_, /*leaflen,*/ old_from_new_references_);
     DualtreeKdeCommon::ShuffleAccordingToPermutation
       (rset_weights_, old_from_new_references_);
 
@@ -403,9 +401,7 @@
       old_from_new_queries_ = old_from_new_references_;
     }
     else {
-      qroot_ = proximity::MakeGenMetricTree<Tree>(qset_, leaflen,
-						  &old_from_new_queries_,
-						  NULL);
+      qroot_ = new Tree(qset_, /*leaflen*/ old_from_new_queries_);
     }
     CLI::StopTimer("tree_d");
 
@@ -420,16 +416,16 @@
 
     // Initialize the kernels for each reference point.
     int knns = CLI::GetParam<int>("knn");
-    AllkNN all_knn = AllkNN(rset_, 20);
-    kernels_.Init(rset_.n_cols());
+    AllkNN all_knn = AllkNN(rset_, knns);
     arma::Mat<size_t> resulting_neighbors;
-    arma::mat squared_distances;    
+    arma::mat squared_distances;
 
     CLI::StartTimer("bandwidth_initialization");
     all_knn.ComputeNeighbors(resulting_neighbors, squared_distances);
 
-    for(size_t i = 0; i < squared_distances.size(); i += knns) {
-      kernels_[i / knns].Init(sqrt(squared_distances[i + knns - 1]));
+    for(size_t i = 0; i < squared_distances.size(); i += knns)
+    {
+      kernels_.push_back(kernel::GaussianKernel(sqrt(squared_distances[i + knns - 1])));
     }
     CLI::StopTimer("bandwidth_initialization");
 
@@ -437,11 +433,11 @@
     // that have been chosen.
     double min_norm_const = DBL_MAX;
     for(size_t i = 0; i < rset_weights_.size(); i++) {
-      double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+      double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows);
       min_norm_const = std::min(min_norm_const, norm_const);
     }
     for(size_t i = 0; i < rset_weights_.size(); i++) {
-      double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+      double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows);
       rset_weights_[i] *= (min_norm_const / norm_const);
     }
 
@@ -454,11 +450,11 @@
     FILE *stream = stdout;
     const char *fname = NULL;
 
-    if((fname = CLI::GetParam<std::string>("fast_kde_output")) != NULL)
+    if((fname = CLI::GetParam<std::string>("fast_kde_output").c_str()) != NULL)
     {
       stream = fopen(fname, "w+");
     }
-    for(size_t q = 0; q < qset_.n_cols(); q++) {
+    for(size_t q = 0; q < qset_.n_cols; q++) {
       fprintf(stream, "%g\n", densities_e_[q]);
     }
 

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -200,12 +200,12 @@
   /** @brief The far field expansion created by the reference points
    *         in this node.
    */
-  typename TKernelAux::TFarFieldExpansion farfield_expansion_;
-    
+  // TODO: typename TKernelAux::TFarFieldExpansion farfield_expansion_;
+
   /** @brief The local expansion stored in this node.
    */
-  typename TKernelAux::TLocalExpansion local_expansion_;
-    
+  // TODO :typename TKernelAux::TLocalExpansion local_expansion_;
+
   /** @brief The subspace associated with this node.
    */
   //SubspaceStat subspace_;
@@ -213,9 +213,9 @@
   /** @brief Gets the weight sum.
    */
   double get_weight_sum() {
-    return farfield_expansion_.get_weight_sum();
+    return 0.0;//farfield_expansion_.get_weight_sum();
   }
- 
+
   /** @brief Adds the other postponed contributions.
    */
   void AddPostponed(const KdeStat& parent_stat) {
@@ -281,8 +281,8 @@
   }
     
   void Init(const TKernelAux &ka) {
-    farfield_expansion_.Init(ka);
-    local_expansion_.Init(ka);
+    // TODO: farfield_expansion_.Init(ka);
+    // TODO: local_expansion_.Init(ka);
   }
     
   void Init(const arma::mat& dataset, size_t &start, size_t &count) {
@@ -300,8 +300,8 @@
     
   void Init(const arma::vec& center, const TKernelAux &ka) {
       
-    farfield_expansion_.Init(center, ka);
-    local_expansion_.Init(center, ka);
+    // TODO: farfield_expansion_.Init(center, ka);
+    // TODO: local_expansion_.Init(center, ka);
     Init();
   }
     

Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h	2011-11-04 23:47:12 UTC (rev 10146)
@@ -98,16 +98,16 @@
     printf("\nStarting naive KDE...\n");
     CLI::StartTimer("naive_kde_compute");
 
-    for(size_t q = 0; q < qset_.n_cols(); q++) {
+    for(size_t q = 0; q < qset_.n_cols; q++) {
       
       const arma::vec q_col = qset_.unsafe_col(q);
 
       // Compute unnormalized sum first.
-      for(size_t r = 0; r < rset_.n_cols(); r++) {
+      for(size_t r = 0; r < rset_.n_cols; r++) {
 	const arma::vec r_col = rset_.unsafe_col(r);
 	double dsqd = kernel::LMetric<2,false>::Evaluate(q_col, r_col);
 	
-	densities_[q] += rset_weights_[r] * kernels_[r].EvalUnnormOnSq(dsqd);
+	densities_[q] += rset_weights_[r] * kernels_[r].Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
       }
 
       // Then normalize it.
@@ -127,16 +127,16 @@
     printf("\nStarting naive KDE...\n");
     CLI::StartTimer("naive_kde_compute");
 
-    for(size_t q = 0; q < qset_.n_cols(); q++) {
+    for(size_t q = 0; q < qset_.n_cols; q++) {
       
       const arma::vec q_col = qset_.unsafe_col(q);
       
       // Compute unnormalized sum.
-      for(size_t r = 0; r < rset_.n_cols(); r++) {
+      for(size_t r = 0; r < rset_.n_cols; r++) {
 	const arma::vec r_col = rset_.unsafe_col(r);
-	double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
+	//double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
 	
-	densities_[q] += rset_weights_[r] * kernels_[r].EvalUnnormOnSq(dsqd);
+	densities_[q] += rset_weights_[r] * kernels_[r].Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
       }
       // Then, normalize it.
       densities_[q] /= norm_const_;
@@ -148,7 +148,7 @@
   void Init(arma::mat &qset, arma::mat &rset, struct datanode *module_in) {
 
     // Use the uniform weights for a moment.
-    arma::mat uniform_weights(1, rset.n_cols());
+    arma::mat uniform_weights(1, rset.n_cols);
     uniform_weights.fill(1.0);
 
     Init(qset, rset, uniform_weights, module_in);
@@ -184,7 +184,7 @@
         rset_(r,c) = rset(r,c);
       }
     }
-    rset_weights_ = arma::vec(reference_weights.n_cols());
+    rset_weights_ = arma::vec(reference_weights.n_cols);
     for(size_t i = 0; i < rset_weights_.size(); i++)
     {
       rset_weights_[i] = reference_weights(0, i);
@@ -197,20 +197,20 @@
     }
 
     // Get bandwidth and compute the normalizing constant.
-    kernels_.Init(rset_.n_cols());
-    if(!strcmp(CLI::GetParam<std::string>("mode").c_str(), "variablebw")) {
-
+    if(!strcmp(CLI::GetParam<std::string>("mode").c_str(), "variablebw"))
+    {
       // Initialize the kernels for each reference point.
       int knns = CLI::GetParam<int>("knn");
-      AllkNN all_knn = AllkNN(rset_, 20, knns);
+      AllkNN all_knn = AllkNN(rset_, knns);
       arma::Mat<size_t> resulting_neighbors;
       arma::mat squared_distances;
 
       CLI::StartTimer("bandwidth_initialization");
       all_knn.ComputeNeighbors(resulting_neighbors, squared_distances);
 
-      for(size_t i = 0; i < squared_distances.size(); i += knns) {
-	kernels_[i / knns].Init(sqrt(squared_distances[i + knns - 1]));
+      for(size_t i = 0; i < squared_distances.size(); i += knns)
+      {
+        kernels_.push_back(kernel::GaussianKernel(sqrt(squared_distances[i + knns - 1])));
       }
       CLI::StopTimer("bandwidth_initialization");
 
@@ -218,30 +218,32 @@
       // that have been chosen.
       double min_norm_const = DBL_MAX;
       for(size_t i = 0; i < rset_weights_.size(); i++) {
-	double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+	double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows());
 	min_norm_const = std::min(min_norm_const, norm_const);
       }
       for(size_t i = 0; i < rset_weights_.size(); i++) {
-	double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+	double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows());
 	rset_weights_[i] *= (min_norm_const / norm_const);
       }
-      
+
       // Compute normalization constant.
       norm_const_ = weight_sum * min_norm_const;
     }
-    else {
-      for(size_t i = 0; i < kernels_.size(); i++) {
-	kernels_[i].Init(CLI::GetParam<double>("bandwidth"));
+    else
+    {
+      for(size_t i = 0; i < kernels_.size(); i++)
+      {
+        kernels_[i] = kernel::GaussianKernel(CLI::GetParam<double>("bandwidth"));
       }
-      norm_const_ = kernels_[0].CalcNormConstant(qset_.n_rows()) * weight_sum;
+      norm_const_ = kernels_[0].Normalizer();//CalcNormConstant(qset_.n_rows()) * weight_sum;
     }
 
     // Allocate density storage.
-    densities_ = arma::vec(qset.n_cols());
+    densities_ = arma::vec(qset.n_cols);
     densities_.zeros();
   }
 
-  /** @brief Output KDE results to a stream 
+  /** @brief Output KDE results to a stream
    *
    *  If the user provided "--naive_kde_output=" argument, then the
    *  output will be directed to a file whose name is provided after
@@ -254,10 +256,10 @@
     const char *fname = NULL;
 
     {
-      fname = CLI::GetParam<std::string>("naive_kde_output");
+      fname = CLI::GetParam<std::string>("naive_kde_output").c_str();
       stream = fopen(fname, "w+");
     }
-    for(size_t q = 0; q < qset_.n_cols(); q++) {
+    for(size_t q = 0; q < qset_.n_cols; q++) {
       fprintf(stream, "%g\n", densities_[q]);
     }
     

Modified: mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt	2011-11-04 23:47:12 UTC (rev 10146)
@@ -4,9 +4,9 @@
 # Anything not in this list will not be compiled into the output library
 # Do not include test programs here
 set(SOURCES
-  test_tree.cc
   kde_dual_tree.hpp
   kde_dual_tree_impl.hpp
+  test_kde_dual_tree.cpp
 )
 
 # add directory name to sources
@@ -18,10 +18,10 @@
 set(MLPACK_CONTRIB_SRCS ${MLPACK_CONTRIB_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
 # link dependencies of test executable
-add_executable(test_tree
+add_executable(test_kde_dual_tree
    EXCLUDE_FROM_ALL
-   test_tree.cc
+   test_kde_dual_tree.cpp
 )
-target_link_libraries(test_tree
+target_link_libraries(test_kde_dual_tree
    mlpack
 )

Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp	2011-11-04 23:47:12 UTC (rev 10146)
@@ -2,7 +2,7 @@
 #define KDE_DUAL_TREE_HPP
 
 #include <iostream>
-#include <priority_queue>
+#include <queue>
 
 #include <mlpack/core.h>
 #include <mlpack/core/kernels/gaussian_kernel.hpp>
@@ -17,6 +17,7 @@
 namespace kde
 {
 /* structure within the priority queue */
+template <typename TTree = tree::BinarySpaceTree<bound::HRectBound<2> > >
 struct queueNode
 {
   TTree* T;
@@ -28,13 +29,14 @@
   size_t bLowerIndex;
   size_t bUpperIndex;
 };
+template <typename TTree = tree::BinarySpaceTree<bound::HRectBound<2> > >
 class QueueNodeCompare
 {
   bool reverse;
  public:
   QueueNodeCompare(const bool& revparam=false) : reverse(revparam) {}
-  bool operator() (const struct queueNode& lhs,
-                   const struct queueNode& rhs) const
+  bool operator() (const struct queueNode<TTree>& lhs,
+                   const struct queueNode<TTree>& rhs) const
   {
     if (reverse)
       return (lhs.priority>rhs.priority);
@@ -52,6 +54,8 @@
   /* possibly, these refer to the same object */
   TTree* referenceRoot;
   TTree* queryRoot;
+  std::map<void*, size_t> nodeIndices;
+  size_t nextAvailableNodeIndex;
   std::vector<size_t> referenceShuffledIndices;
   std::vector<size_t> queryShuffledIndices;
   arma::mat referenceData;
@@ -66,17 +70,19 @@
   double delta;
   /* relative error with respect to the density estimate */
   double epsilon;
-  math::Range bandwidths;
-  std::priority_queue<struct queueNode,
-                      std::vector<struct queueNode>,
-                      QueueNodeCompare> nodePriorityQueue;
+  std::priority_queue<struct queueNode<TTree>,
+                      std::vector<struct queueNode<TTree> >,
+                      QueueNodeCompare<TTree> > nodePriorityQueue;
   size_t bandwidthCount;
   std::vector<double> bandwidths;
+  std::vector<double> inverseBandwidths;
+  double lowBandwidth;
+  double highBandwidth;
   size_t levelsInTree;
   size_t queryTreeSize;
 
   void SetDefaults();
-  void MultiBandwidthDualTree();
+  size_t MultiBandwidthDualTree();
   void MultiBandwidthDualTreeBase(TTree* Q,
                                   TTree* T, size_t QIndex,
                                   size_t lowerBIndex, size_t upperBIndex);
@@ -88,22 +94,24 @@
   {
     return levelsInTree - node->levelsBelow();
   }
-  void Winnow(size_t bLower, size_t bUpper, size_t* newLower, size_t* newUpper);
+  void Winnow(size_t level, size_t* newLower, size_t* newUpper);
  public:
   /* the two data sets are different */
   KdeDualTree (arma::mat& referenceData, arma::mat& queryData);
   /* the reference data is also the query data */
   KdeDualTree (arma::mat& referenceData);
+  std::vector<double> Calculate();
   /* setters and getters */
-  const math::Range& BandwidthRange() const { return bandwidthRange; }
   const size_t& BandwidthCount() const { return bandwidthCount; }
   const double& Delta() const { return delta; }
   const double& Epsilon() const { return epsilon; }
+  const double& LowBandwidth() const { return lowBandwidth; }
+  const double& HighBandwidth() const { return highBandwidth; }
 
-  void BandwidthRange(double l, double u) { bandwidthRange = math::Range(l,u); }
   size_t& BandwidthCount() { return bandwidthCount; }
   double& Delta() { return delta; }
   double& Epsilon() { return epsilon; }
+  void SetBandwidthBounds(double l, double u);
 };
 }; /* end namespace kde */
 }; /* end namespace mlpack */

Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp	2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp	2011-11-04 23:47:12 UTC (rev 10146)
@@ -8,6 +8,8 @@
 #endif
 #endif
 
+#define MADEIT std::cout<<"made it to "<<__LINE__<<" in "<<__FILE__<<std::endl
+
 using namespace mlpack;
 using namespace mlpack::kde;
 
@@ -20,8 +22,8 @@
 KdeDualTree<TKernel, TTree>::KdeDualTree (arma::mat& reference,
                                    arma::mat& query)
 {
-  referenceRoot (new TTree (reference)),
-  queryRoot (new TTree (query))
+  referenceRoot = new TTree (reference, referenceShuffledIndices),
+  queryRoot = new TTree (query, queryShuffledIndices);
   referenceData = reference;
   queryData = query;
   levelsInTree = queryRoot->levelsBelow();
@@ -37,27 +39,121 @@
 
   referenceRoot = new TTree (reference, referenceShuffledIndices);
   queryRoot = referenceRoot;
+  queryShuffledIndices = referenceShuffledIndices;
   levelsInTree = queryRoot->levelsBelow();
   queryTreeSize = queryRoot->treeSize();
   SetDefaults();
 }
 
 template<typename TKernel, typename TTree>
-KdeDualTree<TKernel, TTree>::SetDefaults()
+void KdeDualTree<TKernel, TTree>::SetDefaults()
 {
-  BandwidthRange(0.01, 100.0);
+  SetBandwidthBounds(0.01, 100.0);
   bandwidthCount = 10;
   delta = epsilon = 0.05;
   kernel = TKernel(1.0);
+  nextAvailableNodeIndex = 0;
 }
 
 template<typename TKernel, typename TTree>
-void KdeDualTree<TKernel, TTree>::MultiBandwidthDualTree()
+std::vector<double> KdeDualTree<TKernel, TTree>::Calculate()
 {
+  /* calculate the bandwidths */
+  bandwidths.clear();
+  inverseBandwidths.clear();
+
+  if (bandwidthCount > 1)
+  {
+    double bandwidthDelta = (highBandwidth - lowBandwidth) / (bandwidthCount - 1);
+    for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+    {
+      bandwidths.push_back(lowBandwidth + bandwidthDelta * bIndex);
+      inverseBandwidths.push_back(1.0 / bandwidths.back());
+    }
+  }
+  else
+  {
+    bandwidths.push_back(lowBandwidth);
+    inverseBandwidths.push_back(1.0 / lowBandwidth);
+  }
+
+  /* resize the critical matrices */
+  upperBoundLevelByBandwidth.zeros(levelsInTree,bandwidthCount);
+  for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+  {
+    arma::vec col = upperBoundLevelByBandwidth.unsafe_col(bIndex);
+    col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+  }
+  upperBoundLevelByBandwidth.fill(referenceRoot->count());
+  lowerBoundLevelByBandwidth.zeros(levelsInTree,bandwidthCount);
+  upperBoundQPointByBandwidth.zeros(queryRoot->count(),bandwidthCount);
+  for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+  {
+    arma::vec col = upperBoundQPointByBandwidth.unsafe_col(bIndex);
+    col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+  }
+  lowerBoundQPointByBandwidth.zeros(queryRoot->count(),bandwidthCount);
+  upperBoundQNodeByBandwidth.zeros(queryTreeSize,bandwidthCount);
+  for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+  {
+    arma::vec col = upperBoundQNodeByBandwidth.unsafe_col(bIndex);
+    col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+  }
+  lowerBoundQNodeByBandwidth.zeros(queryTreeSize,bandwidthCount);
+
+  arma::vec dl;
+  arma::vec du;
+  dl.zeros(bandwidthCount);
+  du.zeros(bandwidthCount);
+  double priority = pow(
+      queryRoot->bound().MinDistance(referenceRoot->bound()),
+      0.5);
+  struct queueNode<TTree> firstNode =
+      {referenceRoot,queryRoot, nextAvailableNodeIndex, dl, du,
+        priority, 0, bandwidthCount - 1};
+  nodeIndices[queryRoot] = nextAvailableNodeIndex;
+  ++nextAvailableNodeIndex;
+  nodePriorityQueue.push(firstNode);
+  size_t finalLevel = MultiBandwidthDualTree();
+
+  size_t maxIndex = 0;
+  double maxLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,0) +
+                             lowerBoundLevelByBandwidth(finalLevel,0)) / 2.0;
+  for (size_t bIndex = 1; bIndex < bandwidthCount; ++bIndex)
+  {
+    double currentLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,bIndex) +
+                                   lowerBoundLevelByBandwidth(finalLevel,bIndex)) / 2.0;
+    if (currentLogLikelihood > maxLogLikelihood)
+    {
+      currentLogLikelihood = maxLogLikelihood;
+      maxIndex = bIndex;
+    }
+  }
+  std::cout << upperBoundLevelByBandwidth << "\n";
+  std::cout << lowerBoundLevelByBandwidth << "\n";
+  std::cout << "best bandwidth " << bandwidths[maxIndex] << ";\n";
+  exit(1);
+  std::vector<double> densities;
+  for (std::vector<size_t>::iterator shuffIt = queryShuffledIndices.begin();
+      shuffIt != queryShuffledIndices.end(); ++shuffIt)
+  {
+    densities.push_back((upperBoundQPointByBandwidth(*shuffIt, maxIndex) +
+                         lowerBoundQPointByBandwidth(*shuffIt, maxIndex)) / (2.0 * referenceRoot->count()));
+
+  }
+  return densities;
+}
+
+template<typename TKernel, typename TTree>
+size_t KdeDualTree<TKernel, TTree>::MultiBandwidthDualTree()
+{
+  /* current level */
+  size_t v = 0;
   while (!nodePriorityQueue.empty())
   {
     /* get the first structure in the queue */
-    struct queueNode queueCurrent = nodePriorityQueue.pop();
+    struct queueNode<TTree> queueCurrent = nodePriorityQueue.top();
+    nodePriorityQueue.pop();
     TTree* Q = queueCurrent.Q;
     TTree* T = queueCurrent.T;
     size_t sizeOfTNode = T->count();
@@ -66,7 +162,7 @@
     arma::vec deltaLower = queueCurrent.deltaLower;
     arma::vec deltaUpper = queueCurrent.deltaUpper;
     /* v is the level of the Q node */
-    size_t v = GetLevelOfNode(Q);
+    v = GetLevelOfNode(Q);
     size_t bUpper = queueCurrent.bUpperIndex;
     size_t bLower = queueCurrent.bLowerIndex;
     /* check to see whether we've reached the epsilon condition */
@@ -96,7 +192,7 @@
     /* return */
     if (epsilonCondition)
     {
-      return;
+      return v;
     }
     /* we didn't meet the criteria; let's narrow the bandwidth range */
     Winnow(v, &bLower, &bUpper);
@@ -109,11 +205,13 @@
       std::vector<bool> deltaCondition;
       for (size_t bIndex = bLower; bIndex <= bUpper; ++bIndex)
       {
-        double bandwidth = bandwidths[bIndex];
-        double dl = sizeOfTNode * kernel(dMax / bandwidth);
-        double du = sizeOfTNode * kernel(dMin / bandwidth);
+        double inverseBandwidth = inverseBandwidths[bIndex];
+        double dl = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMax * inverseBandwidth);
+        double du = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMin * inverseBandwidth);
         deltaLower(bIndex) = dl;
         deltaUpper(bIndex) = du - sizeOfTNode;
+        //std::cout << "QIndex: " << QIndex << " bIndex: " << bIndex << std::endl;
+        //std::cout << "max QIndex: " << queryTreeSize - 1 << std::endl;
         if ((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl) < delta)
         {
           for (size_t q = Q->begin(); q < Q->end(); ++q)
@@ -151,12 +249,12 @@
       if (meetsDeltaCondition)
       {
         /* adjust the current structure, then reinsert it into the queue */
-        queueCurrent.dl = deltaLower;
-        queueCurrent.du = deltaUpper;
+        queueCurrent.deltaLower = deltaLower;
+        queueCurrent.deltaUpper = deltaUpper;
         queueCurrent.bUpperIndex = bUpper;
         queueCurrent.bLowerIndex = bLower;
         queueCurrent.priority += PRIORITY_MAX;
-        nodePriorityQueue.insert(queueCurrent);
+        nodePriorityQueue.push(queueCurrent);
         continue;
       }
       else
@@ -208,28 +306,45 @@
       MultiBandwidthDualTreeBase(Q, T, QIndex, bLower, bUpper);
     }
     double priority = pow(Q->bound().MinDistance(T->bound()), 0.5);
-    if (!Q->is_left() && !T->is_leaf())
+    if (!Q->is_leaf() && !T->is_leaf())
     {
-      struct queueNode leftLeft =
-      {T->left(),Q->left(), 2*QIndex + 1, arma::vec(deltaUpper),
-        arma::vec(deltaLower), priority, bLower, bUpper};
-      struct queueNode leftRight =
-      {T->left(),Q->right(), 2*QIndex + 2, arma::vec(deltaUpper),
-        arma::vec(deltaLower), priority, bLower, bUpper};
-      struct queueNode rightLeft =
-      {T->right(),Q->left(), 2*QIndex + 1, arma::vec(deltaUpper),
-        arma::vec(deltaLower), priority, bLower, bUpper};
-      struct queueNode rightRight =
-      {T->right(),Q->right(), 2*QIndex + 2, arma::vec(deltaUpper),
-        arma::vec(deltaLower), priority, bLower, bUpper};
-      nodePriorityQueue.insert(leftLeft);
-      nodePriorityQueue.insert(leftRight);
-      nodePriorityQueue.insert(rightLeft);
-      nodePriorityQueue.insert(rightRight);
+      //std::cout << "QIndex for the current non-leaf : " << QIndex << std::endl;
+      TTree* QLeft = Q->left();
+      TTree* QRight = Q->right();
+      if (nodeIndices.find(QLeft) == nodeIndices.end())
+      {
+        nodeIndices[QLeft] = nextAvailableNodeIndex;
+        ++nextAvailableNodeIndex;
+      }
+      if (nodeIndices.find(QRight) == nodeIndices.end())
+      {
+        nodeIndices[QRight] = nextAvailableNodeIndex;
+        ++nextAvailableNodeIndex;
+      }
+      size_t QLeftIndex = (*(nodeIndices.find(QLeft))).second;
+      size_t QRightIndex = (*(nodeIndices.find(QRight))).second;
+      struct queueNode<TTree> leftLeft =
+      {T->left(),Q->left(), QLeftIndex, arma::vec(deltaLower),
+        arma::vec(deltaUpper), priority, bLower, bUpper};
+      struct queueNode<TTree> leftRight =
+      {T->left(),Q->right(), QRightIndex, arma::vec(deltaLower),
+        arma::vec(deltaUpper), priority, bLower, bUpper};
+      struct queueNode<TTree> rightLeft =
+      {T->right(),Q->left(), QLeftIndex, arma::vec(deltaLower),
+        arma::vec(deltaUpper), priority, bLower, bUpper};
+      struct queueNode<TTree> rightRight =
+      {T->right(),Q->right(), QRightIndex, arma::vec(deltaLower),
+        arma::vec(deltaUpper), priority, bLower, bUpper};
+      nodePriorityQueue.push(leftLeft);
+      nodePriorityQueue.push(leftRight);
+      nodePriorityQueue.push(rightLeft);
+      nodePriorityQueue.push(rightRight);
     }
   }
+  return v;
 }
 
+template<typename TKernel, typename TTree>
 void KdeDualTree<TKernel, TTree>::Winnow(size_t level,
                                          size_t* bLower,
                                          size_t* bUpper)
@@ -310,14 +425,14 @@
     {
       arma::vec diff = queryPoint - referenceData.unsafe_col(t);
       double distSquared = arma::dot(diff, diff);
-      size_t bandwidthIndex = upperBIndex;
+      size_t bandwidthIndex = upperBIndex + 1;
       while (bandwidthIndex > lowerBIndex)
       {
         --bandwidthIndex;
-        double bandwidth = bandwidths[bandwidthIndex];
-        double scaledProduct = pow(distSquared, 0.5) / bandwidth;
+        double inverseBandwidth = inverseBandwidths[bandwidthIndex];
+        double scaledProduct = pow(distSquared, 0.5) * inverseBandwidth;
         /* TODO: determine the power of the incoming argument */
-        double contribution = kernel(scaledProduct);
+        double contribution = inverseBandwidth * kernel.Evaluate(scaledProduct);
         if (contribution > DBL_EPSILON)
         {
           upperBoundQPointByBandwidth(q, bandwidthIndex) += contribution;
@@ -368,6 +483,16 @@
         sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
   }
 }
+template<typename TKernel, typename TTree>
+void KdeDualTree<TKernel, TTree>::SetBandwidthBounds(double l, double u)
+{
+  if (u <= l + DBL_EPSILON || l <= DBL_EPSILON)
+  {
+    Log::Fatal << "Incorrect bandwidth range assignment" << std::endl;
+  }
+  lowBandwidth = l;
+  highBandwidth = u;
+}
 
 };
 };

Added: mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp	                        (rev 0)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp	2011-11-04 23:47:12 UTC (rev 10146)
@@ -0,0 +1,100 @@
+#include <mlpack/core.h>
+#include "kde_dual_tree.hpp"
+
+PROGRAM_INFO("Kernel Density Estimation Multibandwidth Dual Tree",
+    "KDE multibandwidth dual tree calculates density estimates for each "
+    "query point, given a collection of reference points, using a collection "
+    "of equidistantly spaced bandwidths\n\n"
+    "$ kde_dual_tree --reference_file=reference.csv --query_file=query.csv\n"
+    "  --output_file=output.csv --low_bandwidth=0.1 --high_bandwidth=100.0\n"
+    "  --bandwidth_count=10 --epsilon=0.01 --delta=0.01", "kde_dual_tree");
+
+PARAM_STRING_REQ("reference_file", "CSV file containing the reference dataset.",
+    "");
+PARAM_STRING("query_file", "CSV file containing query points",
+    "", "");
+PARAM_STRING("output_file", "File to output CSV-formatted results into.", "",
+    "kde_dual_tree_output.csv");
+PARAM_DOUBLE("low_bandwidth", "Low bandwidth", "", 0.1);
+PARAM_DOUBLE("high_bandwidth", "Low bandwidth", "", 100.0);
+PARAM_INT("bandwidth_count", "Low bandwidth", "", 10);
+PARAM_DOUBLE("epsilon", "error tolerance", "", 0.01);
+PARAM_DOUBLE("delta", "reversibility tolerance", "", 0.01);
+
+int main (int argc, char* argv[])
+{
+  CLI::ParseCommandLine(argc, argv);
+  std::string referenceFile = CLI::GetParam<std::string>("reference_file");
+  std::string queryFile = CLI::GetParam<std::string>("query_file");
+  std::string outputFile = CLI::GetParam<std::string>("output_file");
+  arma::mat referenceData;
+  arma::mat queryData;
+  double epsilon = CLI::GetParam<double>("epsilon");
+  double delta = CLI::GetParam<double>("delta");
+  int bandwidthCount = CLI::GetParam<int>("bandwidth_count");
+  double lowBandwidth = CLI::GetParam<double>("low_bandwidth");
+  double highBandwidth = CLI::GetParam<double>("high_bandwidth");
+
+  /* check the parameters */
+  if (delta < 0.0)
+  {
+    Log::Fatal << "Improper delta: " << delta <<
+                  "; delta must be positive" << std::endl;
+  }
+  if (epsilon < 0.0)
+  {
+    Log::Fatal << "Improper epsilon: " << epsilon <<
+                  "; epsilon must be positive" << std::endl;
+  }
+  if (bandwidthCount <= 0)
+  {
+    Log::Fatal << "Improper bandwidth_count: " << bandwidthCount <<
+                  "; bandwidth_count must be positive" << std::endl;
+  }
+  if (highBandwidth <= lowBandwidth + DBL_EPSILON || lowBandwidth <= 0.0)
+  {
+    Log::Fatal << "Improper bandwidth range: " << lowBandwidth << ", " <<
+                   highBandwidth << "; bandwidth range must be a positive interval" << std::endl;
+  }
+
+  if (!data::Load(referenceFile.c_str(), referenceData))
+  {
+    Log::Fatal << "Failed to load the reference file " << referenceFile << std::endl;
+  }
+
+  Log::Info << "Loaded reference data from " << referenceFile << std::endl;
+
+  std::vector<double> densities;
+  if (queryFile == "")
+  {
+    /* invoke KDE without specific query data */
+    KdeDualTree<> kde = KdeDualTree<>(referenceData);
+    kde.Epsilon() = epsilon;
+    kde.Delta() = delta;
+    kde.BandwidthCount() = bandwidthCount;
+    kde.SetBandwidthBounds(lowBandwidth, highBandwidth);
+    densities = kde.Calculate();
+  }
+  else
+  {
+    /* invoke KDE without specific query data */
+    KdeDualTree<> kde = KdeDualTree<>(referenceData, queryData);
+    kde.Epsilon() = epsilon;
+    kde.Delta() = delta;
+    kde.BandwidthCount() = bandwidthCount;
+    kde.SetBandwidthBounds(lowBandwidth, highBandwidth);
+    densities = kde.Calculate();
+  }
+  size_t index = 0;
+  for (std::vector<double>::iterator dIt = densities.begin();
+      dIt != densities.end();
+      ++dIt)
+  {
+    if (*dIt != 0.0)
+    {
+      std::cout << "density[" << index << "]=" << *dIt << std::endl;
+    }
+    ++index;
+  }
+  return 0;
+}




More information about the mlpack-svn mailing list