[mlpack-svn] r10146 - in mlpack/trunk/src/contrib/nslagle: . kdeDL myKDE
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 4 19:47:13 EDT 2011
Author: nslagle
Date: 2011-11-04 19:47:12 -0400 (Fri, 04 Nov 2011)
New Revision: 10146
Added:
mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp
Modified:
mlpack/trunk/src/contrib/nslagle/CMakeLists.txt
mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h
mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h
mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h
mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc
mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h
mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h
mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h
mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt
mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
Log:
mlpack/contrib/nslagle: commit more changes
Modified: mlpack/trunk/src/contrib/nslagle/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/contrib/nslagle/CMakeLists.txt 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/CMakeLists.txt 2011-11-04 23:47:12 UTC (rev 10146)
@@ -3,9 +3,9 @@
# we just want to recurse into the child directories here
set(DIRS
myKDE
- kdeDL
- nested_summation_template
- proximity_project
+ #kdeDL
+ #nested_summation_template
+ #proximity_project
)
foreach(dir ${DIRS})
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -65,7 +65,7 @@
#define INSIDE_DUALTREE_KDE_H
#include "mlpack/core.h"
-#include "mlpack/core/tree/spacetree.hpp"
+#include "mlpack/core/tree/binary_space_tree.hpp"
//#include "contrib/nslagle/series_expansion/farfield_expansion.h"
//#include "contrib/nslagle/series_expansion/local_expansion.h"
//#include "contrib/nslagle/series_expansion/mult_farfield_expansion.h"
@@ -248,7 +248,7 @@
/** @brief The permutation mapping indices of references_ to
* original order.
*/
- arma::Col<size_t> old_from_new_references_;
+ std::vector<size_t> old_from_new_references_;
////////// Private Member Functions //////////
@@ -342,13 +342,13 @@
// }
// else {
// NOTIFY("Using the default dimension of %d", qset_.n_rows);
- mult_const_ = 1.0 / ka_.kernel_.CalcNormConstant(qset_.n_rows);
+ mult_const_ = 1.0;// / ka_.kernel_.CalcNormConstant(qset_.n_rows);
// }
// Set accuracy parameters.
relative_error_ = CLI::GetParam<double>("relative_error");
- threshold_ = CLI::GetParam<double>("threshold") *
- ka_.kernel_.CalcNormConstant(qset_.n_rows);
+ threshold_ = CLI::GetParam<double>("threshold");
+ // * ka_.kernel_.CalcNormConstant(qset_.n_rows);
// initialize the lower and upper bound densities
densities_l_.zeros();
@@ -365,7 +365,7 @@
num_local_prunes_ = 0;
printf("\nStarting fast KDE on bandwidth value of %g...\n",
- sqrt(ka_.kernel_.bandwidth_sq()));
+ sqrt(ka_.Bandwidth()));
CLI::StartTimer ("fast_kde_compute");
// Preprocessing step for initializing series expansion objects
@@ -417,7 +417,7 @@
(&queries == &references);
// Read in the number of points owned by a leaf.
- int leaflen = CLI::GetParam<int>("leaflen");
+ // TODO :int leaflen = CLI::GetParam<int>("leaflen");
// Copy reference dataset and reference weights and compute its
// sum.
@@ -455,9 +455,7 @@
// weights according to the permutation of the reference set in
// the reference tree.
CLI::StartTimer("tree_d");
- rroot_ = proximity::MakeGenMetricTree<Tree>(rset_, leaflen,
- &old_from_new_references_,
- NULL);
+ rroot_ = new Tree(rset_, /*leaflen,*/old_from_new_references_);
DualtreeKdeCommon::ShuffleAccordingToPermutation
(rset_weights_, old_from_new_references_);
@@ -466,12 +464,10 @@
old_from_new_queries_ = old_from_new_references_;
}
else {
- qroot_ = proximity::MakeGenMetricTree<Tree>(qset_, leaflen,
- &old_from_new_queries_,
- NULL);
+ qroot_ = new Tree(qset_, /*leaflen,*/ old_from_new_queries_);
}
CLI::StopTimer("tree_d");
-
+
// Initialize the density lists
densities_l_ = arma::vec(qset_.n_cols);
densities_e_ = arma::vec(qset_.n_cols);
@@ -497,35 +493,35 @@
{
order = 7;
}
- ka_.Init(bandwidth, order, qset_.n_rows);
+ ka_ = kernel::GaussianKernel(bandwidth);// order, qset_.n_rows);
}
else if(qset_.n_rows <= 3) {
if (!hasOrder)
{
order = 5;
}
- ka_.Init(bandwidth, order, qset_.n_rows);
+ ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
}
else if(qset_.n_rows <= 5) {
if (!hasOrder)
{
order = 3;
}
- ka_.Init(bandwidth, order, qset_.n_rows);
+ ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
}
else if(qset_.n_rows <= 6) {
if (!hasOrder)
{
order = 1;
}
- ka_.Init(bandwidth, order, qset_.n_rows);
+ ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
}
else {
if (!hasOrder)
{
order = 0;
}
- ka_.Init(bandwidth, order, qset_.n_rows);
+ ka_ = kernel::GaussianKernel(bandwidth);//, order, qset_.n_rows);
}
}
@@ -534,7 +530,7 @@
FILE *stream = stdout;
const char *fname = NULL;
- if((fname = CLI::GetParam<std::string>("fast_kde_output")) != NULL) {
+ if((fname = CLI::GetParam<std::string>("fast_kde_output").c_str()) != NULL) {
stream = fopen(fname, "w+");
}
for(size_t q = 0; q < qset_.n_cols; q++) {
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_common.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -97,7 +97,7 @@
* @param permutation The permutation.
*/
static void ShuffleAccordingToPermutation
- (arma::vec &v, const arma::Col<size_t> &permutation) {
+ (arma::vec &v, const std::vector<size_t> &permutation) {
arma::vec v_tmp(v.size());
for(size_t i = 0; i < v_tmp.size(); i++) {
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_impl.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -27,7 +27,7 @@
// pairwise distance and kernel value
double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
- double kernel_value = ka_.kernel_.EvalUnnormOnSq(dsqd);
+ double kernel_value = ka_.kernel_.Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
double weighted_kernel_value = rset_weights_[r] * kernel_value;
densities_l_[q] += weighted_kernel_value;
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_kde_main.cc 2011-11-04 23:47:12 UTC (rev 10146)
@@ -12,6 +12,7 @@
#include "dualtree_vkde.h"
#include "naive_kde.h"
+using namespace mlpack;
using namespace mlpack::kernel;
void VariableBandwidthKde(arma::mat &queries, arma::mat &references,
@@ -267,18 +268,18 @@
!strcmp(queries_file_name.c_str(), references_file_name.c_str());
// data::Load inits a arma::mat with the contents of a .csv or .arff.
- data::Load(references_file_name.c_str(), references);
+ references.load(references_file_name.c_str());
if(queries_equal_references) {
queries = references;
}
else {
- data::Load(queries_file_name.c_str(), queries);
+ queries.load(queries_file_name.c_str());
}
// If the reference weight file name is specified, then read in,
// otherwise, initialize to uniform weights.
if(CLI::HasParam("dwgts")) {
- data::Load(CLI::GetParam<std::string>("dwgts").c_str(), reference_weights);
+ reference_weights.load(CLI::GetParam<std::string>("dwgts").c_str());
}
else {
reference_weights = arma::mat(1, references.n_cols);
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/dualtree_vkde.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -61,9 +61,9 @@
#define INSIDE_DUALTREE_VKDE_H
#include "mlpack/core.h"
-#include "mlpack/core/tree/spacetree.hpp"
+#include "mlpack/core/tree/binary_space_tree.hpp"
//#include "contrib/dongryel/proximity_project/gen_metric_tree.h"
-//#include "dualtree_kde_common.h"
+#include "dualtree_kde_common.h"
//#include "kde_stat.h"
#include "mlpack/methods/neighbor_search/neighbor_search.h"
@@ -196,12 +196,12 @@
/** @brief The permutation mapping indices of queries_ to original
* order.
*/
- arma::Col<size_t> old_from_new_queries_;
+ std::vector<size_t> old_from_new_queries_;
/** @brief The permutation mapping indices of references_ to
* original order.
*/
- arma::Col<size_t> old_from_new_references_;
+ std::vector<size_t> old_from_new_references_;
////////// Private Member Functions //////////
@@ -287,7 +287,7 @@
// Set accuracy parameters.
relative_error_ = CLI::GetParam<double>("relative_error");
threshold_ = CLI::GetParam<double>("threshold") *
- kernels_[0].CalcNormConstant(qset_.n_rows());
+ kernels_[0].Normalizer();//CalcNormConstant(qset_.n_rows);
// initialize the lower and upper bound densities
densities_l_.zeros();
@@ -352,7 +352,7 @@
(&queries == &references);
// read in the number of points owned by a leaf
- int leaflen = CLI::GetParam<int>("leaflen");
+ //TODO int leaflen = CLI::GetParam<int>("leaflen");
// Copy reference dataset and reference weights and compute its
// sum. rset_weight_sum_ should be the raw sum of the reference
@@ -392,9 +392,7 @@
// weights according to the permutation of the reference set in
// the reference tree.
CLI::StartTimer("tree_d");
- rroot_ = proximity::MakeGenMetricTree<Tree>(rset_, leaflen,
- old_from_new_references_,
- NULL);
+ rroot_ = new Tree(rset_, /*leaflen,*/ old_from_new_references_);
DualtreeKdeCommon::ShuffleAccordingToPermutation
(rset_weights_, old_from_new_references_);
@@ -403,9 +401,7 @@
old_from_new_queries_ = old_from_new_references_;
}
else {
- qroot_ = proximity::MakeGenMetricTree<Tree>(qset_, leaflen,
- &old_from_new_queries_,
- NULL);
+ qroot_ = new Tree(qset_, /*leaflen*/ old_from_new_queries_);
}
CLI::StopTimer("tree_d");
@@ -420,16 +416,16 @@
// Initialize the kernels for each reference point.
int knns = CLI::GetParam<int>("knn");
- AllkNN all_knn = AllkNN(rset_, 20);
- kernels_.Init(rset_.n_cols());
+ AllkNN all_knn = AllkNN(rset_, knns);
arma::Mat<size_t> resulting_neighbors;
- arma::mat squared_distances;
+ arma::mat squared_distances;
CLI::StartTimer("bandwidth_initialization");
all_knn.ComputeNeighbors(resulting_neighbors, squared_distances);
- for(size_t i = 0; i < squared_distances.size(); i += knns) {
- kernels_[i / knns].Init(sqrt(squared_distances[i + knns - 1]));
+ for(size_t i = 0; i < squared_distances.size(); i += knns)
+ {
+ kernels_.push_back(kernel::GaussianKernel(sqrt(squared_distances[i + knns - 1])));
}
CLI::StopTimer("bandwidth_initialization");
@@ -437,11 +433,11 @@
// that have been chosen.
double min_norm_const = DBL_MAX;
for(size_t i = 0; i < rset_weights_.size(); i++) {
- double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+ double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows);
min_norm_const = std::min(min_norm_const, norm_const);
}
for(size_t i = 0; i < rset_weights_.size(); i++) {
- double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+ double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows);
rset_weights_[i] *= (min_norm_const / norm_const);
}
@@ -454,11 +450,11 @@
FILE *stream = stdout;
const char *fname = NULL;
- if((fname = CLI::GetParam<std::string>("fast_kde_output")) != NULL)
+ if((fname = CLI::GetParam<std::string>("fast_kde_output").c_str()) != NULL)
{
stream = fopen(fname, "w+");
}
- for(size_t q = 0; q < qset_.n_cols(); q++) {
+ for(size_t q = 0; q < qset_.n_cols; q++) {
fprintf(stream, "%g\n", densities_e_[q]);
}
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/kde_stat.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -200,12 +200,12 @@
/** @brief The far field expansion created by the reference points
* in this node.
*/
- typename TKernelAux::TFarFieldExpansion farfield_expansion_;
-
+ // TODO: typename TKernelAux::TFarFieldExpansion farfield_expansion_;
+
/** @brief The local expansion stored in this node.
*/
- typename TKernelAux::TLocalExpansion local_expansion_;
-
+ // TODO :typename TKernelAux::TLocalExpansion local_expansion_;
+
/** @brief The subspace associated with this node.
*/
//SubspaceStat subspace_;
@@ -213,9 +213,9 @@
/** @brief Gets the weight sum.
*/
double get_weight_sum() {
- return farfield_expansion_.get_weight_sum();
+ return 0.0;//farfield_expansion_.get_weight_sum();
}
-
+
/** @brief Adds the other postponed contributions.
*/
void AddPostponed(const KdeStat& parent_stat) {
@@ -281,8 +281,8 @@
}
void Init(const TKernelAux &ka) {
- farfield_expansion_.Init(ka);
- local_expansion_.Init(ka);
+ // TODO: farfield_expansion_.Init(ka);
+ // TODO: local_expansion_.Init(ka);
}
void Init(const arma::mat& dataset, size_t &start, size_t &count) {
@@ -300,8 +300,8 @@
void Init(const arma::vec& center, const TKernelAux &ka) {
- farfield_expansion_.Init(center, ka);
- local_expansion_.Init(center, ka);
+ // TODO: farfield_expansion_.Init(center, ka);
+ // TODO: local_expansion_.Init(center, ka);
Init();
}
Modified: mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h
===================================================================
--- mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/kdeDL/naive_kde.h 2011-11-04 23:47:12 UTC (rev 10146)
@@ -98,16 +98,16 @@
printf("\nStarting naive KDE...\n");
CLI::StartTimer("naive_kde_compute");
- for(size_t q = 0; q < qset_.n_cols(); q++) {
+ for(size_t q = 0; q < qset_.n_cols; q++) {
const arma::vec q_col = qset_.unsafe_col(q);
// Compute unnormalized sum first.
- for(size_t r = 0; r < rset_.n_cols(); r++) {
+ for(size_t r = 0; r < rset_.n_cols; r++) {
const arma::vec r_col = rset_.unsafe_col(r);
double dsqd = kernel::LMetric<2,false>::Evaluate(q_col, r_col);
- densities_[q] += rset_weights_[r] * kernels_[r].EvalUnnormOnSq(dsqd);
+ densities_[q] += rset_weights_[r] * kernels_[r].Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
}
// Then normalize it.
@@ -127,16 +127,16 @@
printf("\nStarting naive KDE...\n");
CLI::StartTimer("naive_kde_compute");
- for(size_t q = 0; q < qset_.n_cols(); q++) {
+ for(size_t q = 0; q < qset_.n_cols; q++) {
const arma::vec q_col = qset_.unsafe_col(q);
// Compute unnormalized sum.
- for(size_t r = 0; r < rset_.n_cols(); r++) {
+ for(size_t r = 0; r < rset_.n_cols; r++) {
const arma::vec r_col = rset_.unsafe_col(r);
- double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
+ //double dsqd = kernel::LMetric<2,false>::Evaluate (q_col, r_col);
- densities_[q] += rset_weights_[r] * kernels_[r].EvalUnnormOnSq(dsqd);
+ densities_[q] += rset_weights_[r] * kernels_[r].Evaluate(q_col,r_col);//EvalUnnormOnSq(dsqd);
}
// Then, normalize it.
densities_[q] /= norm_const_;
@@ -148,7 +148,7 @@
void Init(arma::mat &qset, arma::mat &rset, struct datanode *module_in) {
// Use the uniform weights for a moment.
- arma::mat uniform_weights(1, rset.n_cols());
+ arma::mat uniform_weights(1, rset.n_cols);
uniform_weights.fill(1.0);
Init(qset, rset, uniform_weights, module_in);
@@ -184,7 +184,7 @@
rset_(r,c) = rset(r,c);
}
}
- rset_weights_ = arma::vec(reference_weights.n_cols());
+ rset_weights_ = arma::vec(reference_weights.n_cols);
for(size_t i = 0; i < rset_weights_.size(); i++)
{
rset_weights_[i] = reference_weights(0, i);
@@ -197,20 +197,20 @@
}
// Get bandwidth and compute the normalizing constant.
- kernels_.Init(rset_.n_cols());
- if(!strcmp(CLI::GetParam<std::string>("mode").c_str(), "variablebw")) {
-
+ if(!strcmp(CLI::GetParam<std::string>("mode").c_str(), "variablebw"))
+ {
// Initialize the kernels for each reference point.
int knns = CLI::GetParam<int>("knn");
- AllkNN all_knn = AllkNN(rset_, 20, knns);
+ AllkNN all_knn = AllkNN(rset_, knns);
arma::Mat<size_t> resulting_neighbors;
arma::mat squared_distances;
CLI::StartTimer("bandwidth_initialization");
all_knn.ComputeNeighbors(resulting_neighbors, squared_distances);
- for(size_t i = 0; i < squared_distances.size(); i += knns) {
- kernels_[i / knns].Init(sqrt(squared_distances[i + knns - 1]));
+ for(size_t i = 0; i < squared_distances.size(); i += knns)
+ {
+ kernels_.push_back(kernel::GaussianKernel(sqrt(squared_distances[i + knns - 1])));
}
CLI::StopTimer("bandwidth_initialization");
@@ -218,30 +218,32 @@
// that have been chosen.
double min_norm_const = DBL_MAX;
for(size_t i = 0; i < rset_weights_.size(); i++) {
- double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+ double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows());
min_norm_const = std::min(min_norm_const, norm_const);
}
for(size_t i = 0; i < rset_weights_.size(); i++) {
- double norm_const = kernels_[i].CalcNormConstant(qset_.n_rows());
+ double norm_const = kernels_[i].Normalizer();//CalcNormConstant(qset_.n_rows());
rset_weights_[i] *= (min_norm_const / norm_const);
}
-
+
// Compute normalization constant.
norm_const_ = weight_sum * min_norm_const;
}
- else {
- for(size_t i = 0; i < kernels_.size(); i++) {
- kernels_[i].Init(CLI::GetParam<double>("bandwidth"));
+ else
+ {
+ for(size_t i = 0; i < kernels_.size(); i++)
+ {
+ kernels_[i] = kernel::GaussianKernel(CLI::GetParam<double>("bandwidth"));
}
- norm_const_ = kernels_[0].CalcNormConstant(qset_.n_rows()) * weight_sum;
+ norm_const_ = kernels_[0].Normalizer();//CalcNormConstant(qset_.n_rows()) * weight_sum;
}
// Allocate density storage.
- densities_ = arma::vec(qset.n_cols());
+ densities_ = arma::vec(qset.n_cols);
densities_.zeros();
}
- /** @brief Output KDE results to a stream
+ /** @brief Output KDE results to a stream
*
* If the user provided "--naive_kde_output=" argument, then the
* output will be directed to a file whose name is provided after
@@ -254,10 +256,10 @@
const char *fname = NULL;
{
- fname = CLI::GetParam<std::string>("naive_kde_output");
+ fname = CLI::GetParam<std::string>("naive_kde_output").c_str();
stream = fopen(fname, "w+");
}
- for(size_t q = 0; q < qset_.n_cols(); q++) {
+ for(size_t q = 0; q < qset_.n_cols; q++) {
fprintf(stream, "%g\n", densities_[q]);
}
Modified: mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/CMakeLists.txt 2011-11-04 23:47:12 UTC (rev 10146)
@@ -4,9 +4,9 @@
# Anything not in this list will not be compiled into the output library
# Do not include test programs here
set(SOURCES
- test_tree.cc
kde_dual_tree.hpp
kde_dual_tree_impl.hpp
+ test_kde_dual_tree.cpp
)
# add directory name to sources
@@ -18,10 +18,10 @@
set(MLPACK_CONTRIB_SRCS ${MLPACK_CONTRIB_SRCS} ${DIR_SRCS} PARENT_SCOPE)
# link dependencies of test executable
-add_executable(test_tree
+add_executable(test_kde_dual_tree
EXCLUDE_FROM_ALL
- test_tree.cc
+ test_kde_dual_tree.cpp
)
-target_link_libraries(test_tree
+target_link_libraries(test_kde_dual_tree
mlpack
)
Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp 2011-11-04 23:47:12 UTC (rev 10146)
@@ -2,7 +2,7 @@
#define KDE_DUAL_TREE_HPP
#include <iostream>
-#include <priority_queue>
+#include <queue>
#include <mlpack/core.h>
#include <mlpack/core/kernels/gaussian_kernel.hpp>
@@ -17,6 +17,7 @@
namespace kde
{
/* structure within the priority queue */
+template <typename TTree = tree::BinarySpaceTree<bound::HRectBound<2> > >
struct queueNode
{
TTree* T;
@@ -28,13 +29,14 @@
size_t bLowerIndex;
size_t bUpperIndex;
};
+template <typename TTree = tree::BinarySpaceTree<bound::HRectBound<2> > >
class QueueNodeCompare
{
bool reverse;
public:
QueueNodeCompare(const bool& revparam=false) : reverse(revparam) {}
- bool operator() (const struct queueNode& lhs,
- const struct queueNode& rhs) const
+ bool operator() (const struct queueNode<TTree>& lhs,
+ const struct queueNode<TTree>& rhs) const
{
if (reverse)
return (lhs.priority>rhs.priority);
@@ -52,6 +54,8 @@
/* possibly, these refer to the same object */
TTree* referenceRoot;
TTree* queryRoot;
+ std::map<void*, size_t> nodeIndices;
+ size_t nextAvailableNodeIndex;
std::vector<size_t> referenceShuffledIndices;
std::vector<size_t> queryShuffledIndices;
arma::mat referenceData;
@@ -66,17 +70,19 @@
double delta;
/* relative error with respect to the density estimate */
double epsilon;
- math::Range bandwidths;
- std::priority_queue<struct queueNode,
- std::vector<struct queueNode>,
- QueueNodeCompare> nodePriorityQueue;
+ std::priority_queue<struct queueNode<TTree>,
+ std::vector<struct queueNode<TTree> >,
+ QueueNodeCompare<TTree> > nodePriorityQueue;
size_t bandwidthCount;
std::vector<double> bandwidths;
+ std::vector<double> inverseBandwidths;
+ double lowBandwidth;
+ double highBandwidth;
size_t levelsInTree;
size_t queryTreeSize;
void SetDefaults();
- void MultiBandwidthDualTree();
+ size_t MultiBandwidthDualTree();
void MultiBandwidthDualTreeBase(TTree* Q,
TTree* T, size_t QIndex,
size_t lowerBIndex, size_t upperBIndex);
@@ -88,22 +94,24 @@
{
return levelsInTree - node->levelsBelow();
}
- void Winnow(size_t bLower, size_t bUpper, size_t* newLower, size_t* newUpper);
+ void Winnow(size_t level, size_t* newLower, size_t* newUpper);
public:
/* the two data sets are different */
KdeDualTree (arma::mat& referenceData, arma::mat& queryData);
/* the reference data is also the query data */
KdeDualTree (arma::mat& referenceData);
+ std::vector<double> Calculate();
/* setters and getters */
- const math::Range& BandwidthRange() const { return bandwidthRange; }
const size_t& BandwidthCount() const { return bandwidthCount; }
const double& Delta() const { return delta; }
const double& Epsilon() const { return epsilon; }
+ const double& LowBandwidth() const { return lowBandwidth; }
+ const double& HighBandwidth() const { return highBandwidth; }
- void BandwidthRange(double l, double u) { bandwidthRange = math::Range(l,u); }
size_t& BandwidthCount() { return bandwidthCount; }
double& Delta() { return delta; }
double& Epsilon() { return epsilon; }
+ void SetBandwidthBounds(double l, double u);
};
}; /* end namespace kde */
}; /* end namespace mlpack */
Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp 2011-11-04 21:52:41 UTC (rev 10145)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp 2011-11-04 23:47:12 UTC (rev 10146)
@@ -8,6 +8,8 @@
#endif
#endif
+#define MADEIT std::cout<<"made it to "<<__LINE__<<" in "<<__FILE__<<std::endl
+
using namespace mlpack;
using namespace mlpack::kde;
@@ -20,8 +22,8 @@
KdeDualTree<TKernel, TTree>::KdeDualTree (arma::mat& reference,
arma::mat& query)
{
- referenceRoot (new TTree (reference)),
- queryRoot (new TTree (query))
+ referenceRoot = new TTree (reference, referenceShuffledIndices),
+ queryRoot = new TTree (query, queryShuffledIndices);
referenceData = reference;
queryData = query;
levelsInTree = queryRoot->levelsBelow();
@@ -37,27 +39,121 @@
referenceRoot = new TTree (reference, referenceShuffledIndices);
queryRoot = referenceRoot;
+ queryShuffledIndices = referenceShuffledIndices;
levelsInTree = queryRoot->levelsBelow();
queryTreeSize = queryRoot->treeSize();
SetDefaults();
}
template<typename TKernel, typename TTree>
-KdeDualTree<TKernel, TTree>::SetDefaults()
+void KdeDualTree<TKernel, TTree>::SetDefaults()
{
- BandwidthRange(0.01, 100.0);
+ SetBandwidthBounds(0.01, 100.0);
bandwidthCount = 10;
delta = epsilon = 0.05;
kernel = TKernel(1.0);
+ nextAvailableNodeIndex = 0;
}
template<typename TKernel, typename TTree>
-void KdeDualTree<TKernel, TTree>::MultiBandwidthDualTree()
+std::vector<double> KdeDualTree<TKernel, TTree>::Calculate()
{
+ /* calculate the bandwidths */
+ bandwidths.clear();
+ inverseBandwidths.clear();
+
+ if (bandwidthCount > 1)
+ {
+ double bandwidthDelta = (highBandwidth - lowBandwidth) / (bandwidthCount - 1);
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+ {
+ bandwidths.push_back(lowBandwidth + bandwidthDelta * bIndex);
+ inverseBandwidths.push_back(1.0 / bandwidths.back());
+ }
+ }
+ else
+ {
+ bandwidths.push_back(lowBandwidth);
+ inverseBandwidths.push_back(1.0 / lowBandwidth);
+ }
+
+ /* resize the critical matrices */
+ upperBoundLevelByBandwidth.zeros(levelsInTree,bandwidthCount);
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+ {
+ arma::vec col = upperBoundLevelByBandwidth.unsafe_col(bIndex);
+ col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+ }
+ upperBoundLevelByBandwidth.fill(referenceRoot->count());
+ lowerBoundLevelByBandwidth.zeros(levelsInTree,bandwidthCount);
+ upperBoundQPointByBandwidth.zeros(queryRoot->count(),bandwidthCount);
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+ {
+ arma::vec col = upperBoundQPointByBandwidth.unsafe_col(bIndex);
+ col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+ }
+ lowerBoundQPointByBandwidth.zeros(queryRoot->count(),bandwidthCount);
+ upperBoundQNodeByBandwidth.zeros(queryTreeSize,bandwidthCount);
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+ {
+ arma::vec col = upperBoundQNodeByBandwidth.unsafe_col(bIndex);
+ col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+ }
+ lowerBoundQNodeByBandwidth.zeros(queryTreeSize,bandwidthCount);
+
+ arma::vec dl;
+ arma::vec du;
+ dl.zeros(bandwidthCount);
+ du.zeros(bandwidthCount);
+ double priority = pow(
+ queryRoot->bound().MinDistance(referenceRoot->bound()),
+ 0.5);
+ struct queueNode<TTree> firstNode =
+ {referenceRoot,queryRoot, nextAvailableNodeIndex, dl, du,
+ priority, 0, bandwidthCount - 1};
+ nodeIndices[queryRoot] = nextAvailableNodeIndex;
+ ++nextAvailableNodeIndex;
+ nodePriorityQueue.push(firstNode);
+ size_t finalLevel = MultiBandwidthDualTree();
+
+ size_t maxIndex = 0;
+ double maxLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,0) +
+ lowerBoundLevelByBandwidth(finalLevel,0)) / 2.0;
+ for (size_t bIndex = 1; bIndex < bandwidthCount; ++bIndex)
+ {
+ double currentLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,bIndex) +
+ lowerBoundLevelByBandwidth(finalLevel,bIndex)) / 2.0;
+ if (currentLogLikelihood > maxLogLikelihood)
+ {
+ currentLogLikelihood = maxLogLikelihood;
+ maxIndex = bIndex;
+ }
+ }
+ std::cout << upperBoundLevelByBandwidth << "\n";
+ std::cout << lowerBoundLevelByBandwidth << "\n";
+ std::cout << "best bandwidth " << bandwidths[maxIndex] << ";\n";
+ exit(1);
+ std::vector<double> densities;
+ for (std::vector<size_t>::iterator shuffIt = queryShuffledIndices.begin();
+ shuffIt != queryShuffledIndices.end(); ++shuffIt)
+ {
+ densities.push_back((upperBoundQPointByBandwidth(*shuffIt, maxIndex) +
+ lowerBoundQPointByBandwidth(*shuffIt, maxIndex)) / (2.0 * referenceRoot->count()));
+
+ }
+ return densities;
+}
+
+template<typename TKernel, typename TTree>
+size_t KdeDualTree<TKernel, TTree>::MultiBandwidthDualTree()
+{
+ /* current level */
+ size_t v = 0;
while (!nodePriorityQueue.empty())
{
/* get the first structure in the queue */
- struct queueNode queueCurrent = nodePriorityQueue.pop();
+ struct queueNode<TTree> queueCurrent = nodePriorityQueue.top();
+ nodePriorityQueue.pop();
TTree* Q = queueCurrent.Q;
TTree* T = queueCurrent.T;
size_t sizeOfTNode = T->count();
@@ -66,7 +162,7 @@
arma::vec deltaLower = queueCurrent.deltaLower;
arma::vec deltaUpper = queueCurrent.deltaUpper;
/* v is the level of the Q node */
- size_t v = GetLevelOfNode(Q);
+ v = GetLevelOfNode(Q);
size_t bUpper = queueCurrent.bUpperIndex;
size_t bLower = queueCurrent.bLowerIndex;
/* check to see whether we've reached the epsilon condition */
@@ -96,7 +192,7 @@
/* return */
if (epsilonCondition)
{
- return;
+ return v;
}
/* we didn't meet the criteria; let's narrow the bandwidth range */
Winnow(v, &bLower, &bUpper);
@@ -109,11 +205,13 @@
std::vector<bool> deltaCondition;
for (size_t bIndex = bLower; bIndex <= bUpper; ++bIndex)
{
- double bandwidth = bandwidths[bIndex];
- double dl = sizeOfTNode * kernel(dMax / bandwidth);
- double du = sizeOfTNode * kernel(dMin / bandwidth);
+ double inverseBandwidth = inverseBandwidths[bIndex];
+ double dl = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMax * inverseBandwidth);
+ double du = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMin * inverseBandwidth);
deltaLower(bIndex) = dl;
deltaUpper(bIndex) = du - sizeOfTNode;
+ //std::cout << "QIndex: " << QIndex << " bIndex: " << bIndex << std::endl;
+ //std::cout << "max QIndex: " << queryTreeSize - 1 << std::endl;
if ((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl) < delta)
{
for (size_t q = Q->begin(); q < Q->end(); ++q)
@@ -151,12 +249,12 @@
if (meetsDeltaCondition)
{
/* adjust the current structure, then reinsert it into the queue */
- queueCurrent.dl = deltaLower;
- queueCurrent.du = deltaUpper;
+ queueCurrent.deltaLower = deltaLower;
+ queueCurrent.deltaUpper = deltaUpper;
queueCurrent.bUpperIndex = bUpper;
queueCurrent.bLowerIndex = bLower;
queueCurrent.priority += PRIORITY_MAX;
- nodePriorityQueue.insert(queueCurrent);
+ nodePriorityQueue.push(queueCurrent);
continue;
}
else
@@ -208,28 +306,45 @@
MultiBandwidthDualTreeBase(Q, T, QIndex, bLower, bUpper);
}
double priority = pow(Q->bound().MinDistance(T->bound()), 0.5);
- if (!Q->is_left() && !T->is_leaf())
+ if (!Q->is_leaf() && !T->is_leaf())
{
- struct queueNode leftLeft =
- {T->left(),Q->left(), 2*QIndex + 1, arma::vec(deltaUpper),
- arma::vec(deltaLower), priority, bLower, bUpper};
- struct queueNode leftRight =
- {T->left(),Q->right(), 2*QIndex + 2, arma::vec(deltaUpper),
- arma::vec(deltaLower), priority, bLower, bUpper};
- struct queueNode rightLeft =
- {T->right(),Q->left(), 2*QIndex + 1, arma::vec(deltaUpper),
- arma::vec(deltaLower), priority, bLower, bUpper};
- struct queueNode rightRight =
- {T->right(),Q->right(), 2*QIndex + 2, arma::vec(deltaUpper),
- arma::vec(deltaLower), priority, bLower, bUpper};
- nodePriorityQueue.insert(leftLeft);
- nodePriorityQueue.insert(leftRight);
- nodePriorityQueue.insert(rightLeft);
- nodePriorityQueue.insert(rightRight);
+ //std::cout << "QIndex for the current non-leaf : " << QIndex << std::endl;
+ TTree* QLeft = Q->left();
+ TTree* QRight = Q->right();
+ if (nodeIndices.find(QLeft) == nodeIndices.end())
+ {
+ nodeIndices[QLeft] = nextAvailableNodeIndex;
+ ++nextAvailableNodeIndex;
+ }
+ if (nodeIndices.find(QRight) == nodeIndices.end())
+ {
+ nodeIndices[QRight] = nextAvailableNodeIndex;
+ ++nextAvailableNodeIndex;
+ }
+ size_t QLeftIndex = (*(nodeIndices.find(QLeft))).second;
+ size_t QRightIndex = (*(nodeIndices.find(QRight))).second;
+ struct queueNode<TTree> leftLeft =
+ {T->left(),Q->left(), QLeftIndex, arma::vec(deltaLower),
+ arma::vec(deltaUpper), priority, bLower, bUpper};
+ struct queueNode<TTree> leftRight =
+ {T->left(),Q->right(), QRightIndex, arma::vec(deltaLower),
+ arma::vec(deltaUpper), priority, bLower, bUpper};
+ struct queueNode<TTree> rightLeft =
+ {T->right(),Q->left(), QLeftIndex, arma::vec(deltaLower),
+ arma::vec(deltaUpper), priority, bLower, bUpper};
+ struct queueNode<TTree> rightRight =
+ {T->right(),Q->right(), QRightIndex, arma::vec(deltaLower),
+ arma::vec(deltaUpper), priority, bLower, bUpper};
+ nodePriorityQueue.push(leftLeft);
+ nodePriorityQueue.push(leftRight);
+ nodePriorityQueue.push(rightLeft);
+ nodePriorityQueue.push(rightRight);
}
}
+ return v;
}
+template<typename TKernel, typename TTree>
void KdeDualTree<TKernel, TTree>::Winnow(size_t level,
size_t* bLower,
size_t* bUpper)
@@ -310,14 +425,14 @@
{
arma::vec diff = queryPoint - referenceData.unsafe_col(t);
double distSquared = arma::dot(diff, diff);
- size_t bandwidthIndex = upperBIndex;
+ size_t bandwidthIndex = upperBIndex + 1;
while (bandwidthIndex > lowerBIndex)
{
--bandwidthIndex;
- double bandwidth = bandwidths[bandwidthIndex];
- double scaledProduct = pow(distSquared, 0.5) / bandwidth;
+ double inverseBandwidth = inverseBandwidths[bandwidthIndex];
+ double scaledProduct = pow(distSquared, 0.5) * inverseBandwidth;
/* TODO: determine the power of the incoming argument */
- double contribution = kernel(scaledProduct);
+ double contribution = inverseBandwidth * kernel.Evaluate(scaledProduct);
if (contribution > DBL_EPSILON)
{
upperBoundQPointByBandwidth(q, bandwidthIndex) += contribution;
@@ -368,6 +483,16 @@
sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
}
}
+template<typename TKernel, typename TTree>
+void KdeDualTree<TKernel, TTree>::SetBandwidthBounds(double l, double u)
+{
+ if (u <= l + DBL_EPSILON || l <= DBL_EPSILON)
+ {
+ Log::Fatal << "Incorrect bandwidth range assignment" << std::endl;
+ }
+ lowBandwidth = l;
+ highBandwidth = u;
+}
};
};
Added: mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp (rev 0)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/test_kde_dual_tree.cpp 2011-11-04 23:47:12 UTC (rev 10146)
@@ -0,0 +1,100 @@
+#include <mlpack/core.h>
+#include "kde_dual_tree.hpp"
+
+PROGRAM_INFO("Kernel Density Estimation Multibandwidth Dual Tree",
+ "KDE multibandwidth dual tree calculates density estimates for each "
+ "query point, given a collection of reference points, using a collection "
+ "of equidistantly spaced bandwidths\n\n"
+ "$ kde_dual_tree --reference_file=reference.csv --query_file=query.csv\n"
+ " --output_file=output.csv --low_bandwidth=0.1 --high_bandwidth=100.0\n"
+ " --bandwidth_count=10 --epsilon=0.01 --delta=0.01", "kde_dual_tree");
+
+PARAM_STRING_REQ("reference_file", "CSV file containing the reference dataset.",
+ "");
+PARAM_STRING("query_file", "CSV file containing query points",
+ "", "");
+PARAM_STRING("output_file", "File to output CSV-formatted results into.", "",
+ "kde_dual_tree_output.csv");
+PARAM_DOUBLE("low_bandwidth", "Low bandwidth", "", 0.1);
+PARAM_DOUBLE("high_bandwidth", "Low bandwidth", "", 100.0);
+PARAM_INT("bandwidth_count", "Low bandwidth", "", 10);
+PARAM_DOUBLE("epsilon", "error tolerance", "", 0.01);
+PARAM_DOUBLE("delta", "reversibility tolerance", "", 0.01);
+
+int main (int argc, char* argv[])
+{
+ CLI::ParseCommandLine(argc, argv);
+ std::string referenceFile = CLI::GetParam<std::string>("reference_file");
+ std::string queryFile = CLI::GetParam<std::string>("query_file");
+ std::string outputFile = CLI::GetParam<std::string>("output_file");
+ arma::mat referenceData;
+ arma::mat queryData;
+ double epsilon = CLI::GetParam<double>("epsilon");
+ double delta = CLI::GetParam<double>("delta");
+ int bandwidthCount = CLI::GetParam<int>("bandwidth_count");
+ double lowBandwidth = CLI::GetParam<double>("low_bandwidth");
+ double highBandwidth = CLI::GetParam<double>("high_bandwidth");
+
+ /* check the parameters */
+ if (delta < 0.0)
+ {
+ Log::Fatal << "Improper delta: " << delta <<
+ "; delta must be positive" << std::endl;
+ }
+ if (epsilon < 0.0)
+ {
+ Log::Fatal << "Improper epsilon: " << epsilon <<
+ "; epsilon must be positive" << std::endl;
+ }
+ if (bandwidthCount <= 0)
+ {
+ Log::Fatal << "Improper bandwidth_count: " << bandwidthCount <<
+ "; bandwidth_count must be positive" << std::endl;
+ }
+ if (highBandwidth <= lowBandwidth + DBL_EPSILON || lowBandwidth <= 0.0)
+ {
+ Log::Fatal << "Improper bandwidth range: " << lowBandwidth << ", " <<
+ highBandwidth << "; bandwidth range must be a positive interval" << std::endl;
+ }
+
+ if (!data::Load(referenceFile.c_str(), referenceData))
+ {
+ Log::Fatal << "Failed to load the reference file " << referenceFile << std::endl;
+ }
+
+ Log::Info << "Loaded reference data from " << referenceFile << std::endl;
+
+ std::vector<double> densities;
+ if (queryFile == "")
+ {
+ /* invoke KDE without specific query data */
+ KdeDualTree<> kde = KdeDualTree<>(referenceData);
+ kde.Epsilon() = epsilon;
+ kde.Delta() = delta;
+ kde.BandwidthCount() = bandwidthCount;
+ kde.SetBandwidthBounds(lowBandwidth, highBandwidth);
+ densities = kde.Calculate();
+ }
+ else
+ {
+ /* invoke KDE without specific query data */
+ KdeDualTree<> kde = KdeDualTree<>(referenceData, queryData);
+ kde.Epsilon() = epsilon;
+ kde.Delta() = delta;
+ kde.BandwidthCount() = bandwidthCount;
+ kde.SetBandwidthBounds(lowBandwidth, highBandwidth);
+ densities = kde.Calculate();
+ }
+ size_t index = 0;
+ for (std::vector<double>::iterator dIt = densities.begin();
+ dIt != densities.end();
+ ++dIt)
+ {
+ if (*dIt != 0.0)
+ {
+ std::cout << "density[" << index << "]=" << *dIt << std::endl;
+ }
+ ++index;
+ }
+ return 0;
+}
More information about the mlpack-svn
mailing list