[mlpack-svn] r10154 - mlpack/trunk/src/contrib/nslagle/myKDE
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Nov 5 15:22:14 EDT 2011
Author: nslagle
Date: 2011-11-05 15:22:13 -0400 (Sat, 05 Nov 2011)
New Revision: 10154
Modified:
mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
Log:
mlpack/contrib/nslagle: commit a possibly working version
Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp 2011-11-05 16:39:00 UTC (rev 10153)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree.hpp 2011-11-05 19:22:13 UTC (rev 10154)
@@ -28,6 +28,7 @@
double priority;
size_t bLowerIndex;
size_t bUpperIndex;
+ size_t QLevel;
};
template <typename TTree = tree::BinarySpaceTree<bound::HRectBound<2> > >
class QueueNodeCompare
@@ -39,9 +40,9 @@
const struct queueNode<TTree>& rhs) const
{
if (reverse)
+ return (lhs.priority<rhs.priority);
+ else
return (lhs.priority>rhs.priority);
- else
- return (lhs.priority<rhs.priority);
}
};
@@ -58,6 +59,7 @@
size_t nextAvailableNodeIndex;
std::vector<size_t> referenceShuffledIndices;
std::vector<size_t> queryShuffledIndices;
+ arma::Mat<size_t> touched;
arma::mat referenceData;
arma::mat queryData;
arma::mat upperBoundLevelByBandwidth;
@@ -85,7 +87,8 @@
size_t MultiBandwidthDualTree();
void MultiBandwidthDualTreeBase(TTree* Q,
TTree* T, size_t QIndex,
- size_t lowerBIndex, size_t upperBIndex);
+ size_t lowerBIndex, size_t upperBIndex,
+ size_t levelOfQ);
double GetPriority(TTree* nodeQ, TTree* nodeT)
{
return nodeQ->bound().MinDistance(*nodeT);
Modified: mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp 2011-11-05 16:39:00 UTC (rev 10153)
+++ mlpack/trunk/src/contrib/nslagle/myKDE/kde_dual_tree_impl.hpp 2011-11-05 19:22:13 UTC (rev 10154)
@@ -82,10 +82,11 @@
for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
{
arma::vec col = upperBoundLevelByBandwidth.unsafe_col(bIndex);
- col.fill(referenceRoot->count() * inverseBandwidths[bIndex]);
+ col.fill(referenceRoot->count() * log(inverseBandwidths[bIndex]));
}
- upperBoundLevelByBandwidth.fill(referenceRoot->count());
lowerBoundLevelByBandwidth.zeros(levelsInTree,bandwidthCount);
+ /* place pretend log(0)s into the matrix */
+ lowerBoundLevelByBandwidth.fill(referenceRoot->count() * log(DBL_EPSILON));
upperBoundQPointByBandwidth.zeros(queryRoot->count(),bandwidthCount);
for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
{
@@ -102,35 +103,50 @@
lowerBoundQNodeByBandwidth.zeros(queryTreeSize,bandwidthCount);
arma::vec dl;
- arma::vec du;
+ arma::vec du(bandwidthCount);
dl.zeros(bandwidthCount);
- du.zeros(bandwidthCount);
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
+ {
+ du(bIndex) = referenceRoot->count() * inverseBandwidths[bIndex];
+ }
double priority = pow(
queryRoot->bound().MinDistance(referenceRoot->bound()),
0.5);
struct queueNode<TTree> firstNode =
{referenceRoot,queryRoot, nextAvailableNodeIndex, dl, du,
- priority, 0, bandwidthCount - 1};
+ priority, 0, bandwidthCount - 1, 0};
nodeIndices[queryRoot] = nextAvailableNodeIndex;
++nextAvailableNodeIndex;
nodePriorityQueue.push(firstNode);
size_t finalLevel = MultiBandwidthDualTree();
+ std::cout << "the best level is " << finalLevel << "\n";
- size_t maxIndex = 0;
- double maxLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,0) +
- lowerBoundLevelByBandwidth(finalLevel,0)) / 2.0;
- for (size_t bIndex = 1; bIndex < bandwidthCount; ++bIndex)
+ size_t maxIndex = -1;
+ double maxLogLikelihood = -DBL_MAX;
+
+ for (size_t bIndex = 0; bIndex < bandwidthCount; ++bIndex)
{
double currentLogLikelihood = (upperBoundLevelByBandwidth(finalLevel,bIndex) +
lowerBoundLevelByBandwidth(finalLevel,bIndex)) / 2.0;
+ std::cout << bandwidths[bIndex] << "," << inverseBandwidths[bIndex] << "," << currentLogLikelihood << std::endl;
if (currentLogLikelihood > maxLogLikelihood)
{
- currentLogLikelihood = maxLogLikelihood;
+ maxLogLikelihood = currentLogLikelihood;
maxIndex = bIndex;
}
}
- std::cout << upperBoundLevelByBandwidth << "\n";
- std::cout << lowerBoundLevelByBandwidth << "\n";
+ if (maxIndex == (size_t)-1)
+ {
+ std::cout << "We failed.\n";
+ }
+ //std::cout << "upperBoundLevelByBandwidth" << "\n";
+ //std::cout << upperBoundLevelByBandwidth << "\n";
+ //std::cout << "lowerBoundLevelByBandwidth" << "\n";
+ //std::cout << lowerBoundLevelByBandwidth << "\n";
+ //std::cout << "upperBoundQNodeByBandwidth" << "\n";
+ //std::cout << upperBoundQNodeByBandwidth << "\n";
+ //std::cout << "lowerBoundQNodeByBandwidth" << "\n";
+ //std::cout << lowerBoundQNodeByBandwidth << "\n";
std::cout << "best bandwidth " << bandwidths[maxIndex] << ";\n";
exit(1);
std::vector<double> densities;
@@ -138,7 +154,7 @@
shuffIt != queryShuffledIndices.end(); ++shuffIt)
{
densities.push_back((upperBoundQPointByBandwidth(*shuffIt, maxIndex) +
- lowerBoundQPointByBandwidth(*shuffIt, maxIndex)) / (2.0 * referenceRoot->count()));
+ lowerBoundQPointByBandwidth(*shuffIt, maxIndex)) / (kernel.Normalizer() * 2.0 * referenceRoot->count()));
}
return densities;
@@ -153,16 +169,27 @@
{
/* get the first structure in the queue */
struct queueNode<TTree> queueCurrent = nodePriorityQueue.top();
+ //std::cout << "priority " << queueCurrent.priority << std::endl;
+ //std::cout << "size " << nodePriorityQueue.size() << std::endl;
+ //std::cout << "priorities============\n";
+ //for (std::priority_queue< struct queueNode<TTree>,
+ // std::vector<struct queueNode<TTree> >,
+ // QueueNodeCompare<TTree> >::iterator it = nodePriorityQueue.begin(); it!=nodePriorityQueue.end(); ++it)
+ //{
+ // std::cout << (*it).priority << "\n";
+ //}
+ //std::cout << "======================\n";
nodePriorityQueue.pop();
TTree* Q = queueCurrent.Q;
TTree* T = queueCurrent.T;
size_t sizeOfTNode = T->count();
size_t sizeOfQNode = Q->count();
size_t QIndex = queueCurrent.QIndex;
+ //std::cout << "the pointer to deltaUpper is " << QIndex << " " << &(queueCurrent.deltaUpper) << "\n";
arma::vec deltaLower = queueCurrent.deltaLower;
arma::vec deltaUpper = queueCurrent.deltaUpper;
/* v is the level of the Q node */
- v = GetLevelOfNode(Q);
+ v = queueCurrent.QLevel;
size_t bUpper = queueCurrent.bUpperIndex;
size_t bLower = queueCurrent.bLowerIndex;
/* check to see whether we've reached the epsilon condition */
@@ -171,11 +198,11 @@
bIndex <= queueCurrent.bUpperIndex;
++bIndex)
{
- if (lowerBoundLevelByBandwidth(v,bIndex) > DBL_EPSILON)
+ if (abs(lowerBoundLevelByBandwidth(v,bIndex)) > DBL_EPSILON)
{
- double constraint = (upperBoundLevelByBandwidth(v,bIndex) -
+ double constraint = abs((upperBoundLevelByBandwidth(v,bIndex) -
lowerBoundLevelByBandwidth(v,bIndex)) /
- lowerBoundLevelByBandwidth(v,bIndex);
+ lowerBoundLevelByBandwidth(v,bIndex));
if (constraint > epsilon)
{
epsilonCondition = false;
@@ -209,10 +236,10 @@
double dl = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMax * inverseBandwidth);
double du = sizeOfTNode * inverseBandwidth * kernel.Evaluate(dMin * inverseBandwidth);
deltaLower(bIndex) = dl;
- deltaUpper(bIndex) = du - sizeOfTNode;
+ deltaUpper(bIndex) = du - inverseBandwidths[bIndex] * sizeOfTNode;
//std::cout << "QIndex: " << QIndex << " bIndex: " << bIndex << std::endl;
//std::cout << "max QIndex: " << queryTreeSize - 1 << std::endl;
- if ((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl) < delta)
+ if (abs((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl)) < delta)
{
for (size_t q = Q->begin(); q < Q->end(); ++q)
{
@@ -222,19 +249,35 @@
/* subtract the current log-likelihood */
upperBoundLevelByBandwidth(v, bIndex) -=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(v, bIndex) -=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (abs(lowerBoundQNodeByBandwidth(QIndex, bIndex)) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) -=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) -=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
/* adjust the current inner portion */
lowerBoundQNodeByBandwidth(QIndex, bIndex) += deltaLower(bIndex);
upperBoundQNodeByBandwidth(QIndex, bIndex) += deltaUpper(bIndex);
/* add the corrected log-likelihood */
upperBoundLevelByBandwidth(v, bIndex) +=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(v, bIndex) +=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (lowerBoundQNodeByBandwidth(QIndex, bIndex) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) +=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) +=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
}
/* check the delta condition with the new values */
- if ((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl) >= delta)
+ if (abs((du - dl)/(lowerBoundQNodeByBandwidth(QIndex, bIndex) + dl)) >= delta)
{
deltaCondition.push_back(false);
meetsDeltaCondition = false;
@@ -255,6 +298,7 @@
queueCurrent.bLowerIndex = bLower;
queueCurrent.priority += PRIORITY_MAX;
nodePriorityQueue.push(queueCurrent);
+ /* the continue forces us to undo the previous node */
continue;
}
else
@@ -275,7 +319,7 @@
}
}
}
- else /* the priority exceeds the maximum available */
+ else /* the priority exceeds the maximum available; back the node out */
{
deltaLower = -deltaLower;
deltaUpper = -deltaUpper;
@@ -289,21 +333,37 @@
/* subtract the current log-likelihood */
upperBoundLevelByBandwidth(v, bIndex) -=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(v, bIndex) -=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (lowerBoundQNodeByBandwidth(QIndex, bIndex) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) -=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) -=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
/* adjust the current inner portion */
lowerBoundQNodeByBandwidth(QIndex, bIndex) += deltaLower(bIndex);
upperBoundQNodeByBandwidth(QIndex, bIndex) += deltaUpper(bIndex);
/* add the corrected log-likelihood */
upperBoundLevelByBandwidth(v, bIndex) +=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(v, bIndex) +=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (lowerBoundQNodeByBandwidth(QIndex, bIndex) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) +=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(v, bIndex) +=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
}
}
if (Q->is_leaf() && T->is_leaf())
{
- MultiBandwidthDualTreeBase(Q, T, QIndex, bLower, bUpper);
+ MultiBandwidthDualTreeBase(Q, T, QIndex, bLower, bUpper, queueCurrent.QLevel);
}
double priority = pow(Q->bound().MinDistance(T->bound()), 0.5);
if (!Q->is_leaf() && !T->is_leaf())
@@ -323,24 +383,47 @@
}
size_t QLeftIndex = (*(nodeIndices.find(QLeft))).second;
size_t QRightIndex = (*(nodeIndices.find(QRight))).second;
+ arma::vec leftLeftLower(deltaLower.n_rows);
+ arma::vec leftLeftUpper(deltaUpper.n_rows);
+ arma::vec leftRightLower(deltaLower.n_rows);
+ arma::vec leftRightUpper(deltaUpper.n_rows);
+ arma::vec rightLeftLower(deltaLower.n_rows);
+ arma::vec rightLeftUpper(deltaUpper.n_rows);
+ arma::vec rightRightLower(deltaLower.n_rows);
+ arma::vec rightRightUpper(deltaUpper.n_rows);
+ for (size_t index = 0; index < deltaLower.n_cols; ++index)
+ {
+ leftLeftLower(index) = deltaLower(index);
+ leftLeftUpper(index) = deltaUpper(index);
+ leftRightLower(index) = deltaLower(index);
+ leftRightUpper(index) = deltaUpper(index);
+ rightLeftLower(index) = deltaLower(index);
+ rightLeftUpper(index) = deltaUpper(index);
+ rightRightLower(index) = deltaLower(index);
+ rightRightUpper(index) = deltaUpper(index);
+ }
+ //std::cout << "deltaUpper address " << &deltaUpper << "\n";
+ //std::cout << "leftLeftUpper address " << &leftLeftUpper << "\n";
+
struct queueNode<TTree> leftLeft =
- {T->left(),Q->left(), QLeftIndex, arma::vec(deltaLower),
- arma::vec(deltaUpper), priority, bLower, bUpper};
+ {T->left(),Q->left(), QLeftIndex, leftLeftLower,
+ leftLeftUpper, priority, bLower, bUpper, queueCurrent.QLevel + 1};
struct queueNode<TTree> leftRight =
- {T->left(),Q->right(), QRightIndex, arma::vec(deltaLower),
- arma::vec(deltaUpper), priority, bLower, bUpper};
+ {T->left(),Q->right(), QRightIndex, leftRightLower,
+ leftRightUpper, priority, bLower, bUpper, queueCurrent.QLevel + 1};
struct queueNode<TTree> rightLeft =
- {T->right(),Q->left(), QLeftIndex, arma::vec(deltaLower),
- arma::vec(deltaUpper), priority, bLower, bUpper};
+ {T->right(),Q->left(), QLeftIndex, rightLeftLower,
+ rightLeftUpper, priority, bLower, bUpper, queueCurrent.QLevel + 1};
struct queueNode<TTree> rightRight =
- {T->right(),Q->right(), QRightIndex, arma::vec(deltaLower),
- arma::vec(deltaUpper), priority, bLower, bUpper};
+ {T->right(),Q->right(), QRightIndex, rightRightLower,
+ rightRightUpper, priority, bLower, bUpper, queueCurrent.QLevel + 1};
nodePriorityQueue.push(leftLeft);
nodePriorityQueue.push(leftRight);
nodePriorityQueue.push(rightLeft);
nodePriorityQueue.push(rightRight);
}
}
+ MADEIT;
return v;
}
@@ -353,11 +436,11 @@
double constraint = delta;
bool enteredTheLoop = false;
/* bring the lower up */
- if (lowerBoundLevelByBandwidth(level,bIndex) > DBL_EPSILON)
+ if (abs(lowerBoundLevelByBandwidth(level,bIndex)) > DBL_EPSILON)
{
- constraint = (upperBoundLevelByBandwidth(level,bIndex) -
+ constraint = abs((upperBoundLevelByBandwidth(level,bIndex) -
lowerBoundLevelByBandwidth(level,bIndex)) /
- lowerBoundLevelByBandwidth(level,bIndex);
+ lowerBoundLevelByBandwidth(level,bIndex));
}
while (constraint < delta && bIndex <= *bUpper)
{
@@ -383,11 +466,11 @@
constraint = delta;
enteredTheLoop = false;
/* bring the lower up */
- if (lowerBoundLevelByBandwidth(level,bIndex) > DBL_EPSILON)
+ if (abs(lowerBoundLevelByBandwidth(level,bIndex)) > DBL_EPSILON)
{
- constraint = (upperBoundLevelByBandwidth(level,bIndex) -
+ constraint = abs((upperBoundLevelByBandwidth(level,bIndex) -
lowerBoundLevelByBandwidth(level,bIndex)) /
- lowerBoundLevelByBandwidth(level,bIndex);
+ lowerBoundLevelByBandwidth(level,bIndex));
}
while (constraint < delta && bIndex >= *bLower)
{
@@ -414,7 +497,7 @@
template<typename TKernel, typename TTree>
void KdeDualTree<TKernel, TTree>::MultiBandwidthDualTreeBase(TTree* Q,
TTree* T, size_t QIndex,
- size_t lowerBIndex, size_t upperBIndex)
+ size_t lowerBIndex, size_t upperBIndex, size_t levelOfQ)
{
size_t sizeOfTNode = T->count();
size_t sizeOfQNode = Q->count();
@@ -449,15 +532,22 @@
upperBoundQPointByBandwidth(q, bIndex) -= sizeOfTNode;
}
}
- size_t levelOfQ = GetLevelOfNode(Q);
for (size_t bIndex = lowerBIndex; bIndex <= upperBIndex; ++bIndex)
{
/* subtract out the current log-likelihood amount for this Q node so we can readjust
* the Q node bounds by current bandwidth */
upperBoundLevelByBandwidth(levelOfQ, bIndex) -=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(levelOfQ, bIndex) -=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (lowerBoundQNodeByBandwidth(QIndex, bIndex) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(levelOfQ, bIndex) -=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(levelOfQ, bIndex) -=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
arma::vec upperBound = upperBoundQPointByBandwidth.unsafe_col(bIndex);
arma::vec lowerBound = lowerBoundQPointByBandwidth.unsafe_col(bIndex);
double minimumLower = lowerBoundQPointByBandwidth(Q->begin(), bIndex);
@@ -476,11 +566,21 @@
/* adjust Q node bounds, then add the new quantities to the level by bandwidth
* log-likelihood bounds */
lowerBoundQNodeByBandwidth(QIndex, bIndex) = minimumLower;
- upperBoundQNodeByBandwidth(QIndex, bIndex) = maximumUpper - sizeOfTNode;
+ //std::cout << "maximum upper - TNodeSize = " << maximumUpper - sizeOfTNode << "\n";
+ upperBoundQNodeByBandwidth(QIndex, bIndex) = maximumUpper -
+ inverseBandwidths[bIndex] * sizeOfTNode;
upperBoundLevelByBandwidth(levelOfQ, bIndex) +=
sizeOfQNode * log(upperBoundQNodeByBandwidth(QIndex, bIndex));
- lowerBoundLevelByBandwidth(levelOfQ, bIndex) +=
- sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ if (lowerBoundQNodeByBandwidth(QIndex, bIndex) > DBL_EPSILON)
+ {
+ lowerBoundLevelByBandwidth(levelOfQ, bIndex) +=
+ sizeOfQNode * log(lowerBoundQNodeByBandwidth(QIndex, bIndex));
+ }
+ else
+ {
+ lowerBoundLevelByBandwidth(levelOfQ, bIndex) +=
+ sizeOfQNode * log(DBL_EPSILON);
+ }
}
}
template<typename TKernel, typename TTree>
More information about the mlpack-svn
mailing list