[mlpack-svn] r17485 - mlpack/trunk/src/mlpack/core/tree/cosine_tree

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Dec 9 23:10:57 EST 2014


Author: rcurtin
Date: Tue Dec  9 23:10:56 2014
New Revision: 17485

Log:
Fix memory leak, although I'm not sure it's responsible for the i386 failures.


Modified:
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp

Modified: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp	(original)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp	Tue Dec  9 23:10:56 2014
@@ -14,14 +14,14 @@
 CosineTree::CosineTree(const arma::mat& dataset) :
     dataset(dataset),
     parent(NULL),
-    right(NULL),
     left(NULL),
+    right(NULL),
     numColumns(dataset.n_cols)
-{  
+{
   // Initialize sizes of column indices and l2 norms.
   indices.resize(numColumns);
   l2NormsSquared.zeros(numColumns);
-  
+
   // Set indices and calculate squared norms of the columns.
   for(size_t i = 0; i < numColumns; i++)
   {
@@ -29,13 +29,13 @@
     double l2Norm = arma::norm(dataset.col(i), 2);
     l2NormsSquared(i) = l2Norm * l2Norm;
   }
-  
+
   // Frobenius norm of columns in the node.
   frobNormSquared = arma::accu(l2NormsSquared);
-  
+
   // Calculate centroid of columns in the node.
   CalculateCentroid();
-  
+
   splitPointIndex = ColumnSampleLS();
 }
 
@@ -43,27 +43,27 @@
                        const std::vector<size_t>& subIndices) :
     dataset(parentNode.GetDataset()),
     parent(&parentNode),
-    right(NULL),
     left(NULL),
+    right(NULL),
     numColumns(subIndices.size())
 {
   // Initialize sizes of column indices and l2 norms.
   indices.resize(numColumns);
   l2NormsSquared.zeros(numColumns);
-  
+
   // Set indices and squared norms of the columns.
   for(size_t i = 0; i < numColumns; i++)
   {
     indices[i] = parentNode.indices[subIndices[i]];
     l2NormsSquared(i) = parentNode.l2NormsSquared(subIndices[i]);
   }
-  
+
   // Frobenius norm of columns in the node.
   frobNormSquared = arma::accu(l2NormsSquared);
-  
+
   // Calculate centroid of columns in the node.
   CalculateCentroid();
-  
+
   splitPointIndex = ColumnSampleLS();
 }
 
@@ -72,63 +72,73 @@
                        const double delta) :
     dataset(dataset),
     epsilon(epsilon),
-    delta(delta)
+    delta(delta),
+    left(NULL),
+    right(NULL)
 {
   // Declare the cosine tree priority queue.
   CosineNodeQueue treeQueue;
-  
+
   // Define root node of the tree and add it to the queue.
   CosineTree root(dataset);
   arma::vec tempVector = arma::zeros(dataset.n_rows);
   root.L2Error(0);
   root.BasisVector(tempVector);
   treeQueue.push(&root);
-  
+
   // Initialize Monte Carlo error estimate for comparison.
   double monteCarloError = root.FrobNormSquared();
-  
-  while(monteCarloError > epsilon * root.FrobNormSquared())
+
+  while (monteCarloError > epsilon * root.FrobNormSquared())
   {
     // Pop node from queue with highest projection error.
     CosineTree* currentNode;
     currentNode = treeQueue.top();
     treeQueue.pop();
-    
+
     // Split the node into left and right children.
     currentNode->CosineNodeSplit();
-    
+
     // Obtain pointers to the left and right children of the current node.
     CosineTree *currentLeft, *currentRight;
     currentLeft = currentNode->Left();
     currentRight = currentNode->Right();
-    
+
     // Calculate basis vectors of left and right children.
     arma::vec lBasisVector, rBasisVector;
-    
+
     ModifiedGramSchmidt(treeQueue, currentLeft->Centroid(), lBasisVector);
     ModifiedGramSchmidt(treeQueue, currentRight->Centroid(), rBasisVector,
                         &lBasisVector);
-    
+
     // Add basis vectors to their respective nodes.
     currentLeft->BasisVector(lBasisVector);
     currentRight->BasisVector(rBasisVector);
-    
+
     // Calculate Monte Carlo error estimates for child nodes.
     MonteCarloError(currentLeft, treeQueue, &lBasisVector, &rBasisVector);
     MonteCarloError(currentRight, treeQueue, &lBasisVector, &rBasisVector);
-    
+
     // Push child nodes into the priority queue.
     treeQueue.push(currentLeft);
     treeQueue.push(currentRight);
-    
+
     // Calculate Monte Carlo error estimate for the root node.
     monteCarloError = MonteCarloError(&root, treeQueue);
   }
-  
+
   // Construct the subspace basis from the current priority queue.
   ConstructBasis(treeQueue);
 }
 
+CosineTree::~CosineTree()
+{
+  if (left)
+    delete left;
+  if (right)
+    delete right;
+}
+
 void CosineTree::ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
                                      arma::vec& centroid,
                                      arma::vec& newBasisVector,
@@ -146,18 +156,18 @@
   for(; i != treeQueue.end(); i++)
   {
     currentNode = *i;
-    
+
     double projection = arma::dot(currentNode->BasisVector(), centroid);
     newBasisVector -= projection * currentNode->BasisVector();
   }
-  
+
   // If additional basis vector is passed, take it into account.
   if(addBasisVector)
   {
     double projection = arma::dot(*addBasisVector, centroid);
     newBasisVector -= *addBasisVector * projection;
   }
-  
+
   // Normalize the modified centroid vector.
   if(arma::norm(newBasisVector, 2))
     newBasisVector /= arma::norm(newBasisVector, 2);
@@ -170,19 +180,19 @@
 {
   std::vector<size_t> sampledIndices;
   arma::vec probabilities;
-  
+
   // Sample O(log m) points from the input node's distribution.
   // 'm' is the number of columns present in the node.
-  size_t numSamples = log(node->NumColumns()) + 1;  
+  size_t numSamples = log(node->NumColumns()) + 1;
   node->ColumnSamplesLS(sampledIndices, probabilities, numSamples);
-  
+
   // Get pointer to the original dataset.
   arma::mat dataset = node->GetDataset();
-  
+
   // Initialize weighted projection magnitudes as zeros.
   arma::vec weightedMagnitudes;
   weightedMagnitudes.zeros(numSamples);
-  
+
   // Set size of projection vector, depending on whether additional basis
   // vectors are passed.
   size_t projectionSize;
@@ -190,7 +200,7 @@
     projectionSize = treeQueue.size() + 2;
   else
     projectionSize = treeQueue.size();
-  
+
   // For each sample, calculate the weighted projection onto the current basis.
   for(size_t i = 0; i < numSamples; i++)
   {
@@ -200,13 +210,13 @@
 
     CosineTree *currentNode;
     CosineNodeQueue::const_iterator j = treeQueue.begin();
-  
+
     size_t k = 0;
     // Compute the projection of the sampled vector onto the existing subspace.
     for(; j != treeQueue.end(); j++, k++)
     {
       currentNode = *j;
-    
+
       projection(k) = arma::dot(dataset.col(sampledIndices[i]),
                                 currentNode->BasisVector());
     }
@@ -218,33 +228,33 @@
       projection(k) = arma::dot(dataset.col(sampledIndices[i]),
                                 *addBasisVector2);
     }
-    
+
     // Calculate the Frobenius norm squared of the projected vector.
     double frobProjection = arma::norm(projection, "frob");
     double frobProjectionSquared = frobProjection * frobProjection;
-    
+
     // Calculate the weighted projection magnitude.
     weightedMagnitudes(i) = frobProjectionSquared / probabilities(i);
   }
-  
+
   // Compute mean and standard deviation of the weighted samples.
   double mu = arma::mean(weightedMagnitudes);
   double sigma = arma::stddev(weightedMagnitudes);
-  
+
   if(!sigma)
   {
     node->L2Error(node->FrobNormSquared() - mu);
     return (node->FrobNormSquared() - mu);
   }
-  
+
   // Fit a normal distribution using the calculated statistics, and calculate a
   // lower bound on the magnitudes for the passed 'delta' parameter.
   boost::math::normal dist(mu, sigma);
   double lowerBound = boost::math::quantile(dist, delta);
-  
+
   // Upper bound on the subspace reconstruction error.
   node->L2Error(node->FrobNormSquared() - lowerBound);
-  
+
   return (node->FrobNormSquared() - lowerBound);
 }
 
@@ -252,11 +262,11 @@
 {
   // Initialize basis as matrix of zeros.
   basis.zeros(dataset.n_rows, treeQueue.size());
-  
+
   // Variables for iterating through the priority queue.
   CosineTree *currentNode;
   CosineNodeQueue::const_iterator i = treeQueue.begin();
-  
+
   // Transfer basis vectors from the queue to the basis matrix.
   size_t j = 0;
   for(; i != treeQueue.end(); i++, j++)
@@ -270,18 +280,18 @@
 {
   //! If less than two nodes, splitting does not make sense.
   if(numColumns < 3) return;
-  
+
   //! Calculate cosines with respect to the splitting point.
   arma::vec cosines;
   CalculateCosines(cosines);
-  
+
   //! Compute maximum and minimum cosine values.
   double cosineMax, cosineMin;
   cosineMax = arma::max(cosines % (cosines < 1));
   cosineMin = arma::min(cosines);
-  
+
   std::vector<size_t> leftIndices, rightIndices;
-  
+
   // Split columns into left and right children. The splitting condition for the
   // column to be in the left child is as follows:
   //       cos_max - cos(i) <= cos(i) - cos_min
@@ -296,7 +306,7 @@
       rightIndices.push_back(i);
     }
   }
-  
+
   // Split the node into left and right children.
   left = new CosineTree(*this, leftIndices);
   right = new CosineTree(*this, rightIndices);
@@ -309,23 +319,23 @@
   // Initialize the cumulative distribution vector size.
   arma::vec cDistribution;
   cDistribution.zeros(numColumns + 1);
-  
+
   // Calculate cumulative length-squared distribution for the node.
   for(size_t i = 0; i < numColumns; i++)
   {
     cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
   }
-  
+
   // Intialize sizes of the 'sampledIndices' and 'probabilities' vectors.
   sampledIndices.resize(numSamples);
   probabilities.zeros(numSamples);
-  
+
   for(size_t i = 0; i < numSamples; i++)
   {
     // Generate a random value for sampling.
     double randValue = arma::randu();
     size_t start = 0, end = numColumns, searchIndex;
-    
+
     // Sample from the distribution and store corresponding probability.
     searchIndex = BinarySearch(cDistribution, randValue, start, end);
     sampledIndices[i] = indices[searchIndex];
@@ -344,17 +354,17 @@
   // Initialize the cumulative distribution vector size.
   arma::vec cDistribution;
   cDistribution.zeros(numColumns + 1);
-  
+
   // Calculate cumulative length-squared distribution for the node.
   for(size_t i = 0; i < numColumns; i++)
   {
     cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
   }
-  
+
   // Generate a random value for sampling.
   double randValue = arma::randu();
   size_t start = 0, end = numColumns;
-  
+
   // Sample from the distribution.
   return BinarySearch(cDistribution, randValue, start, end);
 }
@@ -365,13 +375,13 @@
                                 size_t end)
 {
   size_t pivot = (start + end) / 2;
-  
+
   // If pivot is zero, first point is the sampled point.
   if(!pivot)
   {
     return pivot;
   }
-  
+
   // Binary search recursive algorithm.
   if(value > cDistribution(pivot - 1) && value <= cDistribution(pivot))
   {
@@ -391,7 +401,7 @@
 {
   // Initialize cosine vector as a vector of zeros.
   cosines.zeros(numColumns);
-  
+
   for(size_t i = 0; i < numColumns; i++)
   {
     // If norm is zero, store cosine value as zero. Else, calculate cosine value
@@ -412,7 +422,7 @@
 {
   // Initialize centroid as vector of zeros.
   centroid.zeros(dataset.n_rows);
-  
+
   // Calculate centroid of columns in the node.
   for(size_t i = 0; i < numColumns; i++)
   {

Modified: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp	(original)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp	Tue Dec  9 23:10:56 2014
@@ -4,7 +4,6 @@
  *
  * Definition of Cosine Tree.
  */
- 
 #ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
 #define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
 
@@ -25,7 +24,6 @@
 class CosineTree
 {
  public:
-      
   /**
    * CosineTree constructor for the root node of the tree. It initializes the
    * necessary variables required for splitting of the node, and building the
@@ -35,7 +33,7 @@
    * @param dataset Matrix for which cosine tree is constructed.
    */
   CosineTree(const arma::mat& dataset);
-  
+
   /**
    * CosineTree constructor for nodes other than the root node of the tree. It
    * takes in a pointer to the parent node and a list of column indices which
@@ -46,7 +44,7 @@
    * @param subIndices Pointer to vector of column indices to be included.
    */
   CosineTree(CosineTree& parentNode, const std::vector<size_t>& subIndices);
- 
+
   /**
    * Construct the CosineTree and the basis for the given matrix, and passed
    * 'epsilon' and 'delta' parameters. The CosineTree is constructed by
@@ -64,7 +62,12 @@
   CosineTree(const arma::mat& dataset,
              const double epsilon,
              const double delta);
-  
+
+  /**
+   * Clean up the CosineTree: release allocated memory (including children).
+   */
+  ~CosineTree();
+
   /**
    * Calculates the orthonormalization of the passed centroid, with respect to
    * the current vector subspace.
@@ -73,12 +76,12 @@
    * @param centroid Centroid of the node being added to the basis.
    * @param newBasisVector Orthonormalized centroid of the node.
    * @param addBasisVector Address to additional basis vector.
-   */                           
+   */
   void ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
                            arma::vec& centroid,
                            arma::vec& newBasisVector,
                            arma::vec* addBasisVector = NULL);
-  
+
   /**
    * Estimates the squared error of the projection of the input node's matrix
    * onto the current vector subspace. A normal distribution is fit using
@@ -90,35 +93,35 @@
    * @param treeQueue Priority queue of cosine nodes.
    * @param addBasisVector1 Address to first additional basis vector.
    * @param addBasisVector2 Address to second additional basis vector.
-   */                         
+   */
   double MonteCarloError(CosineTree* node,
                          CosineNodeQueue& treeQueue,
                          arma::vec* addBasisVector1 = NULL,
                          arma::vec* addBasisVector2 = NULL);
-  
+
   /**
    * Constructs the final basis matrix, after the cosine tree construction.
    *
    * @param treeQueue Priority queue of cosine nodes.
-   */                       
+   */
   void ConstructBasis(CosineNodeQueue& treeQueue);
-                         
+
   /**
    * This function splits the cosine node into two children based on the cosines
    * of the columns contained in the node, with respect to the sampled splitting
    * point. The function also calls the CosineTree constructor for the children.
    */
   void CosineNodeSplit();
-                         
+
   /**
    * Sample 'numSamples' points from the Length-Squared distribution of the
    * cosine node. The function uses 'l2NormsSquared' to calculate the cumulative
    * probability distribution of the column vectors. The sampling is based on a
    * randomly generated values in the range [0, 1].
    */
-  void ColumnSamplesLS(std::vector<size_t>& sampledIndices, 
+  void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
                        arma::vec& probabilities, size_t numSamples);
-  
+
   /**
    * Sample a point from the Length-Squared distribution of the cosine node. The
    * function uses 'l2NormsSquared' to calculate the cumulative probability
@@ -126,7 +129,7 @@
    * generated value in the range [0, 1].
    */
   size_t ColumnSampleLS();
-  
+
   /**
    * Sample a column based on the cumulative Length-Squared distribution of the
    * cosine node, and a randomly generated value in the range [0, 1]. Binary
@@ -141,7 +144,7 @@
    */
   size_t BinarySearch(arma::vec& cDistribution, double value, size_t start,
                       size_t end);
-  
+
   /**
    * Calculate cosines of the columns present in the node, with respect to the
    * sampled splitting point. The calculated cosine values are useful for
@@ -150,52 +153,52 @@
    * @param cosines Vector to store the cosine values in.
    */
   void CalculateCosines(arma::vec& cosines);
-  
+
   /**
    * Calculate centroid of the columns present in the node. The calculated
    * centroid is used as a basis vector for the cosine tree being constructed.
    */
   void CalculateCentroid();
-  
+
   //! Returns the basis of the constructed subspace.
   void GetFinalBasis(arma::mat& finalBasis) { finalBasis = basis; }
-  
+
   //! Get pointer to the dataset matrix.
   const arma::mat& GetDataset() const { return dataset; }
-  
+
   //! Get the indices of columns in the node.
   std::vector<size_t>& VectorIndices() { return indices; }
-  
+
   //! Set the Monte Carlo error.
   void L2Error(const double error) { this->l2Error = error; }
-  
+
   //! Get the Monte Carlo error.
   double L2Error() const { return l2Error; }
-  
+
   //! Get pointer to the centroid vector.
   arma::vec& Centroid() { return centroid; }
-  
+
   //! Set the basis vector of the node.
   void BasisVector(arma::vec& bVector) { this->basisVector = bVector; }
-  
+
   //! Get the basis vector of the node.
   arma::vec& BasisVector() { return basisVector; }
-  
+
   //! Get pointer to the left child of the node.
   CosineTree* Left() { return left; }
-  
+
   //! Get pointer to the right child of the node.
   CosineTree* Right() { return right; }
-  
+
   //! Get number of columns of input matrix in the node.
   size_t NumColumns() const { return numColumns; }
-  
+
   //! Get the Frobenius norm squared of columns in the node.
   double FrobNormSquared() const { return frobNormSquared; }
-  
+
   //! Get the column index of split point of the node.
   size_t SplitPointIndex() const { return indices[splitPointIndex]; }
-  
+
  private:
   //! Matrix for which cosine tree is constructed.
   const arma::mat& dataset;
@@ -207,10 +210,10 @@
   arma::mat basis;
   //! Parent of the node.
   CosineTree* parent;
-  //! Right child of the node.
-  CosineTree* right;
   //! Left child of the node.
   CosineTree* left;
+  //! Right child of the node.
+  CosineTree* right;
   //! Indices of columns of input matrix in the node.
   std::vector<size_t> indices;
   //! L2-norm squared of columns in the node.
@@ -232,7 +235,7 @@
 class CompareCosineNode
 {
  public:
- 
+
   // Comparison function for construction of priority queue.
   bool operator() (const CosineTree* a, const CosineTree* b) const
   {



More information about the mlpack-svn mailing list