[mlpack-svn] r15800 - in mlpack/trunk/src/mlpack/core/tree: . cosine_tree

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Sep 17 15:07:54 EDT 2013


Author: muditrajgupta
Date: Tue Sep 17 15:07:54 2013
New Revision: 15800

Log:
Adding Cosine Trees

Added:
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
Modified:
   mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt

Modified: mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt	(original)
+++ mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt	Tue Sep 17 15:07:54 2013
@@ -11,6 +11,10 @@
   binary_space_tree/single_tree_traverser_impl.hpp
   binary_space_tree/traits.hpp
   bounds.hpp
+  cosine_tree/cosine_tree_impl.hpp
+  cosine_tree/cosine_tree.hpp
+  cosine_tree/cosine_tree_builder.hpp
+  cosine_tree/cosine_tree_builder_impl.hpp
   cover_tree/cover_tree.hpp
   cover_tree/cover_tree_impl.hpp
   cover_tree/first_point_is_root.hpp

Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp	Tue Sep 17 15:07:54 2013
@@ -0,0 +1,112 @@
+/**
+ * @file cosine_tree.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Definition of Cosine Tree.
+ */
+ 
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace tree /** Cosine Trees and building procedures. */ {
+
+/**
+ */
+//template<typename MatType = arma::mat>
+
+class CosineTree
+{
+ private:
+  //! Data. 
+  arma::mat data;
+  //! Centroid.
+  arma::rowvec centroid;
+  //! Sampling Probabilities
+  arma::vec probabilities;
+  //! The left child node.
+  CosineTree* left;
+  //! The right child node.
+  CosineTree* right;
+  //! Number of points in the node.
+  size_t numPoints;
+  
+ public:
+  //! So other classes can use TreeType::Mat.
+  //typedef MatType Mat;
+  /**
+   * Constructor 
+   * 
+   * @param data Dataset to create tree from. 
+   * @param centroid Centroid of the matrix.
+   * @param probabilities Sampling probabilities
+   */
+  CosineTree(arma::mat& data, arma::vec centroid, arma::vec probabilities);
+
+  /**
+   * Create an empty tree node.
+   */
+  CosineTree();
+
+  /**
+   * Deletes this node, deallocating the memory for the children and calling
+   * their destructors in turn.  This will invalidate any pointers or references
+   * to any nodes which are children of this one.
+   */
+  ~CosineTree();
+
+  //! Gets the left child of this node.
+  CosineTree* Left() const;
+  
+  //!Sets the Left child of this node.
+  void Left(CosineTree* child);
+
+  //! Gets the right child of this node.
+  CosineTree* Right() const;
+
+  //!Sets the Right child of this node.
+  void Right(CosineTree* child);
+
+  /**
+   * Return the specified child (0 will be left, 1 will be right).  If the index
+   * is greater than 1, this will return the right child.
+   *
+   * @param child Index of child to return.
+   */
+  CosineTree& Child(const size_t child) const;
+
+  //! Return the number of points in this node (0 if not a leaf).
+  size_t NumPoints() const;
+
+  //! Sets the number of points in this node (0 if not a leaf).
+  void NumPoints(size_t n);
+
+  //! Returns a reference to the data
+  arma::mat Data();
+  
+  //! Sets a reference to the data
+  void Data(arma::mat& d);
+  
+  //! Returns a reference to Sample Probabilites
+  arma::vec Probabilities();
+  
+  //! Sets a reference to Sample Probabilites
+  void Probabilities(arma::vec& prob);
+  
+  //! Returns a reference to the centroid
+  arma::rowvec Centroid();  
+  
+  //! Sets the centroid
+  void Centroid(arma::rowvec& centr);  
+   
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "cosine_tree_impl.hpp"
+
+#endif

Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp	Tue Sep 17 15:07:54 2013
@@ -0,0 +1,123 @@
+/**
+ * @file cosine_tree_builder.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Helper class to build the cosine tree
+ */
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_HPP
+
+#include <mlpack/core.hpp>
+#include "cosine_tree.hpp"
+
+using namespace mlpack::tree;
+
+namespace mlpack {
+namespace tree /** tree-building procedures. */ {
+
+class CosineTreeBuilder
+{
+ private:
+  /**
+   * Length Square Sampling method for sampling rows
+   * of the matrix
+   *
+   * @param A Matrix for which probabilities are calculated
+   * @param prob Reference to the probability vector
+   */
+  void LSSampling(arma::mat A, arma::vec& prob); 
+
+  /**
+   * Calculates the centroid of the matrix
+   *
+   * @param A Matrix for which the centroid has to be calculated
+   */
+  arma::rowvec CalculateCentroid(arma::mat A) const;
+
+  /**
+   * Calculates the Cosine Similarity between two vector
+   * 
+   * @param A Vector 1
+   * @param B Vector 2
+   */  
+  double CosineSimilarity(arma::vec A, arma::vec B);
+
+  /**
+   * Return the Euclidean Norm of the vectoe
+   * 
+   * @param A Vector for which Euclidean Norm has to be calculated
+   */
+  double EuclideanNorm(arma::vec A);
+  
+  /**
+   * Calculates the Pivot for splitting
+   * 
+   * @param prob Probability for a point to act as the pivot
+   */
+  size_t GetPivot(arma::vec prob);
+  
+  /**
+   * Splits the points into the root node into children nodes
+   *
+   * @param c Array of Cosin Similarities
+   * @param ALeft Matrix for storing the points in Left Node
+   * @param ARight Matrix for storing the points in Right Node
+   * @param A All points
+   */
+  void SplitData(std::vector<double> c, arma::mat& ALeft, 
+                 arma::mat& Aright, arma::mat A);
+
+  /**
+   * Creates Cosine Similarity Array
+   *
+   * @param c Array of Cosine Similarity
+   * @param A All points
+   * @param pivot pivot point
+   */
+  void CreateCosineSimilarityArray(std::vector<double>& c, 
+                                   arma::mat A, size_t pivot);
+  
+  /**
+   * Calculates Maximum Cosine Similarity
+   *
+   * @param c Array of Cosine Similarities
+   */
+  double GetMaxSimilarity(std::vector<double> c);
+   
+  /**
+   * Calculates Maximum Cosine Similarity
+   *
+   * @param c Array of Cosine Similarities
+   */
+  double GetMinSimilarity(std::vector<double> c);
+ 
+ public:
+  //! Empty Constructor
+  CosineTreeBuilder();
+  //! Destructor
+  ~CosineTreeBuilder();
+ 
+  /** 
+   * Creates a new cosine tree node 
+   *
+   * @param A Data for constructing the node
+   * @param root Reference to the constructed node
+   */
+  void CTNode(arma::mat A, CosineTree& root);
+
+  /**
+   * Splits a cosine tree node
+   * 
+   * @param root Node to be split
+   * @param right reference to the right child
+   * @param left reference to the left child
+   */
+  void CTNodeSplit(CosineTree& root, CosineTree& left, CosineTree& right);
+};
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "cosine_tree_builder_impl.hpp"
+
+#endif

Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp	Tue Sep 17 15:07:54 2013
@@ -0,0 +1,188 @@
+/**
+ * @file cosine_tree_builder_impl.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of cosine tree builder.
+ */
+
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_IMPL_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_IMPL_HPP
+
+#include "cosine_tree_builder.hpp"
+
+namespace mlpack {
+namespace tree {
+
+// Empty Constructor
+CosineTreeBuilder::CosineTreeBuilder() 
+{
+  Log::Info<<"Constructor"<<std::endl;
+}
+
+// Destructor
+CosineTreeBuilder::~CosineTreeBuilder() {}
+
+void CosineTreeBuilder::LSSampling(arma::mat A, arma::vec& probability)
+{
+  Log::Info<<"LSSampling"<<std::endl;
+  //Saving the frobenious norm of the matrix
+  double normA = arma::norm(A,"fro");
+  //Calculating probability of each point to be sampled
+  for (size_t i=0;i<A.n_rows;i++)
+    probability(i) = arma::norm(A.row(i),"fro")/normA;
+}
+
+double CosineTreeBuilder::EuclideanNorm(arma::vec A)
+{
+   Log::Info<<"EuclideanNorm"<<std::endl;
+   //Calculating the Euclidean Norm
+   return sqrt(arma::sum(arma::square(A)));
+}
+
+double CosineTreeBuilder::CosineSimilarity(arma::vec A, arma::vec B)
+{
+  Log::Info<<"CosineSimilarity"<<std::endl;
+  //Value of the cosine
+  double value = arma::dot(A,B)/(EuclideanNorm(A)*EuclideanNorm(B));
+  //Stripping to account for floating point error
+  if (value > 1.0)
+    value = 1.0;
+  if (value < -1.0)
+    value = -1.0;
+  //Calculating angle
+  return acos(value);
+}
+
+arma::rowvec CosineTreeBuilder::CalculateCentroid(arma::mat A) const
+{
+  Log::Info<<"CalculateCentroid"<<std::endl;
+  //Summing over all coloumns
+  arma::rowvec colsum = arma::sum(A,0);
+  //Averaging
+  double k = 1.0/A.n_rows;
+  return k*colsum;
+}
+
+void CosineTreeBuilder::CTNode(arma::mat A, CosineTree& root)
+{
+  Log::Info<<"CTNode"<<std::endl;
+  //Calculating Centroid
+  arma::rowvec centroid = CalculateCentroid(A);
+  //Calculating sampling probabilities
+  arma::vec probabilities = arma::zeros<arma::vec>(A.n_rows,1);
+  LSSampling(A,probabilities);
+  //Setting Values
+  root.Probabilities(probabilities);
+  root.Data(A);
+  root.Centroid(centroid);
+  root.Left(NULL);
+  root.Right(NULL);
+  root.NumPoints(A.n_rows);
+}
+
+size_t CosineTreeBuilder::GetPivot(arma::vec prob)
+{
+  Log::Info<<"GetPivot"<<std::endl;
+  //Setting firtst value as the pivot
+  double maxPivot=prob(0,0); 
+  size_t pivot=0;
+ 
+  //Searching for the pivot poitn
+  for (size_t i=0;i<prob.n_rows;i++)
+  {
+    if(prob(i,0) > maxPivot)
+    {
+      maxPivot = prob(i,0);
+      pivot = i;
+    }
+  }
+  return pivot;
+}
+
+void CosineTreeBuilder::SplitData(std::vector<double> c, arma::mat& ALeft, 
+                                  arma::mat& ARight,arma::mat A)
+{
+  Log::Info<<"SplitData"<<std::endl;
+  double cMax,cMin;
+  //Calculating the lower and the Upper Limit
+  cMin = GetMaxSimilarity(c);
+  cMax = GetMinSimilarity(c);
+  //Couter for left and right
+  size_t lft, rgt;
+  lft = 0; 
+  rgt = 0;
+  //Splitting on the basis of nearness to the the high or low value
+  for(size_t i=0;i<A.n_rows;i++)
+  {
+    if ((cMax - c[i])<=(c[i] - cMin))
+    {
+      ALeft.insert_rows(lft,A.row(i));
+      lft ++;
+    }
+    else
+    {
+      ARight.insert_rows(rgt,A.row(i));
+      rgt ++;
+    }
+  }
+}
+
+void CosineTreeBuilder::CreateCosineSimilarityArray(std::vector<double>& c, 
+                                                    arma::mat A, size_t pivot)
+{
+  Log::Info<<"CreateCosineSimilarityArray"<<std::endl;
+  for(size_t i=0;i<A.n_rows;i++)
+    c.push_back(CosineSimilarity(A.row(pivot).t(),A.row(i).t()));
+}
+double CosineTreeBuilder::GetMinSimilarity(std::vector<double> c)
+{
+  Log::Info<<"GetMinSimilarity"<<std::endl;
+  double cMin = c[0];
+  for(size_t i=1;i<c.size();i++)
+    if(cMin<c[i])  
+      cMin = c[i];
+  return cMin;
+}
+double CosineTreeBuilder::GetMaxSimilarity(std::vector<double> c)
+{
+  Log::Info<<"GetMaxSimilarity"<<std::endl;
+  double cMax = c[0];
+  for(size_t i=1;i<c.size();i++)
+    if(cMax<c[i])  
+      cMax = c[i];
+  return cMax;
+}
+void CosineTreeBuilder::CTNodeSplit(CosineTree& root, CosineTree& left, 
+                                    CosineTree& right)
+{
+  //Extracting points from the root
+  arma::mat A = root.Data();
+  //Cosine Similarity Array
+  std::vector<double> c;
+  //Matrices holding points for the left and the right node
+  arma::mat ALeft, ARight;
+  //Sampling probabilities
+  arma::vec prob = root.Probabilities();
+  //Pivot
+  size_t pivot = GetPivot(prob);
+  //Creating Array
+  CreateCosineSimilarityArray(c,A,pivot);
+  //Splitting data points
+  SplitData(c,ALeft,ARight,A);
+  //Creating Nodes
+  if(ALeft.n_rows > 0)
+  {
+    CTNode(ALeft,left);
+    root.Left(left);
+  }
+  if(ARight.n_rows > 0)
+  {
+    CTNode(ARight,right);
+    root.Right(right);
+  }
+}
+
+}; // namespace cosinetreebuilder
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp	Tue Sep 17 15:07:54 2013
@@ -0,0 +1,112 @@
+/**
+ * @file cosine_tree_impl.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of cosine tree.
+ */
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
+
+#include "cosine_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+CosineTree::CosineTree(arma::mat& data, arma::vec centroid, arma::vec probabilities) : 
+    left(NULL),
+    right(NULL),
+    data(data),
+    centroid(centroid),
+    numPoints(data.n_cols),
+    probabilities(probabilities)
+{ 
+  // Nothing to do
+}
+
+CosineTree::CosineTree() : 
+    left(NULL),
+    right(NULL)
+{   
+  // Nothing to do
+}
+
+CosineTree::~CosineTree()
+{
+  if (left)
+    delete left;
+  if (right)
+    delete right;
+}
+
+CosineTree* CosineTree::Right() const
+{
+  return right;
+}
+
+void CosineTree::Right(CosineTree* child)
+{
+  right = child;
+}
+
+CosineTree* CosineTree::Left() const
+{
+  return left;
+}
+
+void CosineTree::Left(CosineTree* child)
+{
+  left = child;
+}
+
+CosineTree& CosineTree::Child(const size_t child) const
+{
+  if (child == 0)
+    return *left;
+  else
+    return *right;
+}
+
+size_t CosineTree::NumPoints() const
+{
+  return numPoints;
+}
+
+void CosineTree::NumPoints(size_t n)
+{
+  numPoints = n;
+}
+
+arma::mat CosineTree::Data()
+{
+  return data;
+}
+
+void CosineTree::Data(arma::mat& d)
+{
+    data = d;
+}
+
+arma::vec CosineTree::Probabilities() 
+{
+  return probabilities;
+}
+
+void CosineTree::Probabilities(arma::vec& prob)
+{
+  probabilities = prob; 
+}
+
+arma::rowvec CosineTree::Centroid()
+{
+  return centroid;
+}
+
+void CosineTree::Centroid(arma::rowvec& centr)
+{
+    centroid = centr;
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif



More information about the mlpack-svn mailing list