[mlpack-svn] r15800 - in mlpack/trunk/src/mlpack/core/tree: . cosine_tree
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Sep 17 15:07:54 EDT 2013
Author: muditrajgupta
Date: Tue Sep 17 15:07:54 2013
New Revision: 15800
Log:
Adding Cosine Trees
Added:
mlpack/trunk/src/mlpack/core/tree/cosine_tree/
mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp
mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp
mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
Modified:
mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt
Modified: mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt (original)
+++ mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt Tue Sep 17 15:07:54 2013
@@ -11,6 +11,10 @@
binary_space_tree/single_tree_traverser_impl.hpp
binary_space_tree/traits.hpp
bounds.hpp
+ cosine_tree/cosine_tree_impl.hpp
+ cosine_tree/cosine_tree.hpp
+ cosine_tree/cosine_tree_builder.hpp
+ cosine_tree/cosine_tree_builder_impl.hpp
cover_tree/cover_tree.hpp
cover_tree/cover_tree_impl.hpp
cover_tree/first_point_is_root.hpp
Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp Tue Sep 17 15:07:54 2013
@@ -0,0 +1,112 @@
+/**
+ * @file cosine_tree.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Definition of Cosine Tree.
+ */
+
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace tree /** Cosine Trees and building procedures. */ {
+
+/**
+ */
+//template<typename MatType = arma::mat>
+
+class CosineTree
+{
+ private:
+ //! Data.
+ arma::mat data;
+ //! Centroid.
+ arma::rowvec centroid;
+ //! Sampling Probabilities
+ arma::vec probabilities;
+ //! The left child node.
+ CosineTree* left;
+ //! The right child node.
+ CosineTree* right;
+ //! Number of points in the node.
+ size_t numPoints;
+
+ public:
+ //! So other classes can use TreeType::Mat.
+ //typedef MatType Mat;
+ /**
+ * Constructor
+ *
+ * @param data Dataset to create tree from.
+ * @param centroid Centroid of the matrix.
+ * @param probabilities Sampling probabilities
+ */
+ CosineTree(arma::mat& data, arma::vec centroid, arma::vec probabilities);
+
+ /**
+ * Create an empty tree node.
+ */
+ CosineTree();
+
+ /**
+ * Deletes this node, deallocating the memory for the children and calling
+ * their destructors in turn. This will invalidate any pointers or references
+ * to any nodes which are children of this one.
+ */
+ ~CosineTree();
+
+ //! Gets the left child of this node.
+ CosineTree* Left() const;
+
+ //!Sets the Left child of this node.
+ void Left(CosineTree* child);
+
+ //! Gets the right child of this node.
+ CosineTree* Right() const;
+
+ //!Sets the Right child of this node.
+ void Right(CosineTree* child);
+
+ /**
+ * Return the specified child (0 will be left, 1 will be right). If the index
+ * is greater than 1, this will return the right child.
+ *
+ * @param child Index of child to return.
+ */
+ CosineTree& Child(const size_t child) const;
+
+ //! Return the number of points in this node (0 if not a leaf).
+ size_t NumPoints() const;
+
+ //! Sets the number of points in this node (0 if not a leaf).
+ void NumPoints(size_t n);
+
+ //! Returns a reference to the data
+ arma::mat Data();
+
+ //! Sets a reference to the data
+ void Data(arma::mat& d);
+
+ //! Returns a reference to Sample Probabilites
+ arma::vec Probabilities();
+
+ //! Sets a reference to Sample Probabilites
+ void Probabilities(arma::vec& prob);
+
+ //! Returns a reference to the centroid
+ arma::rowvec Centroid();
+
+ //! Sets the centroid
+ void Centroid(arma::rowvec& centr);
+
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "cosine_tree_impl.hpp"
+
+#endif
Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder.hpp Tue Sep 17 15:07:54 2013
@@ -0,0 +1,123 @@
+/**
+ * @file cosine_tree_builder.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Helper class to build the cosine tree
+ */
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_HPP
+
+#include <mlpack/core.hpp>
+#include "cosine_tree.hpp"
+
+using namespace mlpack::tree;
+
+namespace mlpack {
+namespace tree /** tree-building procedures. */ {
+
+class CosineTreeBuilder
+{
+ private:
+ /**
+ * Length Square Sampling method for sampling rows
+ * of the matrix
+ *
+ * @param A Matrix for which probabilities are calculated
+ * @param prob Reference to the probability vector
+ */
+ void LSSampling(arma::mat A, arma::vec& prob);
+
+ /**
+ * Calculates the centroid of the matrix
+ *
+ * @param A Matrix for which the centroid has to be calculated
+ */
+ arma::rowvec CalculateCentroid(arma::mat A) const;
+
+ /**
+ * Calculates the Cosine Similarity between two vector
+ *
+ * @param A Vector 1
+ * @param B Vector 2
+ */
+ double CosineSimilarity(arma::vec A, arma::vec B);
+
+ /**
+ * Return the Euclidean Norm of the vectoe
+ *
+ * @param A Vector for which Euclidean Norm has to be calculated
+ */
+ double EuclideanNorm(arma::vec A);
+
+ /**
+ * Calculates the Pivot for splitting
+ *
+ * @param prob Probability for a point to act as the pivot
+ */
+ size_t GetPivot(arma::vec prob);
+
+ /**
+ * Splits the points into the root node into children nodes
+ *
+ * @param c Array of Cosin Similarities
+ * @param ALeft Matrix for storing the points in Left Node
+ * @param ARight Matrix for storing the points in Right Node
+ * @param A All points
+ */
+ void SplitData(std::vector<double> c, arma::mat& ALeft,
+ arma::mat& Aright, arma::mat A);
+
+ /**
+ * Creates Cosine Similarity Array
+ *
+ * @param c Array of Cosine Similarity
+ * @param A All points
+ * @param pivot pivot point
+ */
+ void CreateCosineSimilarityArray(std::vector<double>& c,
+ arma::mat A, size_t pivot);
+
+ /**
+ * Calculates Maximum Cosine Similarity
+ *
+ * @param c Array of Cosine Similarities
+ */
+ double GetMaxSimilarity(std::vector<double> c);
+
+ /**
+ * Calculates Maximum Cosine Similarity
+ *
+ * @param c Array of Cosine Similarities
+ */
+ double GetMinSimilarity(std::vector<double> c);
+
+ public:
+ //! Empty Constructor
+ CosineTreeBuilder();
+ //! Destructor
+ ~CosineTreeBuilder();
+
+ /**
+ * Creates a new cosine tree node
+ *
+ * @param A Data for constructing the node
+ * @param root Reference to the constructed node
+ */
+ void CTNode(arma::mat A, CosineTree& root);
+
+ /**
+ * Splits a cosine tree node
+ *
+ * @param root Node to be split
+ * @param right reference to the right child
+ * @param left reference to the left child
+ */
+ void CTNodeSplit(CosineTree& root, CosineTree& left, CosineTree& right);
+};
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "cosine_tree_builder_impl.hpp"
+
+#endif
Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_builder_impl.hpp Tue Sep 17 15:07:54 2013
@@ -0,0 +1,188 @@
+/**
+ * @file cosine_tree_builder_impl.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of cosine tree builder.
+ */
+
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_IMPL_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_BUILDER_IMPL_HPP
+
+#include "cosine_tree_builder.hpp"
+
+namespace mlpack {
+namespace tree {
+
+// Empty Constructor
+CosineTreeBuilder::CosineTreeBuilder()
+{
+ Log::Info<<"Constructor"<<std::endl;
+}
+
+// Destructor
+CosineTreeBuilder::~CosineTreeBuilder() {}
+
+void CosineTreeBuilder::LSSampling(arma::mat A, arma::vec& probability)
+{
+ Log::Info<<"LSSampling"<<std::endl;
+ //Saving the frobenious norm of the matrix
+ double normA = arma::norm(A,"fro");
+ //Calculating probability of each point to be sampled
+ for (size_t i=0;i<A.n_rows;i++)
+ probability(i) = arma::norm(A.row(i),"fro")/normA;
+}
+
+double CosineTreeBuilder::EuclideanNorm(arma::vec A)
+{
+ Log::Info<<"EuclideanNorm"<<std::endl;
+ //Calculating the Euclidean Norm
+ return sqrt(arma::sum(arma::square(A)));
+}
+
+double CosineTreeBuilder::CosineSimilarity(arma::vec A, arma::vec B)
+{
+ Log::Info<<"CosineSimilarity"<<std::endl;
+ //Value of the cosine
+ double value = arma::dot(A,B)/(EuclideanNorm(A)*EuclideanNorm(B));
+ //Stripping to account for floating point error
+ if (value > 1.0)
+ value = 1.0;
+ if (value < -1.0)
+ value = -1.0;
+ //Calculating angle
+ return acos(value);
+}
+
+arma::rowvec CosineTreeBuilder::CalculateCentroid(arma::mat A) const
+{
+ Log::Info<<"CalculateCentroid"<<std::endl;
+ //Summing over all coloumns
+ arma::rowvec colsum = arma::sum(A,0);
+ //Averaging
+ double k = 1.0/A.n_rows;
+ return k*colsum;
+}
+
+void CosineTreeBuilder::CTNode(arma::mat A, CosineTree& root)
+{
+ Log::Info<<"CTNode"<<std::endl;
+ //Calculating Centroid
+ arma::rowvec centroid = CalculateCentroid(A);
+ //Calculating sampling probabilities
+ arma::vec probabilities = arma::zeros<arma::vec>(A.n_rows,1);
+ LSSampling(A,probabilities);
+ //Setting Values
+ root.Probabilities(probabilities);
+ root.Data(A);
+ root.Centroid(centroid);
+ root.Left(NULL);
+ root.Right(NULL);
+ root.NumPoints(A.n_rows);
+}
+
+size_t CosineTreeBuilder::GetPivot(arma::vec prob)
+{
+ Log::Info<<"GetPivot"<<std::endl;
+ //Setting firtst value as the pivot
+ double maxPivot=prob(0,0);
+ size_t pivot=0;
+
+ //Searching for the pivot poitn
+ for (size_t i=0;i<prob.n_rows;i++)
+ {
+ if(prob(i,0) > maxPivot)
+ {
+ maxPivot = prob(i,0);
+ pivot = i;
+ }
+ }
+ return pivot;
+}
+
+void CosineTreeBuilder::SplitData(std::vector<double> c, arma::mat& ALeft,
+ arma::mat& ARight,arma::mat A)
+{
+ Log::Info<<"SplitData"<<std::endl;
+ double cMax,cMin;
+ //Calculating the lower and the Upper Limit
+ cMin = GetMaxSimilarity(c);
+ cMax = GetMinSimilarity(c);
+ //Couter for left and right
+ size_t lft, rgt;
+ lft = 0;
+ rgt = 0;
+ //Splitting on the basis of nearness to the the high or low value
+ for(size_t i=0;i<A.n_rows;i++)
+ {
+ if ((cMax - c[i])<=(c[i] - cMin))
+ {
+ ALeft.insert_rows(lft,A.row(i));
+ lft ++;
+ }
+ else
+ {
+ ARight.insert_rows(rgt,A.row(i));
+ rgt ++;
+ }
+ }
+}
+
+void CosineTreeBuilder::CreateCosineSimilarityArray(std::vector<double>& c,
+ arma::mat A, size_t pivot)
+{
+ Log::Info<<"CreateCosineSimilarityArray"<<std::endl;
+ for(size_t i=0;i<A.n_rows;i++)
+ c.push_back(CosineSimilarity(A.row(pivot).t(),A.row(i).t()));
+}
+double CosineTreeBuilder::GetMinSimilarity(std::vector<double> c)
+{
+ Log::Info<<"GetMinSimilarity"<<std::endl;
+ double cMin = c[0];
+ for(size_t i=1;i<c.size();i++)
+ if(cMin<c[i])
+ cMin = c[i];
+ return cMin;
+}
+double CosineTreeBuilder::GetMaxSimilarity(std::vector<double> c)
+{
+ Log::Info<<"GetMaxSimilarity"<<std::endl;
+ double cMax = c[0];
+ for(size_t i=1;i<c.size();i++)
+ if(cMax<c[i])
+ cMax = c[i];
+ return cMax;
+}
+void CosineTreeBuilder::CTNodeSplit(CosineTree& root, CosineTree& left,
+ CosineTree& right)
+{
+ //Extracting points from the root
+ arma::mat A = root.Data();
+ //Cosine Similarity Array
+ std::vector<double> c;
+ //Matrices holding points for the left and the right node
+ arma::mat ALeft, ARight;
+ //Sampling probabilities
+ arma::vec prob = root.Probabilities();
+ //Pivot
+ size_t pivot = GetPivot(prob);
+ //Creating Array
+ CreateCosineSimilarityArray(c,A,pivot);
+ //Splitting data points
+ SplitData(c,ALeft,ARight,A);
+ //Creating Nodes
+ if(ALeft.n_rows > 0)
+ {
+ CTNode(ALeft,left);
+ root.Left(left);
+ }
+ if(ARight.n_rows > 0)
+ {
+ CTNode(ARight,right);
+ root.Right(right);
+ }
+}
+
+}; // namespace cosinetreebuilder
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp Tue Sep 17 15:07:54 2013
@@ -0,0 +1,112 @@
+/**
+ * @file cosine_tree_impl.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of cosine tree.
+ */
+#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
+
+#include "cosine_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+CosineTree::CosineTree(arma::mat& data, arma::vec centroid, arma::vec probabilities) :
+ left(NULL),
+ right(NULL),
+ data(data),
+ centroid(centroid),
+ numPoints(data.n_cols),
+ probabilities(probabilities)
+{
+ // Nothing to do
+}
+
+CosineTree::CosineTree() :
+ left(NULL),
+ right(NULL)
+{
+ // Nothing to do
+}
+
+CosineTree::~CosineTree()
+{
+ if (left)
+ delete left;
+ if (right)
+ delete right;
+}
+
+CosineTree* CosineTree::Right() const
+{
+ return right;
+}
+
+void CosineTree::Right(CosineTree* child)
+{
+ right = child;
+}
+
+CosineTree* CosineTree::Left() const
+{
+ return left;
+}
+
+void CosineTree::Left(CosineTree* child)
+{
+ left = child;
+}
+
+CosineTree& CosineTree::Child(const size_t child) const
+{
+ if (child == 0)
+ return *left;
+ else
+ return *right;
+}
+
+size_t CosineTree::NumPoints() const
+{
+ return numPoints;
+}
+
+void CosineTree::NumPoints(size_t n)
+{
+ numPoints = n;
+}
+
+arma::mat CosineTree::Data()
+{
+ return data;
+}
+
+void CosineTree::Data(arma::mat& d)
+{
+ data = d;
+}
+
+arma::vec CosineTree::Probabilities()
+{
+ return probabilities;
+}
+
+void CosineTree::Probabilities(arma::vec& prob)
+{
+ probabilities = prob;
+}
+
+arma::rowvec CosineTree::Centroid()
+{
+ return centroid;
+}
+
+void CosineTree::Centroid(arma::rowvec& centr)
+{
+ centroid = centr;
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif
More information about the mlpack-svn
mailing list