[mlpack-svn] r10522 - mlpack/trunk/src/mlpack/methods/hmm/distributions
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Dec 3 03:05:21 EST 2011
Author: rcurtin
Date: 2011-12-03 03:05:21 -0500 (Sat, 03 Dec 2011)
New Revision: 10522
Modified:
mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.cpp
mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.hpp
mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.cpp
mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.hpp
Log:
Add Dimensionality() function to distributions; use arma::mat and arma::vec
instead of std::vector<Observation> and Observation; and don't use size_t in
DiscreteDistribution.
Modified: mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.cpp 2011-12-03 02:17:46 UTC (rev 10521)
+++ mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.cpp 2011-12-03 08:05:21 UTC (rev 10522)
@@ -13,32 +13,40 @@
* Return a randomly generated observation according to the probability
* distribution defined by this object.
*/
-size_t DiscreteDistribution::Random() const
+arma::vec DiscreteDistribution::Random() const
{
// Generate a random number.
double randObs = (double) rand() / (double) RAND_MAX;
+ arma::vec result(1);
double sumProb = 0;
for (size_t obs = 0; obs < probabilities.n_elem; obs++)
+ {
if ((sumProb += probabilities[obs]) >= randObs)
- return obs;
+ {
+ result[0] = obs;
+ return result;
+ }
+ }
// This shouldn't happen.
- return probabilities.n_elem - 1;
+ result[0] = probabilities.n_elem - 1;
+ return result;
}
/**
* Estimate the probability distribution directly from the given observations.
*/
-void DiscreteDistribution::Estimate(const std::vector<size_t> observations)
+void DiscreteDistribution::Estimate(const arma::mat& observations)
{
// Clear old probabilities.
probabilities.zeros();
- // Add the probability of each observation.
- for (std::vector<size_t>::const_iterator it = observations.begin();
- it != observations.end(); it++)
- probabilities(*it)++;
+ // Add the probability of each observation. The addition of 0.5 to the
+ // observation is to turn the default flooring operation of the size_t cast
+ // into a rounding operation.
+ for (size_t i = 0; i < observations.n_cols; i++)
+ probabilities((size_t) (observations(0, i) + 0.5))++;
// Now normalize the distribution.
double sum = accu(probabilities);
@@ -52,15 +60,17 @@
* Estimate the probability distribution from the given observations when also
* given probabilities that each observation is from this distribution.
*/
-void DiscreteDistribution::Estimate(const std::vector<size_t> observations,
- const std::vector<double> probObs)
+void DiscreteDistribution::Estimate(const arma::mat& observations,
+ const arma::vec& probObs)
{
// Clear old probabilities.
probabilities.zeros();
- // Add the probability of each observation.
- for (size_t i = 0; i < observations.size(); i++)
- probabilities(observations[i]) += probObs[i];
+ // Add the probability of each observation. The addition of 0.5 to the
+ // observation is to turn the default flooring operation of the size_t cast
+ // into a rounding operation.
+ for (size_t i = 0; i < observations.n_cols; i++)
+ probabilities((size_t) (observations(0, i) + 0.5)) += probObs[i];
// Now normalize the distribution.
double sum = accu(probabilities);
Modified: mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.hpp 2011-12-03 02:17:46 UTC (rev 10521)
+++ mlpack/trunk/src/mlpack/methods/hmm/distributions/discrete_distribution.hpp 2011-12-03 08:05:21 UTC (rev 10522)
@@ -14,21 +14,30 @@
namespace distribution {
/**
- * A discrete distribution where the only observations are of type size_t. This
- * is useful (for example) with discrete Hidden Markov Models, where
- * observations are non-negative integers representing specific emissions.
+ * A discrete distribution where the only observations are discrete
+ * observations. This is useful (for example) with discrete Hidden Markov
+ * Models, where observations are non-negative integers representing specific
+ * emissions.
*
* No bounds checking is performed for observations, so if an invalid
* observation is passed (i.e. observation > numObservations), a crash will
* probably occur.
+ *
+ * This distribution only supports one-dimensional observations, so when passing
+ * an arma::vec as an observation, it should only have one dimension
+ * (vec.n_rows == 1). Any additional dimensions will simply be ignored.
+ *
+ * @note
+ * This class, like every other class in MLPACK, uses arma::vec to represent
+ * observations. While a discrete distribution only has non-negative integers
+ * (size_t) as observations, these can be converted to doubles (which is what
+ * arma::vec holds). This distribution internally converts those doubles back
+ * into size_t before comparisons.
+ * @endnote
*/
class DiscreteDistribution
{
public:
- //! The type of data which this distribution uses; in our case, non-negative
- //! integers represent observations.
- typedef size_t DataType;
-
/**
* Default constructor, which creates a distribution that has no observations.
*/
@@ -66,6 +75,11 @@
}
/**
+ * Get the dimensionality of the distribution.
+ */
+ size_t Dimensionality() const { return 1; }
+
+ /**
* Return the probability of the given observation. If the observation is
* greater than the number of possible observations, then a crash will
* probably occur -- bounds checking is not performed.
@@ -73,25 +87,30 @@
* @param observation Observation to return the probability of.
* @return Probability of the given observation.
*/
- double Probability(size_t observation) const
+ double Probability(const arma::vec& observation) const
{
- return probabilities(observation);
+ // Adding 0.5 helps ensure that we cast the floating point to a size_t
+ // correctly.
+ return probabilities((size_t) (observation[0] + 0.5));
}
/**
- * Return a randomly generated observation according to the probability
- * distribution defined by this object.
+ * Return a randomly generated observation (one-dimensional vector; one
+ * observation) according to the probability distribution defined by this
+ * object.
*
* @return Random observation.
*/
- size_t Random() const;
+ arma::vec Random() const;
/**
* Estimate the probability distribution directly from the given observations.
+ * If any of the observations is greater than numObservations, a crash is
+ * likely to occur.
*
* @param observations List of observations.
*/
- void Estimate(const std::vector<size_t> observations);
+ void Estimate(const arma::mat& observations);
/**
* Estimate the probability distribution from the given observations, taking
@@ -102,8 +121,8 @@
* @param probabilities List of probabilities that each observation is
* actually from this distribution.
*/
- void Estimate(const std::vector<size_t> observations,
- const std::vector<double> probabilities);
+ void Estimate(const arma::mat& observations,
+ const arma::vec& probabilities);
/**
* Return the vector of probabilities.
Modified: mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.cpp 2011-12-03 02:17:46 UTC (rev 10521)
+++ mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.cpp 2011-12-03 08:05:21 UTC (rev 10522)
@@ -20,14 +20,14 @@
*
* @param observations List of observations.
*/
-void GaussianDistribution::Estimate(const std::vector<arma::vec> observations)
+void GaussianDistribution::Estimate(const arma::mat& observations)
{
// Calculate the mean and covariance with each point. Because this is a
// std::vector and not a matrix, this is a little more difficult.
- if (observations.size() > 0)
+ if (observations.n_cols > 0)
{
- mean.zeros(observations[0].n_elem);
- covariance.zeros(observations[0].n_elem, observations[0].n_elem);
+ mean.zeros(observations.n_rows);
+ covariance.zeros(observations.n_rows, observations.n_rows);
}
else // This will end up just being empty.
{
@@ -36,22 +36,22 @@
}
// Calculate the mean.
- for (size_t i = 0; i < observations.size(); i++)
- mean += observations[i];
+ for (size_t i = 0; i < observations.n_cols; i++)
+ mean += observations.col(i);
// Normalize the mean.
- mean /= observations.size();
+ mean /= observations.n_cols;
// Now calculate the covariance.
- for (size_t i = 0; i < observations.size(); i++)
+ for (size_t i = 0; i < observations.n_cols; i++)
{
- arma::vec obsNoMean = observations[i] - mean;
+ arma::vec obsNoMean = observations.col(i) - mean;
covariance += obsNoMean * trans(obsNoMean);
}
// Finish estimating the covariance by normalizing, with the (1 / (n - 1)) so
// that it is the unbiased estimator.
- covariance /= (observations.size() - 1);
+ covariance /= (observations.n_cols - 1);
}
/**
@@ -59,13 +59,13 @@
* account the probability of each observation actually being from this
* distribution.
*/
-void GaussianDistribution::Estimate(const std::vector<arma::vec> observations,
- const std::vector<double> probabilities)
+void GaussianDistribution::Estimate(const arma::mat& observations,
+ const arma::vec& probabilities)
{
- if (observations.size() > 0)
+ if (observations.n_cols > 0)
{
- mean.zeros(observations[0].n_elem);
- covariance.zeros(observations[0].n_elem, observations[0].n_elem);
+ mean.zeros(observations.n_rows);
+ covariance.zeros(observations.n_rows, observations.n_rows);
}
else // This will end up just being empty.
{
@@ -77,9 +77,9 @@
// First calculate the mean, and save the sum of all the probabilities for
// later normalization.
- for (size_t i = 0; i < observations.size(); i++)
+ for (size_t i = 0; i < observations.n_cols; i++)
{
- mean += probabilities[i] * observations[i];
+ mean += probabilities[i] * observations.col(i);
sumProb += probabilities[i];
}
@@ -87,9 +87,9 @@
mean /= sumProb;
// Now find the covariance.
- for (size_t i = 0; i < observations.size(); i++)
+ for (size_t i = 0; i < observations.n_cols; i++)
{
- arma::vec obsNoMean = observations[i] - mean;
+ arma::vec obsNoMean = observations.col(i) - mean;
covariance += probabilities[i] * (obsNoMean * trans(obsNoMean));
}
Modified: mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.hpp 2011-12-03 02:17:46 UTC (rev 10521)
+++ mlpack/trunk/src/mlpack/methods/hmm/distributions/gaussian_distribution.hpp 2011-12-03 08:05:21 UTC (rev 10522)
@@ -23,9 +23,6 @@
arma::mat covariance;
public:
- //! The type of data which this distribution uses.
- typedef arma::vec DataType;
-
/**
* Default constructor, which creates a Gaussian with zero dimension.
*/
@@ -45,6 +42,9 @@
GaussianDistribution(const arma::vec& mean, const arma::mat& covariance) :
mean(mean), covariance(covariance) { /* nothing to do */ }
+ //! Return the dimensionality of this distribution.
+ size_t Dimensionality() const { return mean.n_elem; }
+
/**
* Return the probability of the given observation.
*/
@@ -66,15 +66,15 @@
*
* @param observations List of observations.
*/
- void Estimate(const std::vector<arma::vec> observations);
+ void Estimate(const arma::mat& observations);
/**
* Estimate the Gaussian distribution from the given observations, taking into
* account the probability of each observation actually being from this
* distribution.
*/
- void Estimate(const std::vector<arma::vec> observations,
- const std::vector<double> probabilities);
+ void Estimate(const arma::mat& observations,
+ const arma::vec& probabilities);
/**
* Return the mean.
More information about the mlpack-svn
mailing list