[mlpack-svn] r15495 - mlpack/conf/jenkins-conf/benchmark/methods/matlab
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 18 12:03:48 EDT 2013
Author: marcus
Date: Thu Jul 18 12:03:47 2013
New Revision: 15495
Log:
Add matlab hmm generator method src and benchmark script.
Added:
mlpack/conf/jenkins-conf/benchmark/methods/matlab/HMM_GENERATE.m
mlpack/conf/jenkins-conf/benchmark/methods/matlab/hmm_generate.py
Added: mlpack/conf/jenkins-conf/benchmark/methods/matlab/HMM_GENERATE.m
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/matlab/HMM_GENERATE.m Thu Jul 18 12:03:47 2013
@@ -0,0 +1,37 @@
+% @file HMM_GENERATE.m
+% @author Marcus Edel
+%
+% Hidden Markov Model (HMM) Sequence Generator with matlab.
+
+function hmm_generate(cmd)
+% This utility takes an already-trained HMM and generates a random
+% observation sequence and hidden state sequence based on its parameters.
+%
+% Required options:
+% (-l) [int] Length of sequence to generate.
+% (-t) [string] CSV file containing the transition matrix.
+% (-e) [string] CSV file containing the emission matrix.
+%
+% cmd is one option string, e.g. '-e emis.csv -t trans.csv -l 100'.
+% Read the transition and emission matrices from the CSV files named in cmd.
+transFile = regexp(cmd, '.*?-t ([^\s]+)', 'tokens', 'once');
+transData = csvread(transFile{:});
+
+emisFile = regexp(cmd, '.*?-e ([^\s]+)', 'tokens', 'once');
+emisData = csvread(emisFile{:});
+
+% Extract the sequence length; the pattern requires a space before '-l'.
+l = regexp(cmd,'.* -l (\d+)','tokens','once');
+
+if ~isempty(l)
+ l = str2double(l);
+else
+ disp('[Fatal] Required options: Length of sequence to generate.');
+ return;
+end
+% Time only the hmmgenerate call; seq and states are not used afterwards.
+total_time = tic;
+[seq, states] = hmmgenerate(l, transData, emisData);
+
+disp(sprintf('[INFO ] total_time: %fs', toc(total_time)))
+end
Added: mlpack/conf/jenkins-conf/benchmark/methods/matlab/hmm_generate.py
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/matlab/hmm_generate.py Thu Jul 18 12:03:47 2013
@@ -0,0 +1,154 @@
+'''
+ @file hmm_generate.py
+ @author Marcus Edel
+
+ Class to benchmark the matlab HMM Sequence Generator method.
+'''
+
+import os
+import sys
+import inspect
+
+# Put ../../util (relative to this file) at the front of sys.path so the
+# modules there can be imported; realpath() resolves symlinks in that path.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+ sys.path.insert(0, cmd_subfolder)
+
+from log import *
+from profiler import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+'''
+This class implements the HMM Sequence Generator benchmark.
+'''
+class HMMGENERATE(object):
+
+ '''
+ Create the HMM Sequence Generator benchmark instance.
+
+ @param dataset - HMM model file the emis and trans values are read from.
+ @param path - Prefix prepended to "matlab" (default: $MATLAB_BIN).
+ @param verbose - Display informational messages.
+ '''
+ def __init__(self, dataset, path=os.environ["MATLAB_BIN"], verbose = True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ '''
+ Destructor: remove the temporary emis and trans CSV files if they exist.
+ '''
+ def __del__(self):
+ Log.Info("Clean up.", self.verbose)
+ filelist = ["emis_tmp.csv", "trans_tmp.csv"]
+ for f in filelist:
+ if os.path.isfile(f):
+ os.remove(f)
+
+ '''
+ HMM Sequence Generator. If the method has been successfully completed return
+ the elapsed time in seconds.
+
+ @param options - Extra options appended to the command, e.g. "-l 10".
+ @return - Elapsed time in seconds or -1 if the method was not successful.
+ '''
+ def RunMethod(self, options):
+ Log.Info("Perform HMM GENERATE.", self.verbose)
+
+ # Open the HMM model file and extract the emis and trans values.
+ fid = open(self.dataset, 'r')
+ line = fid.read()
+ fid.close()
+
+ patternEmis = re.compile(r"""
+ .*?<hmm_emission_covariance_.*?>(?P<hmm_emission_mean>.*?)
+ </hmm_emission_covariance_
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ patternTrans = re.compile(r"""
+ .*?<hmm_transition>(?P<hmm_transition>.*?)</hmm_transition>
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ emis = patternEmis.findall(line)
+ trans = patternTrans.findall(line)
+
+ # Write the emis and trans values to temporary CSV files.
+ if not emis or not trans:
+ Log.Fatal("Can't parse the HMM model file.")
+ return -1
+ else:
+ fidEmis = open("emis_tmp.csv", "w")
+ for m in emis:
+ m = m.split('\n')
+ m = m[0] + "," + m[1] + "\n"
+ fidEmis.write(m)
+
+ fidEmis.close()
+
+ fidTrans = open("trans_tmp.csv", "w")
+ for m in trans:
+ fidTrans.write(m)
+ fidTrans.close()
+
+ inputCmd = "-e emis_tmp.csv -t trans_tmp.csv " + options
+ # Split the command using shell-like syntax.
+ cmd = shlex.split(self.path + "matlab -nodisplay -nosplash -r \"try, " +
+ "HMM_GENERATE('" + inputCmd + "'), catch, exit(1), end, exit(0)\"")
+
+ # Run command with the necessary arguments and return its output as a byte
+ # string. We have untrusted input so we disable all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time. NOTE(review): parseTimer's -1 is truthy, so the check never fires.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % time), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data from a given string.
+
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data, or -1 if parsing fails.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(r"""
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data)
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["total_time"])
+
+ return timer(float(match.group("total_time")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ time = timer.total_time
+ return time
More information about the mlpack-svn
mailing list