[mlpack-svn] r15546 - mlpack/conf/jenkins-conf/benchmark/methods/weka
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 25 16:30:55 EDT 2013
Author: marcus
Date: Thu Jul 25 16:30:54 2013
New Revision: 15546
Log:
Clean weka scripts and make the code compatible with python 3.
Modified:
mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py
mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py
mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py
mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py
mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py
Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py Thu Jul 25 16:30:54 2013
@@ -69,7 +69,7 @@
except Exception:
Log.Fatal("Could not execute command: " + str(cmd))
return -1
-
+
# Return the elapsed time.
timer = self.parseTimer(s)
if not timer:
@@ -94,7 +94,7 @@
.*?total_time: (?P<total_time>.*?)s.*?
""", re.VERBOSE|re.MULTILINE|re.DOTALL)
- match = pattern.match(data)
+ match = pattern.match(data.decode())
if not match:
Log.Fatal("Can't parse the data: wrong format")
return -1
Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
# Import the util path, this method even works if the path contains symlinks to
# modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
from profiler import *
@@ -29,82 +29,82 @@
'''
class KMEANS(object):
- '''
- Create the K-Means Clustering benchmark instance.
+ '''
+ Create the K-Means Clustering benchmark instance.
@param dataset - Input dataset to perform K-Means on.
@param path - Path to the mlpack executable.
@param verbose - Display informational messages.
- '''
- def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose = True):
- self.verbose = verbose
- self.dataset = dataset
- self.path = path
-
- '''
+ '''
+ def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose = True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ '''
K-Means Clustering benchmark instance. If the method has been successfully
completed return the elapsed time in seconds.
@param options - Extra options for the method.
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
- def RunMethod(self, options):
- Log.Info("Perform K-Means.", self.verbose)
-
- # Split the command using shell-like syntax.
- cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
- " KMeans -i " + self.dataset + " " + options)
-
- # Run command with the nessecary arguments and return its output as a byte
- # string. We have untrusted input so we disables all shell based features.
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
- except Exception:
- Log.Fatal("Could not execute command: " + str(cmd))
- return -1
-
- # Return the elapsed time.
- timer = self.parseTimer(s)
- if not timer:
- Log.Fatal("Can't parse the timer")
- return -1
- else:
- time = self.GetTime(timer)
- Log.Info(("total time: %fs" % time), self.verbose)
-
- return time
-
- '''
- Parse the timer data form a given string.
-
- @param data - String to parse timer data from.
- @return - Namedtuple that contains the timer data.
- '''
- def parseTimer(self, data):
- # Compile the regular expression pattern into a regular expression object to
- # parse the timer data.
- pattern = re.compile(r"""
- .*?total_time: (?P<total_time>.*?)s.*?
- """, re.VERBOSE|re.MULTILINE|re.DOTALL)
-
- match = pattern.match(data)
- if not match:
- Log.Fatal("Can't parse the data: wrong format")
- return -1
- else:
- # Create a namedtuple and return the timer data.
- timer = collections.namedtuple("timer", ["total_time"])
-
- if match.group("total_time").count(".") == 1:
- return timer(float(match.group("total_time")))
- else:
- return timer(float(match.group("total_time").replace(",", ".")))
-
- '''
- Return the elapsed time in seconds.
-
- @param timer - Namedtuple that contains the timer data.
- @return Elapsed time in seconds.
- '''
- def GetTime(self, timer):
- return timer.total_time
+ def RunMethod(self, options):
+ Log.Info("Perform K-Means.", self.verbose)
+
+ # Split the command using shell-like syntax.
+ cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
+ " KMeans -i " + self.dataset + " " + options)
+
+ # Run command with the nessecary arguments and return its output as a byte
+ # string. We have untrusted input so we disables all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % time), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data form a given string.
+
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(r"""
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data.decode())
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["total_time"])
+
+ if match.group("total_time").count(".") == 1:
+ return timer(float(match.group("total_time")))
+ else:
+ return timer(float(match.group("total_time").replace(",", ".")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ return timer.total_time
Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py Thu Jul 25 16:30:54 2013
@@ -94,7 +94,7 @@
.*?total_time: (?P<total_time>.*?)s.*?
""", re.VERBOSE|re.MULTILINE|re.DOTALL)
- match = pattern.match(data)
+ match = pattern.match(data.decode())
if not match:
Log.Fatal("Can't parse the data: wrong format")
return -1
Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
# Import the util path, this method even works if the path contains symlinks to
# modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
from profiler import *
@@ -29,86 +29,86 @@
'''
class NBC(object):
- '''
- Create the Naive Bayes Classifier benchmark instance.
+ '''
+ Create the Naive Bayes Classifier benchmark instance.
@param dataset - Input dataset to perform NBC on.
@param path - Path to the mlpack executable.
@param verbose - Display informational messages.
- '''
- def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True):
- self.verbose = verbose
- self.dataset = dataset
- self.path = path
-
- '''
+ '''
+ def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ '''
Naive Bayes Classifier. If the method has been successfully completed return
the elapsed time in seconds.
@param options - Extra options for the method.
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
- def RunMethod(self, options):
- Log.Info("Perform NBC.", self.verbose)
+ def RunMethod(self, options):
+ Log.Info("Perform NBC.", self.verbose)
+
+ if len(self.dataset) != 2:
+ Log.Fatal("This method requires two datasets.")
+ return -1
+
+ # Split the command using shell-like syntax.
+ cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
+ " NBC -t " + self.dataset[0] + " -T " + self.dataset[1] + " " + options)
+
+ # Run command with the nessecary arguments and return its output as a byte
+ # string. We have untrusted input so we disables all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % time), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data form a given string.
- if len(self.dataset) != 2:
- Log.Fatal("This method requires two datasets.")
- return -1
-
- # Split the command using shell-like syntax.
- cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
- " NBC -t " + self.dataset[0] + " -T " + self.dataset[1] + " " + options)
-
- # Run command with the nessecary arguments and return its output as a byte
- # string. We have untrusted input so we disables all shell based features.
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
- except Exception:
- Log.Fatal("Could not execute command: " + str(cmd))
- return -1
-
- # Return the elapsed time.
- timer = self.parseTimer(s)
- if not timer:
- Log.Fatal("Can't parse the timer")
- return -1
- else:
- time = self.GetTime(timer)
- Log.Info(("total time: %fs" % time), self.verbose)
-
- return time
-
- '''
- Parse the timer data form a given string.
-
- @param data - String to parse timer data from.
- @return - Namedtuple that contains the timer data.
- '''
- def parseTimer(self, data):
- # Compile the regular expression pattern into a regular expression object to
- # parse the timer data.
- pattern = re.compile(r"""
- .*?total_time: (?P<total_time>.*?)s.*?
- """, re.VERBOSE|re.MULTILINE|re.DOTALL)
-
- match = pattern.match(data)
- if not match:
- Log.Fatal("Can't parse the data: wrong format")
- return -1
- else:
- # Create a namedtuple and return the timer data.
- timer = collections.namedtuple("timer", ["total_time"])
-
- if match.group("total_time").count(".") == 1:
- return timer(float(match.group("total_time")))
- else:
- return timer(float(match.group("total_time").replace(",", ".")))
-
- '''
- Return the elapsed time in seconds.
-
- @param timer - Namedtuple that contains the timer data.
- @return Elapsed time in seconds.
- '''
- def GetTime(self, timer):
- return timer.total_time
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(r"""
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data.decode())
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["total_time"])
+
+ if match.group("total_time").count(".") == 1:
+ return timer(float(match.group("total_time")))
+ else:
+ return timer(float(match.group("total_time").replace(",", ".")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ return timer.total_time
Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
# Import the util path, this method even works if the path contains symlinks to
# modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
from profiler import *
@@ -29,86 +29,86 @@
'''
class PCA(object):
- '''
- Create the Principal Components Analysis benchmark instance.
+ '''
+ Create the Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform PCA on.
@param path - Path to the mlpack executable.
@param verbose - Display informational messages.
- '''
- def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True):
- self.verbose = verbose
- self.dataset = dataset
- self.path = path
-
- '''
+ '''
+ def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ '''
Perform Principal Components Analysis. If the method has been successfully
completed return the elapsed time in seconds.
@param options - Extra options for the method.
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
- def RunMethod(self, options):
- Log.Info("Perform PCA.", self.verbose)
+ def RunMethod(self, options):
+ Log.Info("Perform PCA.", self.verbose)
+
+ # Split the command using shell-like syntax.
+ cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
+ " PCA -i " + self.dataset + " " + options)
+
+ # Run command with the nessecary arguments and return its output as a byte
+ # string. We have untrusted input so we disables all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % time), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data form a given string.
- # Split the command using shell-like syntax.
- cmd = shlex.split("java -classpath " + self.path + ":methods/weka" +
- " PCA -i " + self.dataset + " " + options)
-
- # Run command with the nessecary arguments and return its output as a byte
- # string. We have untrusted input so we disables all shell based features.
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
- except Exception:
- Log.Fatal("Could not execute command: " + str(cmd))
- return -1
-
- # Return the elapsed time.
- timer = self.parseTimer(s)
- if not timer:
- Log.Fatal("Can't parse the timer")
- return -1
- else:
- time = self.GetTime(timer)
- Log.Info(("total time: %fs" % time), self.verbose)
-
- return time
-
- '''
- Parse the timer data form a given string.
-
- @param data - String to parse timer data from.
- @return - Namedtuple that contains the timer data.
- '''
- def parseTimer(self, data):
- # Compile the regular expression pattern into a regular expression object to
- # parse the timer data.
- pattern = re.compile(r"""
- .*?loading_data: (?P<loading_time>.*?)s.*?
- .*?total_time: (?P<total_time>.*?)s.*?
- """, re.VERBOSE|re.MULTILINE|re.DOTALL)
-
- match = pattern.match(data)
- if not match:
- Log.Fatal("Can't parse the data: wrong format")
- return -1
- else:
- # Create a namedtuple and return the timer data.
- timer = collections.namedtuple("timer", ["loading_time", "total_time"])
-
- if match.group("loading_time").count(".") == 1:
- return timer(float(match.group("loading_time")),
- float(match.group("total_time")))
- else:
- return timer(float(match.group("loading_time").replace(",", ".")),
- float(match.group("total_time").replace(",", ".")))
-
- '''
- Return the elapsed time in seconds.
-
- @param timer - Namedtuple that contains the timer data.
- @return Elapsed time in seconds.
- '''
- def GetTime(self, timer):
- time = timer.total_time - timer.loading_time
- return time
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(r"""
+ .*?loading_data: (?P<loading_time>.*?)s.*?
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data.decode())
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["loading_time", "total_time"])
+
+ if match.group("loading_time").count(".") == 1:
+ return timer(float(match.group("loading_time")),
+ float(match.group("total_time")))
+ else:
+ return timer(float(match.group("loading_time").replace(",", ".")),
+ float(match.group("total_time").replace(",", ".")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ time = timer.total_time - timer.loading_time
+ return time
More information about the mlpack-svn mailing list