[mlpack-svn] r15546 - mlpack/conf/jenkins-conf/benchmark/methods/weka

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 25 16:30:55 EDT 2013


Author: marcus
Date: Thu Jul 25 16:30:54 2013
New Revision: 15546

Log:
Clean weka scripts and make the code compatible with python 3.

Modified:
   mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py
   mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py
   mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py
   mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py
   mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py

Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/allknn.py	Thu Jul 25 16:30:54 2013
@@ -69,7 +69,7 @@
 		except Exception:
 			Log.Fatal("Could not execute command: " + str(cmd))
 			return -1
-
+			
 		# Return the elapsed time.
 		timer = self.parseTimer(s)
 		if not timer:
@@ -94,7 +94,7 @@
 				.*?total_time: (?P<total_time>.*?)s.*?
 				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
 		
-		match = pattern.match(data)
+		match = pattern.match(data.decode())
 		if not match:
 			Log.Fatal("Can't parse the data: wrong format")
 			return -1

Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/kmeans.py	Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
 # Import the util path, this method even works if the path contains symlinks to
 # modules.
 cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
-	os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
 if cmd_subfolder not in sys.path:
-	sys.path.insert(0, cmd_subfolder)
+  sys.path.insert(0, cmd_subfolder)
 
 from log import *
 from profiler import *
@@ -29,82 +29,82 @@
 '''
 class KMEANS(object):
 
-	''' 
-	Create the K-Means Clustering benchmark instance.
+  ''' 
+  Create the K-Means Clustering benchmark instance.
   
   @param dataset - Input dataset to perform K-Means on.
   @param path - Path to the mlpack executable.
   @param verbose - Display informational messages.
-	'''
-	def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose = True): 
-		self.verbose = verbose
-		self.dataset = dataset
-		self.path = path	
-		
-	'''
+  '''
+  def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose = True): 
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path  
+    
+  '''
   K-Means Clustering benchmark instance. If the method has been successfully 
   completed return the elapsed time in seconds.
 
   @param options - Extra options for the method.
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
-	def RunMethod(self, options):
-		Log.Info("Perform K-Means.", self.verbose)
-		
-		# Split the command using shell-like syntax.
-		cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
-			" KMeans -i " + self.dataset + " " + options)
-		
-		# Run command with the nessecary arguments and return its output as a byte
-		# string. We have untrusted input so we disables all shell based features.
-		try:
-			s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)		
-		except Exception:
-			Log.Fatal("Could not execute command: " + str(cmd))
-			return -1
-
-		# Return the elapsed time.
-		timer = self.parseTimer(s)
-		if not timer:
-			Log.Fatal("Can't parse the timer")
-			return -1
-		else:
-			time = self.GetTime(timer)
-			Log.Info(("total time: %fs" % time), self.verbose)
-
-			return time
-
-	'''
-	Parse the timer data form a given string.
-
-	@param data - String to parse timer data from.
-	@return - Namedtuple that contains the timer data.
-	'''
-	def parseTimer(self, data):
-		# Compile the regular expression pattern into a regular expression object to
-		# parse the timer data.
-		pattern = re.compile(r"""
-				.*?total_time: (?P<total_time>.*?)s.*?
-				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
-		
-		match = pattern.match(data)
-		if not match:
-			Log.Fatal("Can't parse the data: wrong format")
-			return -1
-		else:
-			# Create a namedtuple and return the timer data.
-			timer = collections.namedtuple("timer", ["total_time"])
-			
-			if match.group("total_time").count(".") == 1:
-				return timer(float(match.group("total_time")))
-			else:
-				return timer(float(match.group("total_time").replace(",", ".")))
-
-	'''
-	Return the elapsed time in seconds.
-
-	@param timer - Namedtuple that contains the timer data.
-	@return Elapsed time in seconds.
-	'''
-	def GetTime(self, timer):
-		return timer.total_time
+  def RunMethod(self, options):
+    Log.Info("Perform K-Means.", self.verbose)
+    
+    # Split the command using shell-like syntax.
+    cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
+      " KMeans -i " + self.dataset + " " + options)
+    
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell-based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)   
+    except Exception:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Return the elapsed time.
+    timer = self.parseTimer(s)
+    if not timer:
+      Log.Fatal("Can't parse the timer")
+      return -1
+    else:
+      time = self.GetTime(timer)
+      Log.Info(("total time: %fs" % time), self.verbose)
+
+      return time
+
+  '''
+  Parse the timer data from a given string.
+
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(r"""
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+    
+    match = pattern.match(data.decode())
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["total_time"])
+      
+      if match.group("total_time").count(".") == 1:
+        return timer(float(match.group("total_time")))
+      else:
+        return timer(float(match.group("total_time").replace(",", ".")))
+
+  '''
+  Return the elapsed time in seconds.
+
+  @param timer - Namedtuple that contains the timer data.
+  @return Elapsed time in seconds.
+  '''
+  def GetTime(self, timer):
+    return timer.total_time

Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/linear_regression.py	Thu Jul 25 16:30:54 2013
@@ -94,7 +94,7 @@
 				.*?total_time: (?P<total_time>.*?)s.*?
 				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
 		
-		match = pattern.match(data)
+		match = pattern.match(data.decode())
 		if not match:
 			Log.Fatal("Can't parse the data: wrong format")
 			return -1

Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/nbc.py	Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
 # Import the util path, this method even works if the path contains symlinks to
 # modules.
 cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
-	os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
 if cmd_subfolder not in sys.path:
-	sys.path.insert(0, cmd_subfolder)
+  sys.path.insert(0, cmd_subfolder)
 
 from log import *
 from profiler import *
@@ -29,86 +29,86 @@
 '''
 class NBC(object):
 
-	''' 
-	Create the Naive Bayes Classifier benchmark instance.
+  ''' 
+  Create the Naive Bayes Classifier benchmark instance.
   
   @param dataset - Input dataset to perform NBC on.
   @param path - Path to the mlpack executable.
   @param verbose - Display informational messages.
-	'''
-	def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True): 
-		self.verbose = verbose
-		self.dataset = dataset
-		self.path = path
-		
-	'''
+  '''
+  def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True): 
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+    
+  '''
   Naive Bayes Classifier. If the method has been successfully completed return 
   the elapsed time in seconds.
 
   @param options - Extra options for the method.
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
-	def RunMethod(self, options):
-		Log.Info("Perform NBC.", self.verbose)
+  def RunMethod(self, options):
+    Log.Info("Perform NBC.", self.verbose)
+
+    if len(self.dataset) != 2:
+      Log.Fatal("This method requires two datasets.")
+      return -1
+
+    # Split the command using shell-like syntax.
+    cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
+      " NBC -t " + self.dataset[0] + " -T " + self.dataset[1] + " " + options)
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell-based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)   
+    except Exception:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Return the elapsed time.
+    timer = self.parseTimer(s)
+    if not timer:
+      Log.Fatal("Can't parse the timer")
+      return -1
+    else:
+      time = self.GetTime(timer)
+      Log.Info(("total time: %fs" % time), self.verbose)
+
+      return time
+
+  '''
+  Parse the timer data from a given string.
 
-		if len(self.dataset) != 2:
-			Log.Fatal("This method requires two datasets.")
-			return -1
-
-		# Split the command using shell-like syntax.
-		cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
-			" NBC -t " + self.dataset[0] + " -T " + self.dataset[1] + " " + options)
-
-		# Run command with the nessecary arguments and return its output as a byte
-		# string. We have untrusted input so we disables all shell based features.
-		try:
-			s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)		
-		except Exception:
-			Log.Fatal("Could not execute command: " + str(cmd))
-			return -1
-
-		# Return the elapsed time.
-		timer = self.parseTimer(s)
-		if not timer:
-			Log.Fatal("Can't parse the timer")
-			return -1
-		else:
-			time = self.GetTime(timer)
-			Log.Info(("total time: %fs" % time), self.verbose)
-
-			return time
-
-	'''
-	Parse the timer data form a given string.
-
-	@param data - String to parse timer data from.
-	@return - Namedtuple that contains the timer data.
-	'''
-	def parseTimer(self, data):
-		# Compile the regular expression pattern into a regular expression object to
-		# parse the timer data.
-		pattern = re.compile(r"""
-				.*?total_time: (?P<total_time>.*?)s.*?
-				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
-		
-		match = pattern.match(data)
-		if not match:
-			Log.Fatal("Can't parse the data: wrong format")
-			return -1
-		else:
-			# Create a namedtuple and return the timer data.
-			timer = collections.namedtuple("timer", ["total_time"])
-			
-			if match.group("total_time").count(".") == 1:
-				return timer(float(match.group("total_time")))
-			else:
-				return timer(float(match.group("total_time").replace(",", ".")))
-
-	'''
-	Return the elapsed time in seconds.
-
-	@param timer - Namedtuple that contains the timer data.
-	@return Elapsed time in seconds.
-	'''
-	def GetTime(self, timer):
-		return timer.total_time
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(r"""
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+    
+    match = pattern.match(data.decode())
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["total_time"])
+      
+      if match.group("total_time").count(".") == 1:
+        return timer(float(match.group("total_time")))
+      else:
+        return timer(float(match.group("total_time").replace(",", ".")))
+
+  '''
+  Return the elapsed time in seconds.
+
+  @param timer - Namedtuple that contains the timer data.
+  @return Elapsed time in seconds.
+  '''
+  def GetTime(self, timer):
+    return timer.total_time

Modified: mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.py	Thu Jul 25 16:30:54 2013
@@ -12,9 +12,9 @@
 # Import the util path, this method even works if the path contains symlinks to
 # modules.
 cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
-	os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
 if cmd_subfolder not in sys.path:
-	sys.path.insert(0, cmd_subfolder)
+  sys.path.insert(0, cmd_subfolder)
 
 from log import *
 from profiler import *
@@ -29,86 +29,86 @@
 '''
 class PCA(object):
 
-	''' 
-	Create the Principal Components Analysis benchmark instance.
+  ''' 
+  Create the Principal Components Analysis benchmark instance.
   
   @param dataset - Input dataset to perform PCA on.
   @param path - Path to the mlpack executable.
   @param verbose - Display informational messages.
-	'''
-	def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True): 
-		self.verbose = verbose
-		self.dataset = dataset
-		self.path = path
-		
-	'''
+  '''
+  def __init__(self, dataset, path=os.environ["WEKA_CLASSPATH"], verbose=True): 
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+    
+  '''
   Perform Principal Components Analysis. If the method has been successfully 
   completed return the elapsed time in seconds.
 
   @param options - Extra options for the method.
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
-	def RunMethod(self, options):
-		Log.Info("Perform PCA.", self.verbose)
+  def RunMethod(self, options):
+    Log.Info("Perform PCA.", self.verbose)
+
+    # Split the command using shell-like syntax.
+    cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
+      " PCA -i " + self.dataset + " " + options)
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell-based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)   
+    except Exception:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Return the elapsed time.
+    timer = self.parseTimer(s)
+    if not timer:
+      Log.Fatal("Can't parse the timer")
+      return -1
+    else:
+      time = self.GetTime(timer)
+      Log.Info(("total time: %fs" % time), self.verbose)
+
+      return time 
+
+  '''
+  Parse the timer data from a given string.
 
-		# Split the command using shell-like syntax.
-		cmd = shlex.split("java -classpath " + self.path + ":methods/weka" + 
-			" PCA -i " + self.dataset + " " + options)
-
-		# Run command with the nessecary arguments and return its output as a byte
-		# string. We have untrusted input so we disables all shell based features.
-		try:
-			s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)		
-		except Exception:
-			Log.Fatal("Could not execute command: " + str(cmd))
-			return -1
-
-		# Return the elapsed time.
-		timer = self.parseTimer(s)
-		if not timer:
-			Log.Fatal("Can't parse the timer")
-			return -1
-		else:
-			time = self.GetTime(timer)
-			Log.Info(("total time: %fs" % time), self.verbose)
-
-			return time	
-
-	'''
-	Parse the timer data form a given string.
-
-	@param data - String to parse timer data from.
-	@return - Namedtuple that contains the timer data.
-	'''
-	def parseTimer(self, data):
-		# Compile the regular expression pattern into a regular expression object to
-		# parse the timer data.
-		pattern = re.compile(r"""
-				.*?loading_data: (?P<loading_time>.*?)s.*?
-				.*?total_time: (?P<total_time>.*?)s.*?
-				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
-		
-		match = pattern.match(data)
-		if not match:
-			Log.Fatal("Can't parse the data: wrong format")
-			return -1
-		else:
-			# Create a namedtuple and return the timer data.
-			timer = collections.namedtuple("timer", ["loading_time", "total_time"])
-
-			if match.group("loading_time").count(".") == 1:
-				return timer(float(match.group("loading_time")),
-					float(match.group("total_time")))
-			else:
-				return timer(float(match.group("loading_time").replace(",", ".")),
-						 	float(match.group("total_time").replace(",", ".")))
-
-	'''
-	Return the elapsed time in seconds.
-
-	@param timer - Namedtuple that contains the timer data.
-	@return Elapsed time in seconds.
-	'''
-	def GetTime(self, timer):
-		time = timer.total_time - timer.loading_time
-		return time
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(r"""
+        .*?loading_data: (?P<loading_time>.*?)s.*?
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+    
+    match = pattern.match(data.decode())
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["loading_time", "total_time"])
+
+      if match.group("loading_time").count(".") == 1:
+        return timer(float(match.group("loading_time")),
+          float(match.group("total_time")))
+      else:
+        return timer(float(match.group("loading_time").replace(",", ".")),
+              float(match.group("total_time").replace(",", ".")))
+
+  '''
+  Return the elapsed time in seconds.
+
+  @param timer - Namedtuple that contains the timer data.
+  @return Elapsed time in seconds.
+  '''
+  def GetTime(self, timer):
+    time = timer.total_time - timer.loading_time
+    return time



More information about the mlpack-svn mailing list