[mlpack-svn] r15558 - mlpack/conf/jenkins-conf/benchmark/methods/mlpy
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 26 09:07:27 EDT 2013
Author: marcus
Date: Fri Jul 26 09:07:26 2013
New Revision: 15558
Log:
Add timeout for the mlpy benchmark scripts.
Modified:
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/allknn.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kernel_pca.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kmeans.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/lars.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/linear_regression.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpy/pca.py
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/allknn.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/allknn.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the All K-Nearest-Neighbors benchmark instance.
@param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement All K-Nearest-Neighbors.
@@ -44,45 +46,54 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def AllKnnMlpy(self, options):
- totalTimer = Timer()
-
- # Load input dataset.
- # If the dataset contains two files then the second file is the query file
- # In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
- queryData = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- referenceData = np.genfromtxt(self.dataset, delimiter=',')
-
- # Labels are the last row of the dataset.
- labels = referenceData[:, (referenceData.shape[1] - 1)]
- referenceData = referenceData[:,:-1]
-
- with totalTimer:
- # Get all the parameters.
- k = re.search("-k (\d+)", options)
- if not k:
- Log.Fatal("Required option: Number of furthest neighbors to find.")
- return -1
- else:
- k = int(k.group(1))
- if (k < 1 or k > referenceData.shape[0]):
- Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
- + "less ")
- return -1
-
- # Perform All K-Nearest-Neighbors.
- model = mlpy.KNN(k)
- model.learn(referenceData, labels)
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunAllKnnMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the query file
+ # In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
if len(self.dataset) == 2:
- out = model.pred(queryData)
+ referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
+ queryData = np.genfromtxt(self.dataset[1], delimiter=',')
else:
- out = model.pred(referenceData)
+ referenceData = np.genfromtxt(self.dataset, delimiter=',')
- return totalTimer.ElapsedTime()
+ # Labels are the last row of the dataset.
+ labels = referenceData[:, (referenceData.shape[1] - 1)]
+ referenceData = referenceData[:,:-1]
+
+ with totalTimer:
+ # Get all the parameters.
+ k = re.search("-k (\d+)", options)
+ if not k:
+ Log.Fatal("Required option: Number of furthest neighbors to find.")
+ return -1
+ else:
+ k = int(k.group(1))
+ if (k < 1 or k > referenceData.shape[0]):
+ Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
+ + "less ")
+ return -1
+
+ # Perform All K-Nearest-Neighbors.
+ model = mlpy.KNN(k)
+ model.learn(referenceData, labels)
+
+ if len(self.dataset) == 2:
+ out = model.pred(queryData)
+ else:
+ out = model.pred(referenceData)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunAllKnnMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform All K-Nearest-Neighbors. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kernel_pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kernel_pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kernel_pca.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the Kernel Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform KPCA on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement Kernel Principal Components Analysis.
@@ -44,52 +46,61 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KPCAMlpy(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Get the new dimensionality, if it is necessary.
- dimension = re.search('-d (\d+)', options)
- if not dimension:
- d = data.shape[0]
- else:
- d = int(dimension.group(1))
- if (d > data.shape[1]):
- Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
- + "than existing dimensionality (" + str(data.shape[1]) + ")!")
- return -1
-
- # Get the kernel type and make sure it is valid.
- kernel = re.search("-k ([^\s]+)", options)
- if not kernel:
- Log.Fatal("Choose kernel type, valid choices are 'polynomial', " +
- "'gaussian', 'linear' and 'hyptan'.")
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKPCAMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ with totalTimer:
+ # Get the new dimensionality, if it is necessary.
+ dimension = re.search('-d (\d+)', options)
+ if not dimension:
+ d = data.shape[0]
+ else:
+ d = int(dimension.group(1))
+ if (d > data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
+ + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ return -1
+
+ # Get the kernel type and make sure it is valid.
+ kernel = re.search("-k ([^\s]+)", options)
+ if not kernel:
+ Log.Fatal("Choose kernel type, valid choices are 'polynomial', " +
+ "'gaussian', 'linear' and 'hyptan'.")
+ return -1
+ elif kernel.group(1) == "polynomial":
+ degree = re.search('-D (\d+)', options)
+ degree = 1 if not degree else int(degree.group(1))
+
+ kernel = mlpy.kernel_polynomial(data, data, d=degree)
+ elif kernel.group(1) == "gaussian":
+ kernel = mlpy.kernel_gaussian(data, data, sigma=2)
+ elif kernel.group(1) == "linear":
+ kernel = mlpy.kernel_linear(data, data)
+ elif kernel.group(1) == "hyptan":
+ kernel = mlpy.kernel_sigmoid(data, data)
+ else:
+ Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
+ "choices are 'polynomial', 'gaussian', 'linear' and 'hyptan'.")
return -1
- elif kernel.group(1) == "polynomial":
- degree = re.search('-D (\d+)', options)
- degree = 1 if not degree else int(degree.group(1))
-
- kernel = mlpy.kernel_polynomial(data, data, d=degree)
- elif kernel.group(1) == "gaussian":
- kernel = mlpy.kernel_gaussian(data, data, sigma=2)
- elif kernel.group(1) == "linear":
- kernel = mlpy.kernel_linear(data, data)
- elif kernel.group(1) == "hyptan":
- kernel = mlpy.kernel_sigmoid(data, data)
- else:
- Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
- "choices are 'polynomial', 'gaussian', 'linear' and 'hyptan'.")
- return -1
-
- # Perform Kernel Principal Components Analysis.
- model = mlpy.KPCA()
- model.learn(kernel)
- out = model.transform(kernel, k=d)
- return totalTimer.ElapsedTime()
+ # Perform Kernel Principal Components Analysis.
+ model = mlpy.KPCA()
+ model.learn(kernel)
+ out = model.transform(kernel, k=d)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunKPCAMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Kernel Principal Components Analysis. If the method has been
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kmeans.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/kmeans.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the K-Means Clustering benchmark instance.
@param dataset - Input dataset to perform K-Means on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement K-Means Clustering.
@@ -44,33 +46,42 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KMeansMlpy(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- # Gather all parameters.
- clusters = re.search('-c (\d+)', options)
- seed = re.search("-s (\d+)", options)
-
- # Now do validation of options.
- if not clusters:
- Log.Fatal("Required option: Number of clusters or cluster locations.")
- return -1
- elif int(clusters.group(1)) < 1:
- Log.Fatal("Invalid number of clusters requested! Must be greater than or "
- + "equal to 1.")
- return -1
-
- with totalTimer:
- # Create the KMeans object and perform K-Means clustering.
- if seed:
- kmeans = mlpy.kmeans(data, int(clusters.group(1)), seed=int(seed.group(1)))
- else:
- kmeans = mlpy.kmeans(data, int(clusters.group(1)))
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKMeansMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ # Gather all parameters.
+ clusters = re.search('-c (\d+)', options)
+ seed = re.search("-s (\d+)", options)
+
+ # Now do validation of options.
+ if not clusters:
+ Log.Fatal("Required option: Number of clusters or cluster locations.")
+ return -1
+ elif int(clusters.group(1)) < 1:
+ Log.Fatal("Invalid number of clusters requested! Must be greater than or "
+ + "equal to 1.")
+ return -1
+
+ with totalTimer:
+ # Create the KMeans object and perform K-Means clustering.
+ if seed:
+ kmeans = mlpy.kmeans(data, int(clusters.group(1)), seed=int(seed.group(1)))
+ else:
+ kmeans = mlpy.kmeans(data, int(clusters.group(1)))
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunKMeansMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform K-Means Clustering. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/lars.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/lars.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/lars.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the Least Angle Regression benchmark instance.
@param dataset - Input dataset to perform Least Angle Regression on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement Least Angle Regression.
@@ -44,20 +46,29 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LARSMlpy(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- inputData = np.genfromtxt(self.dataset[0], delimiter=',')
- responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
-
- with totalTimer:
- # Perform LARS.
- model = mlpy.LARS()
- model.learn(inputData, responsesData)
- out = model.beta()
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLARSMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+ responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+ with totalTimer:
+ # Perform LARS.
+ model = mlpy.LARS()
+ model.learn(inputData, responsesData)
+ out = model.beta()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLARSMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Least Angle Regression. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/linear_regression.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/linear_regression.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the Linear Regression benchmark instance.
@param dataset - Input dataset to perform Linear Regression on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement Linear Regression.
@@ -44,27 +46,36 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LinearRegressionMlpy(self, options):
- totalTimer = Timer()
- # Load input dataset.
- # If the dataset contains two files then the second file is the responses
- # file. In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- X = np.genfromtxt(self.dataset[0], delimiter=',')
- y = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- X = np.genfromtxt(self.dataset, delimiter=',')
- y = X[:, (X.shape[1] - 1)]
- X = X[:,:-1]
-
- with totalTimer:
- # Perform linear regression.
- model = mlpy.OLS()
- model.learn(X, y)
- b = model.beta()
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLinearRegressionMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the responses
+ # file. In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
+ if len(self.dataset) == 2:
+ X = np.genfromtxt(self.dataset[0], delimiter=',')
+ y = np.genfromtxt(self.dataset[1], delimiter=',')
+ else:
+ X = np.genfromtxt(self.dataset, delimiter=',')
+ y = X[:, (X.shape[1] - 1)]
+ X = X[:,:-1]
+
+ with totalTimer:
+ # Perform linear regression.
+ model = mlpy.OLS()
+ model.learn(X, y)
+ b = model.beta()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLinearRegressionMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Linear Regression. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpy/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpy/pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpy/pca.py Fri Jul 26 09:07:26 2013
@@ -31,11 +31,13 @@
Create the Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform PCA on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the mlpy library to implement Principal Components Analysis.
@@ -44,34 +46,43 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def PCAMlpy(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Find out what dimension we want.
- match = re.search('-d (\d+)', options)
-
- if not match:
- k = data.shape[1]
- else:
- k = int(match.group(1))
- if (k > data.shape[1]):
- Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
- + "than existing dimensionality (" + str(data.shape[1]) + ")!")
- return -1
-
- # Get the options for running PCA.
- s = True if options.find("-s") > -1 else False
-
- # Perform PCA.
- prep = mlpy.PCA(whiten = s)
- prep.learn(data)
- out = prep.transform(data, k)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunPCAMlpy():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ with totalTimer:
+ # Find out what dimension we want.
+ match = re.search('-d (\d+)', options)
+
+ if not match:
+ k = data.shape[1]
+ else:
+ k = int(match.group(1))
+ if (k > data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
+ + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ return -1
+
+ # Get the options for running PCA.
+ s = True if options.find("-s") > -1 else False
+
+ # Perform PCA.
+ prep = mlpy.PCA(whiten = s)
+ prep.learn(data)
+ out = prep.transform(data, k)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunPCAMlpy()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Principal Components Analysis. If the method has been successfully
More information about the mlpack-svn
mailing list