[mlpack-svn] r15559 - mlpack/conf/jenkins-conf/benchmark/methods/shogun
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 26 09:10:20 EDT 2013
Author: marcus
Date: Fri Jul 26 09:10:20 2013
New Revision: 15559
Log:
Add timeout for the shogun benchmark scripts.
Modified:
mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py
mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
Create the All K-Nearest-Neighbors benchmark instance.
@param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement All K-Nearest-Neighbors.
@@ -46,49 +48,58 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def AllKnnShogun(self, options):
- totalTimer = Timer()
-
- # Load input dataset.
- # If the dataset contains two files then the second file is the query file.
- # In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
- queryData = np.genfromtxt(self.dataset[1], delimiter=',')
- queryFeat = RealFeatures(queryFeat.T)
- else:
- referenceData = np.genfromtxt(self.dataset, delimiter=',')
-
- # Labels are the last row of the dataset.
- labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
- referenceData = referenceData[:,:-1]
-
- with totalTimer:
- # Get all the parameters.
- k = re.search("-k (\d+)", options)
- if not k:
- Log.Fatal("Required option: Number of furthest neighbors to find.")
- return -1
- else:
- k = int(k.group(1))
- if (k < 1 or k > referenceData.shape[0]):
- Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
- + "less ")
- return -1
-
- referenceFeat = RealFeatures(referenceData.T)
- distance = EuclideanDistance(referenceFeat, referenceFeat)
-
- # Perform All K-Nearest-Neighbors.
- model = SKNN(k, distance, labels)
- model.train()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunAllKnnShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the query file.
+ # In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
if len(self.dataset) == 2:
- out = model.apply(queryFeat).get_labels()
+ referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
+ queryData = np.genfromtxt(self.dataset[1], delimiter=',')
+ queryFeat = RealFeatures(queryFeat.T)
else:
- out = model.apply(referenceFeat).get_labels()
+ referenceData = np.genfromtxt(self.dataset, delimiter=',')
- return totalTimer.ElapsedTime()
+ # Labels are the last row of the dataset.
+ labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
+ referenceData = referenceData[:,:-1]
+
+ with totalTimer:
+ # Get all the parameters.
+ k = re.search("-k (\d+)", options)
+ if not k:
+ Log.Fatal("Required option: Number of furthest neighbors to find.")
+ return -1
+ else:
+ k = int(k.group(1))
+ if (k < 1 or k > referenceData.shape[0]):
+ Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
+ + "less ")
+ return -1
+
+ referenceFeat = RealFeatures(referenceData.T)
+ distance = EuclideanDistance(referenceFeat, referenceFeat)
+
+ # Perform All K-Nearest-Neighbors.
+ model = SKNN(k, distance, labels)
+ model.train()
+
+ if len(self.dataset) == 2:
+ out = model.apply(queryFeat).get_labels()
+ else:
+ out = model.apply(referenceFeat).get_labels()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunAllKnnShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform All K-Nearest-Neighbors. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
Create the Gaussian Mixture Model benchmark instance.
@param dataset - Input dataset to perform Gaussian Mixture Model on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement Gaussian Mixture Model.
@@ -45,27 +47,36 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def GMMShogun(self, options):
- totalTimer = Timer()
- # Load input dataset.
- dataPoints = np.genfromtxt(self.dataset, delimiter=',')
- dataFeat = RealFeatures(dataPoints.T)
-
- # Get all the parameters.
- g = re.search("-g (\d+)", options)
- n = re.search("-n (\d+)", options)
- s = re.search("-n (\d+)", options)
-
- g = 1 if not g else int(g.group(1))
- n = 250 if not n else int(n.group(1))
-
- # Create the Gaussian Mixture Model.
- model = Clustering.GMM(g)
- model.set_features(dataFeat)
- with totalTimer:
- model.train_em(1e-9, n, 1e-9)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunGMMShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ dataPoints = np.genfromtxt(self.dataset, delimiter=',')
+ dataFeat = RealFeatures(dataPoints.T)
+
+ # Get all the parameters.
+ g = re.search("-g (\d+)", options)
+ n = re.search("-n (\d+)", options)
+ s = re.search("-n (\d+)", options)
+
+ g = 1 if not g else int(g.group(1))
+ n = 250 if not n else int(n.group(1))
+
+ # Create the Gaussian Mixture Model.
+ model = Clustering.GMM(g)
+ model.set_features(dataFeat)
+ with totalTimer:
+ model.train_em(1e-9, n, 1e-9)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunGMMShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Gaussian Mixture Model. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
Create the Kernel Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform KPCA on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement Kernel Principal Components Analysis.
@@ -46,54 +48,63 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KPCAShogun(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
- dataFeat = RealFeatures(data.T)
-
- with totalTimer:
- # Get the new dimensionality, if it is necessary.
- dimension = re.search('-d (\d+)', options)
- if not dimension:
- d = data.shape[1]
- else:
- d = int(dimension.group(1))
- if (d > data.shape[1]):
- Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
- + "than existing dimensionality (" + str(data.shape[1]) + ")!")
- return -1
-
- # Get the kernel type and make sure it is valid.
- kernel = re.search("-k ([^\s]+)", options)
- if not kernel:
- Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan'" +
- ", 'polynomial' and 'gaussian'.")
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKPCAShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+ dataFeat = RealFeatures(data.T)
+
+ with totalTimer:
+ # Get the new dimensionality, if it is necessary.
+ dimension = re.search('-d (\d+)', options)
+ if not dimension:
+ d = data.shape[1]
+ else:
+ d = int(dimension.group(1))
+ if (d > data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
+ + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ return -1
+
+ # Get the kernel type and make sure it is valid.
+ kernel = re.search("-k ([^\s]+)", options)
+ if not kernel:
+ Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan'" +
+ ", 'polynomial' and 'gaussian'.")
+ return -1
+ elif kernel.group(1) == "polynomial":
+ degree = re.search('-D (\d+)', options)
+ degree = 1 if not degree else int(degree.group(1))
+
+ kernel = PolyKernel(dataFeat, dataFeat, degree, True)
+ elif kernel.group(1) == "gaussian":
+ kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
+ elif kernel.group(1) == "linear":
+ kernel = LinearKernel(dataFeat, dataFeat)
+ elif kernel.group(1) == "hyptan":
+ kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
+ else:
+ Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid choices"
+ + " are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
return -1
- elif kernel.group(1) == "polynomial":
- degree = re.search('-D (\d+)', options)
- degree = 1 if not degree else int(degree.group(1))
-
- kernel = PolyKernel(dataFeat, dataFeat, degree, True)
- elif kernel.group(1) == "gaussian":
- kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
- elif kernel.group(1) == "linear":
- kernel = LinearKernel(dataFeat, dataFeat)
- elif kernel.group(1) == "hyptan":
- kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
- else:
- Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid choices"
- + " are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
- return -1
-
- # Perform Kernel Principal Components Analysis.
- model = KernelPCA(kernel)
- model.set_target_dim(d)
- model.init(dataFeat)
- model.apply_to_feature_matrix(dataFeat)
- return totalTimer.ElapsedTime()
+ # Perform Kernel Principal Components Analysis.
+ model = KernelPCA(kernel)
+ model.set_target_dim(d)
+ model.init(dataFeat)
+ model.apply_to_feature_matrix(dataFeat)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunKPCAShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Kernel Principal Components Analysis. If the method has been
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
Create the K-Means Clustering benchmark instance.
@param dataset - Input dataset to perform K-Means Clustering on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement K-Means Clustering.
@@ -46,7 +48,6 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KMeansShogun(self, options):
- totalTimer = Timer()
# Gather parameters.
clusters = re.search("-c (\d+)", options)
@@ -76,7 +77,11 @@
+ " " + self.dataset[1] + " " + clusters.group(1) + " "
+ str(maxIterations))
try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+ timeout=self.timeout)
+ except subprocess.TimeoutExpired as e:
+ Log.Warn(str(e))
+ return -2
except Exception as e:
Log.Fatal("Could not execute command: " + str(cmd))
return -1
@@ -93,30 +98,41 @@
return time
else:
- import numpy as np
- from shogun.Distance import EuclideanDistance
- from shogun.Features import RealFeatures
- from shogun import Clustering
- from shogun.Mathematics import Math_init_random
-
- if seed:
- Math_init_random(seed.group(1))
-
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- dataFeat = RealFeatures(data.T)
- distance = EuclideanDistance(dataFeat, dataFeat)
-
- # Create the K-Means object and perform K-Means clustering.
- with totalTimer:
- model = Clustering.KMeans(int(clusters.group(1)), distance)
- model.set_max_iter(maxIterations)
- model.train()
- labels = model.apply().get_labels()
- centers = model.get_cluster_centers()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKMeansShogun():
+ import numpy as np
+ from shogun.Distance import EuclideanDistance
+ from shogun.Features import RealFeatures
+ from shogun import Clustering
+ from shogun.Mathematics import Math_init_random
+
+ totalTimer = Timer()
+
+ if seed:
+ Math_init_random(seed.group(1))
+
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ dataFeat = RealFeatures(data.T)
+ distance = EuclideanDistance(dataFeat, dataFeat)
+
+ # Create the K-Means object and perform K-Means clustering.
+ with totalTimer:
+ model = Clustering.KMeans(int(clusters.group(1)), distance)
+ model.set_max_iter(maxIterations)
+ model.train()
- return totalTimer.ElapsedTime()
+ labels = model.apply().get_labels()
+ centers = model.get_cluster_centers()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunKMeansShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform K-Means Clustering. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
Create the All Least Angle Regression benchmark instance.
@param dataset - Input dataset to perform Least Angle Regression on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement Least Angle Regression.
@@ -45,28 +47,37 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LARSShogun(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- inputData = np.genfromtxt(self.dataset[0], delimiter=',')
- responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
- inputFeat = RealFeatures(inputData.T)
- responsesFeat = RegressionLabels(responsesData)
-
- # Get all the parameters.
- lambda1 = re.search("-l (\d+)", options)
- lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
-
- with totalTimer:
- # Perform LARS.
- model = LeastAngleRegression(False)
- model.set_max_l1_norm(lambda1)
- model.set_labels(responsesFeat)
- model.train(inputFeat)
- model.get_w(model.get_path_size() - 1)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLARSShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+ responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
+ inputFeat = RealFeatures(inputData.T)
+ responsesFeat = RegressionLabels(responsesData)
+
+ # Get all the parameters.
+ lambda1 = re.search("-l (\d+)", options)
+ lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
+
+ with totalTimer:
+ # Perform LARS.
+ model = LeastAngleRegression(False)
+ model.set_max_l1_norm(lambda1)
+ model.set_labels(responsesFeat)
+ model.train(inputFeat)
+ model.get_w(model.get_path_size() - 1)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLARSShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Least Angle Regression. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
Create the Linear Regression benchmark instance.
@param dataset - Input dataset to perform Linear Regression on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement Linear Regression.
@@ -45,27 +47,36 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LinearRegressionShogun(self, options):
- totalTimer = Timer()
- # Load input dataset.
- # If the dataset contains two files then the second file is the responses
- # file. In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- X = np.genfromtxt(self.dataset[0], delimiter=',')
- y = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- X = np.genfromtxt(self.dataset, delimiter=',')
- y = X[:, (X.shape[1] - 1)]
- X = X[:,:-1]
-
- with totalTimer:
- # Perform linear regression.
- model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
- model.train()
- b = model.get_w()
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLinearRegressionShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the responses
+ # file. In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
+ if len(self.dataset) == 2:
+ X = np.genfromtxt(self.dataset[0], delimiter=',')
+ y = np.genfromtxt(self.dataset[1], delimiter=',')
+ else:
+ X = np.genfromtxt(self.dataset, delimiter=',')
+ y = X[:, (X.shape[1] - 1)]
+ X = X[:,:-1]
+
+ with totalTimer:
+ # Perform linear regression.
+ model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
+ model.train()
+ b = model.get_w()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLinearRegressionShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Linear Regression. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
Create the Naive Bayes Classifier benchmark instance.
@param dataset - Input dataset to perform NBC on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the shogun library to implement Naive Bayes Classifier.
@@ -45,28 +47,37 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def NBCShogun(self, options):
- totalTimer = Timer()
-
- Log.Info("Loading dataset", self.verbose)
- # Load train and test dataset.
- trainData = np.genfromtxt(self.dataset[0], delimiter=',')
- testData = np.genfromtxt(self.dataset[1], delimiter=',')
-
- # Labels are the last row of the training set.
- labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
-
- with totalTimer:
- # Transform into features.
- trainFeat = RealFeatures(trainData[:,:-1].T)
- testFeat = RealFeatures(testData.T)
-
- # Create and train the classifier.
- nbc = GaussianNaiveBayes(trainFeat, labels)
- nbc.train()
- # Run Naive Bayes Classifier on the test dataset.
- nbc.apply(testFeat).get_labels()
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunNBCShogun():
+ totalTimer = Timer()
+
+ Log.Info("Loading dataset", self.verbose)
+ # Load train and test dataset.
+ trainData = np.genfromtxt(self.dataset[0], delimiter=',')
+ testData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+ # Labels are the last row of the training set.
+ labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
+
+ with totalTimer:
+ # Transform into features.
+ trainFeat = RealFeatures(trainData[:,:-1].T)
+ testFeat = RealFeatures(testData.T)
+
+ # Create and train the classifier.
+ nbc = GaussianNaiveBayes(trainFeat, labels)
+ nbc.train()
+ # Run Naive Bayes Classifier on the test dataset.
+ nbc.apply(testFeat).get_labels()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunNBCShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Naive Bayes Classifier. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
Create the Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform PCA on.
+ @param timeout - Maximum run time in seconds before the benchmark times out. Default 0 (no timeout).
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
# Load input dataset.
Log.Info("Loading dataset", verbose)
@@ -49,35 +51,44 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def PCAShogun(self, options):
- totalTimer = Timer()
-
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- feat = RealFeatures(self.data.T)
-
- with totalTimer:
- # Find out what dimension we want.
- match = re.search('-d (\d+)', options)
-
- if not match:
- k = self.data.shape[1]
- else:
- k = int(match.group(1))
- if (k > self.data.shape[1]):
- Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater than"
- + "existing dimensionality (" + str(self.data.shape[1]) + ")!")
- return -1
-
- # Get the options for running PCA.
- s = True if options.find("-s") > -1 else False
-
- # Perform PCA.
- prep = ShogunPCA(s)
- prep.set_target_dim(k)
- prep.init(feat)
- prep.apply_to_feature_matrix(feat)
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunPCAShogun():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ feat = RealFeatures(self.data.T)
+
+ with totalTimer:
+ # Find out what dimension we want.
+ match = re.search('-d (\d+)', options)
+
+ if not match:
+ k = self.data.shape[1]
+ else:
+ k = int(match.group(1))
+ if (k > self.data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater than"
+ + "existing dimensionality (" + str(self.data.shape[1]) + ")!")
+ return -1
+
+ # Get the options for running PCA.
+ s = True if options.find("-s") > -1 else False
+
+ # Perform PCA.
+ prep = ShogunPCA(s)
+ prep.set_target_dim(k)
+ prep.init(feat)
+ prep.apply_to_feature_matrix(feat)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunPCAShogun()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Principal Components Analysis. If the method has been successfully
More information about the mlpack-svn
mailing list