Documente Academic
Documente Profesional
Documente Cultură
1. Implementation: As I couldn't achieve a working krr class, I wasn't able to try out the cross-validation function. In any case, I have added here a capture of the results I get when running both tests (krr-test and cv-test). The biggest problem was to find out how to predict with krr.
2. ROC curve: For my ROC curve, I decided to choose 6 points to make 6 different classifications; moreover, I derived the analytical point from the distributions' parameters and also made a classification with this point. Finally, I got the following ROC curves:
Figure 5: ROC curves for n=10000. As we can see, in all cases the analytical solution achieves a good classification. On the other hand, the empirical values sometimes give us a really good classification (e.g. 2.6875 for n=10000) or a really bad one (e.g. -1.3909 for n=10000).
Page 4 of 10
"""Write the functions
- cv
- zero_one_loss
- krr
Write your implementations in the given functions stubs!

(c) Daniel Bartz, TU Berlin, 2013
"""
import numpy as np
import scipy.linalg as la
import itertools as it
import time
import pylab as pl
from mpl_toolkits.mplot3d import Axes3D
import sys


def zero_one_loss(y_true, y_pred):
    """Compute the zero-one loss.

    Param:
        y_true: True class labels (+1 / -1), any shape holding n entries
        y_pred: Predicted real-valued scores, any shape holding n entries;
                only their sign is used
    Return:
        Fraction of entries whose predicted sign differs from the label
    Raises:
        ValueError: if the inputs are empty or differ in size
    """
    truth = np.ravel(y_true)
    # np.sign maps positive scores to +1 and negative ones to -1. A score of
    # exactly 0 stays 0 and therefore counts as an error against +/-1 labels.
    predicted = np.sign(np.ravel(y_pred))
    if truth.size == 0 or truth.size != predicted.size:
        raise ValueError('y_true and y_pred must be non-empty and of equal size')
    # Averaging over all entries (not shape[0]) keeps (1 x n) inputs correct.
    return np.mean(predicted != truth)


# Name looked up in ``params`` paired with the keyword that ``fit`` expects.
_CV_PARAMETERS = (('kernel', 'kernel'),
                  ('kernelparam', 'kernelparameter'),
                  ('regularization', 'regularization'))


def _cv_candidates(params, name):
    """Return the candidate values stored for ``name`` in ``params``, or None."""
    if isinstance(params, dict):
        return params.get(name)
    # Flat layout: the candidates directly follow their name in the sequence.
    for position, entry in enumerate(params[:-1]):
        # Only strings are compared: ``==`` on a candidate array is ambiguous.
        if isinstance(entry, str) and entry == name:
            return params[position + 1]
    return None


def cv(X, y, method, params, loss_function=zero_one_loss, nfolds=10,
       nrepetitions=5):
    """Compute cross-validation over a grid of parameters.

    Params:
        X: (d x n) data set
        y: (1 x n) data labels (a flat array of n labels works as well)
        method: Object with methods fit and predict; both must return the
                fitted object, and predict must store its output in ``ypred``
        params: Flat sequence ``[name, candidates, name, candidates, ...]``
                (or a dict) using the names 'kernel', 'kernelparam' and
                'regularization'. Names that are absent are not passed to fit
        loss_function: Function to calculate the error of the predicted
                classification, called as ``loss_function(y_true, y_pred)``
        nfolds: Number of folds to divide the data set into
        nrepetitions: Number of random partitions evaluated per combination
    Return:
        method fitted on the complete data set with the combination that
        reached the lowest mean loss; that loss is stored in ``cvloss``
    Raises:
        ValueError: if nfolds or nrepetitions are out of range, or if one of
                the given candidate lists is empty
    """
    n_samples = X.shape[1]
    if not 2 <= nfolds <= n_samples:
        raise ValueError('nfolds must lie between 2 and the number of samples')
    if nrepetitions < 1:
        raise ValueError('nrepetitions must be at least 1')
    labels = np.ravel(y)

    # Collect only the parameters the caller provided, so that an absent
    # parameter does not empty the whole grid.
    keywords, candidate_lists = [], []
    for name, keyword in _CV_PARAMETERS:
        candidates = _cv_candidates(params, name)
        if candidates is not None:
            keywords.append(keyword)
            candidate_lists.append(list(candidates))
    combinations = list(it.product(*candidate_lists))
    if not combinations:
        raise ValueError('No parameter combination to test')

    best_loss = float('inf')
    best_kwargs = None
    start = time.time()
    for tested, combination in enumerate(combinations, 1):
        fit_kwargs = dict(zip(keywords, combination))
        # The accumulator restarts for every combination.
        total_loss = 0.
        for _ in range(nrepetitions):
            order = np.random.permutation(n_samples)
            for test_idx in np.array_split(order, nfolds):
                method = method.fit(np.delete(X, test_idx, 1),
                                    np.delete(labels, test_idx),
                                    **fit_kwargs)
                method = method.predict(X[:, test_idx])
                total_loss += loss_function(labels[test_idx], method.ypred)
        mean_loss = total_loss / (nfolds * nrepetitions)
        if best_kwargs is None or mean_loss < best_loss:
            best_loss = mean_loss
            best_kwargs = fit_kwargs

        elapsed = time.time() - start
        remaining = elapsed * len(combinations) / tested - elapsed
        print('Tested %d casus out of %d. Running time: %f, Expected remain time: %f'
              % (tested, len(combinations), elapsed, remaining))

    # Refit on everything so the returned coefficients and the stored
    # training data belong to the same, optimal model.
    method = method.fit(X, labels, **best_kwargs)
    method.cvloss = best_loss
    return method
class krr(object):
    """Class to perform Kernel Ridge Regression.

    Data matrices are (d x n): one column per sample. Supported kernels are
    'linear', 'polynomial' and 'gaussian'.
    """

    def __init__(self, kernel='linear', kernelparameter=1, regularization=0):
        self.kernel = kernel
        self.kernelparameter = kernelparameter
        # A regularization of 0 makes fit() choose one by leave-one-out CV.
        self.regularization = regularization
        self.alpha = None
        self.cvloss = None
        self.x = None

    def oneout(self, K, Y):
        """Choose the regularization by leave-one-out cross-validation.

        Param:
            K: (n x n) symmetric kernel matrix of the training data
            Y: (1 x n) or flat array with the n training targets
        Return:
            Candidate from np.arange(0., 0.1, 0.004), excluding 0, with the
            lowest mean squared leave-one-out error
        """
        regparams = np.arange(0., 0.1, 0.004)
        # c = 0 is skipped: for a full-rank K the hat matrix is the identity
        # there and the leave-one-out formula degenerates to 0 / 0.
        regparams = regparams[regparams > 0]
        n = K.shape[0]
        L, U = np.linalg.eigh(K)
        targets = np.reshape(np.asarray(Y, dtype=np.float64), (-1, n)).T
        projected = np.dot(U.T, targets)

        err = float('inf')
        regparam = regparams[0]
        with np.errstate(divide='ignore', invalid='ignore'):
            for c in regparams:
                # Hat matrix S = U diag(L / (L + c)) U^T; only S*Y and the
                # diagonal of S are needed, so S itself is never formed.
                shrink = L / (L + c)
                fitted = np.dot(U, shrink[:, np.newaxis] * projected)
                leverage = np.dot(U ** 2, shrink)
                residual = (targets - fitted) / (1. - leverage)[:, np.newaxis]
                errnew = np.mean(residual ** 2)
                if np.isfinite(errnew) and errnew < err:
                    err = errnew
                    regparam = c
        return regparam

    def gaussiankernel(self, X, Z=None):
        """Gaussian kernel between the columns of X and those of Z.

        Param:
            X: (d x n) data set
            Z: (d x m) second data set; defaults to X itself
        Return:
            (n x m) matrix with entries
            exp(-||x_i - z_j||^2 / (2 * d * kernelparameter^2))
        """
        X = np.asarray(X, dtype=np.float64)
        Z = X if Z is None else np.asarray(Z, dtype=np.float64)
        d = X.shape[0]
        sqdist = (np.sum(X ** 2, axis=0)[:, np.newaxis]
                  + np.sum(Z ** 2, axis=0)[np.newaxis, :]
                  - 2. * np.dot(X.T, Z))
        # Rounding can leave tiny negative squared distances; clip them.
        sqdist = np.maximum(sqdist, 0.)
        const = 2. * d * (self.kernelparameter * self.kernelparameter)
        return np.exp(-sqdist / const)

    def _kernelmatrix(self, A, B):
        """Evaluate the configured kernel between the columns of A and B."""
        if self.kernel == 'linear':
            return np.dot(A.T, B)
        if self.kernel == 'polynomial':
            return (np.dot(A.T, B) + 1.) ** self.kernelparameter
        if self.kernel == 'gaussian':
            return self.gaussiankernel(A, B)
        raise ValueError('Wrong kernel found')

    def fit(self, X, y, kernel=False, kernelparameter=False,
            regularization=False):
        """Train krr.

        Param:
            X: (d x n) Train dataset
            y: (1 x n) Real class classification of data in X
            kernel: Kernel to use to perform krr
            kernelparameter: Parameter to use with the kernel
            regularization: Regularization parameter; 0 selects it by
                    leave-one-out cross-validation
            An argument left at False keeps the value already stored.
        Return:
            self with all the attributes needed to perform a classification
        Raises:
            ValueError: if the kernel name is unknown
        """
        if kernel is not False:
            self.kernel = kernel
        if kernelparameter is not False:
            self.kernelparameter = kernelparameter
        if regularization is not False:
            self.regularization = regularization

        X = np.array(X, dtype=np.float64)
        Y = np.array(y, dtype=np.float64)
        K = self._kernelmatrix(X, X)
        n = X.shape[1]
        # Decide on the stored value, so a constructor setting is honoured.
        if self.regularization == 0:
            self.regularization = self.oneout(K, Y)
        self.alpha = np.linalg.solve(K + self.regularization * np.eye(n), Y.T)
        self.x = X
        # Primal weights; they describe the model only for the linear kernel.
        self.weight = np.dot(X, self.alpha)
        return self

    def predict(self, X):
        """Predict the targets of the columns of X and store them in ypred.

        Param:
            X: (d x m) data set to predict
        Return:
            self, with ypred laid out like the y given to fit
            ((1 x m) for a (1 x n) y)
        Raises:
            RuntimeError: if fit has not been called
        """
        if self.alpha is None:
            raise RuntimeError('Fitting not made')
        # Dual form: works for every kernel, not only the linear one.
        cross = self._kernelmatrix(self.x, np.asarray(X, dtype=np.float64))
        self.ypred = np.dot(cross.T, self.alpha).T
        return self
ps3 application.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
Write the functions roc curve krr app Write your code in the given functions stubs!
(c) Daniel Bartz, TU Berlin, 2013 import numpy as np import pylab as pl import random # import matplotlib as pl # from mpl toolkits.mplot3d import Axes3D # from matplotlib.lines import Line2D from scipy.stats import norm import os import sys import pickle
import ps3_implementation as imp
imp = reload(imp)


def predict(data, threshold):
    """Classify samples by comparing them with a threshold.

    Params:
        data: Array of real-valued samples
        threshold: Decision threshold
    Return:
        Float array shaped like data holding -1 where the sample is less
        than or equal to the threshold and 1 elsewhere
    """
    return np.where(np.asarray(data) <= threshold, -1., 1.)


def _roc_operating_point(labels, predictions):
    """Return (false positive rate, true positive rate) of +/-1 predictions."""
    positives = labels == 1
    negatives = labels == -1
    flagged = predictions == 1
    n_pos = np.sum(positives)
    n_neg = np.sum(negatives)
    # Rates are relative to the TRUE class sizes; an empty class yields 0.
    tpr = np.sum(flagged & positives) / float(n_pos) if n_pos else 0.
    fpr = np.sum(flagged & negatives) / float(n_neg) if n_neg else 0.
    return fpr, tpr


def roc_curve(n):
    """Plot the analytical and empirical roc-curves.

    Draws n samples, half from N(2, 1) labelled +1 and the rest from N(0, 1)
    labelled -1. Six randomly chosen samples serve as empirical thresholds;
    the threshold 1.0 is plotted in black as the analytical one. Every
    threshold is a hard classifier, so its curve is the polyline
    (0, 0) - (FPR, TPR) - (1, 1).

    Params:
        n: Number of samples
    Raises:
        ValueError: if n is smaller than 1
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    numpos = n // 2
    numneg = n - numpos
    sigma = 1.0
    muneg = 0.0
    mupos = 2.0
    datapos = np.random.normal(mupos, sigma, numpos)
    dataneg = np.random.normal(muneg, sigma, numneg)
    data = np.concatenate((datapos, dataneg))
    labels = np.concatenate((np.ones(numpos), -np.ones(numneg)))

    colors = ['b', 'g', 'r', 'c', 'm', 'y']
    indexes = np.random.randint(0, len(data), size=6)
    tries = data[indexes]
    legends = [i for i in tries]
    legends.append('1.0 Analytical')

    # Empirical thresholds first, then the analytical one in black. Each
    # curve is computed against the untouched labels so that labels and
    # predictions always refer to the same samples.
    for threshold, color in zip(list(tries) + [1.0], colors + ['k']):
        fpr, tpr = _roc_operating_point(labels, predict(data, threshold))
        pl.plot([0., fpr, 1.], [0., tpr, 1.], color)

    pl.ylim((0, 1))
    pl.xlim((0, 1))
    pl.xticks(np.arange(0, 1.1, 0.1))
    pl.yticks(np.arange(0, 1.1, 0.1))
    pl.grid(True)
    pl.xlabel('False positive rate')
    pl.ylabel('True positive rate')
    pl.legend(legends, fancybox=True, shadow=True, loc=4)
    pl.title('ROC curve')
    pl.show()
Page 10 of 10