機器學習技法作業二,Experiment with Bagging Ridge Regression.
13~14題為linear LSSVM算法,代碼如下,與RBF kernel的LSSVM不同的只是kernel的計算部分:
import numpy as np
from math import *
import matplotlib.pyplot as plt
def loaddata(file):
    """Load a whitespace-separated data file into (features, labels).

    Each line holds d feature values followed by one label. A bias term
    x0 = 1 is prepended, so ``features`` has shape (n, d + 1) and
    ``labels`` has shape (n, 1).
    """
    # Context manager replaces the manual try/finally open/close.
    with open(file) as f:
        lines = f.readlines()
    example_num = len(lines)
    # Columns per line = d features + 1 label; with the bias column the
    # feature-matrix width is exactly this count.
    dimension = len(lines[0].strip().split())
    features = np.ones((example_num, dimension))  # column 0 stays 1 (bias)
    labels = np.zeros((example_num, 1))
    for index, line in enumerate(lines):
        item = line.strip().split()  # use the enumerated line directly
        features[index, 1:] = [float(v) for v in item[:-1]]
        labels[index] = float(item[-1])
    return features, labels
def LSSVM(X, Y, gama, lamb):
    """Fit a least-squares SVM with a linear kernel.

    Solves (lamb * I + K) beta = Y, where K[i, j] = x_i . x_j.

    Parameters
    ----------
    X : (N, d) array of training features.
    Y : (N, 1) array of training labels.
    gama : unused for the linear kernel; kept for interface parity with
        the RBF variant (there it is the kernel width).
    lamb : ridge regularization strength.

    Returns
    -------
    beta : (N, 1) array of dual coefficients.
    """
    N = len(Y)
    # Linear Gram matrix, vectorized (replaces an O(N^2) Python loop).
    K = np.dot(X, X.T)
    # RBF alternative:
    # sq = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1); K = np.exp(-gama * sq)
    # solve() is faster and numerically stabler than forming the inverse.
    return np.linalg.solve(np.eye(N) * lamb + K, Y)
def predict(Xmodel, Xtest, Ytest, beta, gama):
    """Score test points against a linear-kernel LSSVM model.

    Parameters
    ----------
    Xmodel : (N, d) training features the model was fit on.
    Xtest : (M, d) test features.
    Ytest : (M, 1) test labels in {-1, +1}.
    beta : (N, 1) dual coefficients from LSSVM().
    gama : unused for the linear kernel (RBF-variant kernel width).

    Returns
    -------
    p : (M, 1) raw decision values sum_n beta_n * K(x_n, x).
    accuracy : length-1 array, fraction of correct sign predictions.
    """
    # Kernel between every test and training point in one matmul
    # (replaces the O(M*N) nested Python loops).
    K = np.dot(Xtest, Xmodel.T)
    # RBF alternative:
    # K = np.exp(-gama * ((Xtest[:, None] - Xmodel[None]) ** 2).sum(-1))
    p = np.dot(K, beta)
    py = np.where(p >= 0, 1.0, -1.0)  # tie (p == 0) maps to +1, as before
    accuracy = np.mean(py == Ytest, axis=0)
    return p, accuracy
X, Y = loaddata('hw2_lssvm_all.dat.txt')
gama = 0.125

# Fit LSSVM on the first 400 examples for several ridge strengths.
lamb = 0.01
beta1 = LSSVM(X[:400, :], Y[:400], gama, lamb)
lamb = 0.1
beta2 = LSSVM(X[:400, :], Y[:400], gama, lamb)
lamb = 1
beta3 = LSSVM(X[:400, :], Y[:400], gama, lamb)
lamb = 10
beta4 = LSSVM(X[:400, :], Y[:400], gama, lamb)
lamb = 100
beta5 = LSSVM(X[:400, :], Y[:400], gama, lamb)

# Out-of-sample evaluation with lambda = 1.
# FIX: the original sliced X[401:, :] / Y[401:], silently dropping
# example 400 from the test set; the complement of the :400 training
# split is 400:.
p, acc = predict(X[:400, :], X[400:, :], Y[400:], beta3, gama)
## previously reported acc = array([0.63636364]) (measured on the 401: slice)
15~16題是LSSVM與Bootstrapping結合,代碼如下:
import numpy as np
from math import *
import matplotlib.pyplot as plt
def loaddata(file):
    """Load a whitespace-separated data file into (features, labels).

    Each line holds d feature values followed by one label. A bias term
    x0 = 1 is prepended, so ``features`` has shape (n, d + 1) and
    ``labels`` has shape (n, 1).
    """
    # Context manager replaces the manual try/finally open/close.
    with open(file) as f:
        lines = f.readlines()
    example_num = len(lines)
    # Columns per line = d features + 1 label; with the bias column the
    # feature-matrix width is exactly this count.
    dimension = len(lines[0].strip().split())
    features = np.ones((example_num, dimension))  # column 0 stays 1 (bias)
    labels = np.zeros((example_num, 1))
    for index, line in enumerate(lines):
        item = line.strip().split()  # use the enumerated line directly
        features[index, 1:] = [float(v) for v in item[:-1]]
        labels[index] = float(item[-1])
    return features, labels
class Bagging(object):
    """Bagged linear-kernel LSSVM: bootstrap resampling + uniform voting.

    Intended workflow: Bootstrap() -> train() -> predict().
    """

    def __init__(self, interation, X, Y):
        """X, Y are the training data; only their dimensions are used here
        to size the per-round storage arrays."""
        self.__inter = interation  # number of bootstrap rounds / hypotheses
        self.__beta = np.zeros((interation, len(Y), 1))           # dual coefficients per round
        self.__Xbar = np.zeros((interation, len(Y), X.shape[1]))  # resampled features per round
        self.__Ybar = np.zeros((interation, len(Y), 1))           # resampled labels per round

    def Bootstrap(self, X, Y):
        """Draw the bootstrap samples: N draws with replacement, repeated
        for each of the `interation` rounds."""
        N = len(Y)
        for inter in range(self.__inter):
            # Vectorized sampling replaces the per-element randint loop.
            rows = np.random.randint(N, size=N)
            self.__Xbar[inter] = X[rows, :]
            self.__Ybar[inter] = Y[rows, :]

    def train(self, lamb):
        """Fit one LSSVM per bootstrap sample: (lamb*I + K) beta = Ybar."""
        N = self.__Ybar.shape[1]
        Lamb = np.eye(N) * lamb  # ridge term
        for inter in range(self.__inter):
            Xb = self.__Xbar[inter]
            # Linear Gram matrix in one matmul (was an O(N^2) Python loop).
            K = np.dot(Xb, Xb.T)
            # RBF alternative: K = np.exp(-gama * squared_distances)
            # solve() is faster and stabler than forming the inverse.
            self.__beta[inter] = np.linalg.solve(Lamb + K, self.__Ybar[inter])

    def predict(self, Xtest, Ytest):
        """Uniform vote of the bagged hypotheses (usable for E_in or E_out).

        Returns (py, accuracy): voted labels in {-1, +1} (ties -> +1) and
        the accuracy as a length-1 array.
        """
        votes = np.zeros((len(Ytest), 1))
        for inter in range(self.__inter):
            # Decision values of hypothesis `inter` for all test points at
            # once (replaces the nested per-point kernel loops).
            scores = np.dot(np.dot(Xtest, self.__Xbar[inter].T), self.__beta[inter])
            votes += np.where(scores >= 0, 1.0, -1.0)
        # Majority vote: positive sum means more hypotheses said +1.
        py = np.where(votes >= 0, 1.0, -1.0)
        accuracy = np.mean(py == Ytest, axis=0)
        return py, accuracy

    def get_Xbar(self):
        return self.__Xbar

    def get_Ybar(self):
        return self.__Ybar

    def get_beta(self):
        return self.__beta
# Train a 250-round bagged LSSVM on the first 400 examples, then measure
# in-sample and out-of-sample accuracy.
X, Y = loaddata('hw2_lssvm_all.dat.txt')
Bag = Bagging(250, X[:400,:], Y[:400]) ## construct the bagging object (250 bootstrap rounds)
Bag.Bootstrap(X[:400,:], Y[:400]) ## draw the bootstrap samples
Bag.train(0.01) ## fit one LSSVM per sample (lambda = 0.01)
py_in, acc_in = Bag.predict(X[:400,:], Y[:400]) ## in-sample accuracy (E_in)
py_out,acc_out = Bag.predict(X[400:,:], Y[400:]) ## out-of-sample accuracy (E_out)
##acc_in = array([0.68])
##acc_out = array([0.63636364])
## observation: lambda has little effect on the results