#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__="rochuan"
import csv
import sys

from sklearn import preprocessing
from sklearn import tree
# NOTE(review): StringIO is not referenced by the code visible in this file,
# and sklearn.externals.six was dropped from scikit-learn in 0.23 -- confirm
# the pinned scikit-learn version before relying on this import.
from sklearn.externals.six import StringIO
from sklearn.feature_extraction import DictVectorizer
def importCsv():
    """Write the built-in sample table to ``csv_test.csv``.

    The file is created (or overwritten) in the current working directory and
    holds one header row followed by 17 data rows. Each row has eight
    columns: a running id, six categorical attributes, and a final yes/no
    label ('是' / '否').

    Returns:
        None. The only effect is the file written to disk.
    """
    # Column names, roughly: id, colour, stem, knock sound, texture, navel,
    # touch, good-melon label.
    # NOTE(review): the first header contains "(hào)", which looks like a
    # stray pinyin annotation. It is runtime data, so it is kept unchanged.
    header = ['編號(hào)', '色澤', '根蒂', '敲聲', '紋理', '臍部', '觸感', '好瓜']
    data = [
        ('1', '青綠', '蜷縮', '濁響', '清晰', '凹陷', '硬滑', '是'),
        ('2', '烏黑', '蜷縮', '沉悶', '清晰', '凹陷', '硬滑', '是'),
        ('3', '烏黑', '蜷縮', '濁響', '清晰', '凹陷', '硬滑', '是'),
        ('4', '青綠', '蜷縮', '沉悶', '清晰', '凹陷', '硬滑', '是'),
        ('5', '淺白', '蜷縮', '濁響', '清晰', '凹陷', '硬滑', '是'),
        ('6', '青綠', '稍蜷', '濁響', '清晰', '稍凹', '軟粘', '是'),
        ('7', '烏黑', '稍蜷', '濁響', '稍糊', '稍凹', '軟粘', '是'),
        ('8', '烏黑', '稍蜷', '濁響', '清晰', '稍凹', '硬滑', '是'),
        ('9', '烏黑', '稍蜷', '沉悶', '稍糊', '稍凹', '硬滑', '否'),
        ('10', '青綠', '硬挺', '清脆', '清晰', '平坦', '軟粘', '否'),
        ('11', '淺白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '否'),
        ('12', '淺白', '蜷縮', '濁響', '模糊', '平坦', '軟粘', '否'),
        ('13', '青綠', '稍蜷', '濁響', '稍糊', '凹陷', '硬滑', '否'),
        ('14', '淺白', '稍蜷', '沉悶', '稍糊', '凹陷', '硬滑', '否'),
        ('15', '烏黑', '稍蜷', '濁響', '清晰', '稍凹', '軟粘', '否'),
        ('16', '淺白', '蜷縮', '濁響', '模糊', '平坦', '硬滑', '否'),
        ('17', '青綠', '蜷縮', '沉悶', '稍糊', '稍凹', '硬滑', '否'),
    ]

    # The csv module needs a binary handle on Python 2 (str is bytes there)
    # and a text handle opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open('csv_test.csv', 'wb')
    else:
        csvfile = open('csv_test.csv', 'w', newline='', encoding='utf-8')

    # The context manager closes the handle even if a write raises.
    with csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(data)
def _open_utf8(path, mode):
    """Open *path* for ``str`` I/O: bytes on Python 2, UTF-8 text on Python 3."""
    if sys.version_info[0] < 3:
        # Python 2 `str` is bytes, and the csv module wants a binary handle.
        return open(path, mode + 'b')
    # newline='' leaves line-ending handling to the csv module / the writer.
    return open(path, mode, encoding='utf-8', newline='')


def _load_samples(path):
    """Read the CSV at *path* into per-row feature dicts and a label list.

    The first row is the header. For each later row the first column is
    skipped, the last column becomes the label, and the columns in between
    are stored as ``{header: value}``. Blank lines are ignored.

    Returns:
        ``(features, labels)``: two lists of equal length.

    Raises:
        ValueError: if the file contains no header row.
    """
    features = []
    labels = []
    with _open_utf8(path, 'r') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader, None)
        if headers is None:
            raise ValueError("%s is empty: expected a header row" % path)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to index.
                continue
            labels.append(row[-1])
            # row[1:-1] drops the id and the label; zip stops at the shorter
            # side, so headers[1:] lines up with exactly those columns.
            features.append(dict(zip(headers[1:], row[1:-1])))
    return features, labels


def main():
    """Fit an entropy-criterion decision tree on ``csv_test.csv``.

    Steps:
      1. Load the rows of ``csv_test.csv`` (the file ``importCsv`` writes)
         as feature dicts plus labels.
      2. One-hot encode the features with ``DictVectorizer`` and binarize
         the labels with ``LabelBinarizer``.
      3. Fit a ``DecisionTreeClassifier`` and export it in Graphviz format
         to ``doctione-tree.dot``.
      4. Build and print a modified copy of the first encoded sample.

    Intermediate values are printed to stdout. Returns None.

    Raises:
        IOError/OSError: if ``csv_test.csv`` cannot be opened.
        ValueError: if ``csv_test.csv`` is empty.
    """
    # Convert each CSV row into a key/value dict.
    featureList, labelList = _load_samples('csv_test.csv')
    print(featureList)

    vec = DictVectorizer()
    dummyX = vec.fit_transform(featureList).toarray()
    print("dummyX: %s" % (dummyX,))

    lb = preprocessing.LabelBinarizer()
    dummyY = lb.fit_transform(labelList)
    print("dummyY: %s" % (dummyY,))

    # criterion="entropy" splits on information gain, the measure ID3 uses.
    # scikit-learn documents its tree implementation as an optimised CART.
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(dummyX, dummyY)
    print("clf:" + str(clf))

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and fall back for older releases.
    if hasattr(vec, "get_feature_names_out"):
        feature_names = list(vec.get_feature_names_out())
    else:
        feature_names = vec.get_feature_names()

    with _open_utf8("doctione-tree.dot", "w") as f:
        # With out_file given, the return value carries nothing useful, so
        # it is not bound to a name.
        tree.export_graphviz(clf, feature_names=feature_names, out_file=f)

    # Copy first: dummyX[0, :] is a view, and writing through it would
    # silently alter the training matrix.
    newRow = dummyX[0, :].copy()
    newRow[0] = 1
    newRow[2] = 0
    print("newRow:" + str(newRow))
# Script entry point. The call is unconditional, so main() also runs when this
# module is imported. importCsv() is not called in the code visible here, so
# csv_test.csv must already exist when this line executes.
main()