|
利用 Python 实现决策树程序，最后输出整个决策过程的 PDF 文件
- # -*- coding: utf-8 -*-
- """
- Created on Wed Nov 22 13:13:07 2017
- @author: suncaixin
- """
- #collect number
- from sklearn.feature_extraction import DictVectorizer
- import numpy as np
- import pandas as pd
- import csv
- from sklearn import tree
- from sklearn import preprocessing
# Placeholder from the original script: replace with the path of the training CSV.
DATA_PATH = r'数据地址'
# Graphviz description of the fitted tree. Render it to PDF with:
#   dot -Tpdf allElectronicInformationGain.dot -o output.pdf
DOT_PATH = 'allElectronicInformationGain.dot'


def load_dataset(path, *, verbose=False):
    """Read the training CSV into headers, feature dicts and class labels.

    The first row is taken as the header.  In every following row the first
    column is skipped, the last column is the class label, and each column in
    between becomes one ``{header: value}`` entry of that row's feature dict.

    Args:
        path: Location of the CSV file.
        verbose: When true, print the header row, every feature value and
            every per-row dict while reading (the original script's trace).

    Returns:
        A ``(headers, feature_list, label_list)`` tuple.

    Raises:
        ValueError: If the file contains no header row.
    """
    feature_list = []
    label_list = []
    # newline='' is what the csv module asks for; the context manager closes
    # the handle that the original script leaked.
    with open(path, 'rt', newline='') as csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader, None)
        if headers is None:
            raise ValueError('{!r} is empty: expected a header row'.format(path))
        if verbose:
            print(headers)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no sample.
                continue
            label_list.append(row[-1])
            row_dict = {}
            for index, value in enumerate(row[1:-1], start=1):
                if verbose:
                    print(value)
                row_dict[headers[index]] = value
            if verbose:
                print('rowDict:', row_dict)
            feature_list.append(row_dict)
    return headers, feature_list, label_list


def feature_names(vectorizer):
    """Return the vectorizer's output feature names as a plain list.

    Prefers ``get_feature_names_out`` and falls back to the older
    ``get_feature_names`` when the former is not available, so the script
    works with scikit-learn releases on either side of the rename.
    """
    getter = getattr(vectorizer, 'get_feature_names_out', None)
    if getter is None:
        getter = vectorizer.get_feature_names
    return list(getter())


def make_query_row(feature_matrix):
    """Build the sample to predict from the first row of *feature_matrix*.

    The row is copied before entries 0 and 2 are overwritten with 1 and 0, so
    the matrix passed in is left untouched (the original aliased the row and
    modified the training data in place).
    """
    query = feature_matrix[0].copy()
    query[0] = 1
    query[2] = 0
    return query


def main():
    """Fit an entropy-based decision tree, export it as .dot and predict once."""
    _headers, feature_list, label_list = load_dataset(DATA_PATH, verbose=True)
    print(feature_list)

    # Transform the feature dicts into a numeric matrix.
    vec = DictVectorizer()
    dummy_x = vec.fit_transform(feature_list).toarray()
    print('dunmyX:', str(dummy_x))
    names = feature_names(vec)
    print(names)

    # Transform the class labels.
    lb = preprocessing.LabelBinarizer()
    dummy_y = lb.fit_transform(label_list)
    print('dunmyY:', str(dummy_y))

    # Fit the decision tree.
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(dummy_x, dummy_y)
    print('clf', str(clf))

    # Write the model out for Graphviz.
    with open(DOT_PATH, 'w') as dot_file:
        tree.export_graphviz(clf, feature_names=names, out_file=dot_file)

    # Predict the class of a modified copy of the first training sample.
    new_row = make_query_row(dummy_x)
    predicted = clf.predict([new_row])
    print('predicted:', str(predicted))


if __name__ == '__main__':
    main()
复制代码
|
|