Supervised machine learning


Supervised machine learning

# Load the Wisconsin breast cancer data from the UCI repository and split it
# into a 70% training set and a 30% validation set.
loc <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
ds <- "breast-cancer-wisconsin/breast-cancer-wisconsin.data"
url <- paste0(loc, ds)

# The file is read without a header row; "?" entries are treated as NA.
breast <- read.table(url, sep = ",", header = FALSE, na.strings = "?")
names(breast) <- c(
  "ID", "clumpThickness", "sizeUniformity",
  "shapeUniformity", "maginalAdhesion", "singleEpithelialCellSize",
  "bareNuclei", "blandChromatin", "normalNucleoli", "mitosis", "class"
)

# Drop the first column (ID) so that `class ~ .` formulas do not use it.
df <- breast[-1]
# Recode the class column (values 2 and 4) as a labelled factor.
df$class <- factor(df$class, levels = c(2, 4),
                   labels = c("benign", "malignant"))

# Fixed seed so the random row sample below is reproducible.
set.seed(1234)
train <- sample(nrow(df), 0.7 * nrow(df))
df.train <- df[train, ]
df.validate <- df[-train, ]

# Class counts in each partition.
table(df.train$class)
table(df.validate$class)

1. logistic regression

# Fit a logistic regression of class on every other column of the training
# set, then print the model summary.
fit.logit <- glm(class ~ ., data = df.train, family = binomial())
summary(fit.logit)

# Optional stepwise variable selection, left disabled:
# logit.fit.reduced <- step(fit.logit)

# Predicted response (type = "response") for each validation row.
prob <- predict(fit.logit, df.validate, type = "response")

# Classify with a 0.5 cutoff: FALSE -> "benign", TRUE -> "malignant".
# The labels must match the levels of df$class so the table lines up.
logit.pred <- factor(prob > .5, levels = c(FALSE, TRUE),
                     labels = c("benign", "malignant"))

# Cross-tabulate actual class (rows) against predicted class (columns).
logit.perf <- table(df.validate$class, logit.pred,
                    dnn = c("Actual", "Predicted"))
logit.perf

2.1 decision tree

library(rpart)
library(rpart.plot)

set.seed(1234)
head(df.train)

# Grow a classification tree on the training set, splitting on the
# "information" criterion.
dtree <- rpart(class ~ ., data = df.train, method = "class",
               parms = list(split = "information"))
dtree

# Inspect the complexity-parameter table and its plot before pruning.
dtree$cptable
plotcp(dtree)

# Prune the tree at a complexity parameter of 0.0125.
dtree.pruned <- prune(dtree, cp = .0125)

# Plot the pruned tree.
prp(dtree.pruned, type = 2, extra = 104, fallen.leaves = TRUE,
    main = "Decision Tree")

# Classify the validation set and cross-tabulate actual vs. predicted class.
dtree.pred <- predict(dtree.pruned, df.validate, type = "class")
dtree.perf <- table(df.validate$class, dtree.pred,
                    dnn = c("actual", "predicted"))
dtree.perf
# print(dtree); summary(dtree)

2.2. conditional inference tree

library(party)

# Fit a conditional inference tree on the training set.
# NOTE(review): the fitted object reuses the name `ctree`, the same name as
# the fitting function called on the right-hand side; the name is kept here
# so any code that refers to `ctree` keeps working.
ctree <- ctree(class ~ ., data = df.train)
plot(ctree, main = "conditional inference tree")

# Predict the class of each validation row and print the predictions.
ctree.pred <- predict(ctree, df.validate, type = "response")
ctree.pred

# Cross-tabulate actual vs. predicted class and show it as proportions.
ctree.perf <- table(df.validate$class, ctree.pred,
                    dnn = c("actual", "predicted"))
prop.table(ctree.perf)

# Class counts among the training rows that satisfy all three conditions:
# normalNucleoli > 3, sizeUniformity <= 3 and bareNuclei <= 5.
table(
  subset(
    df.train,
    normalNucleoli > 3 & sizeUniformity <= 3 & bareNuclei <= 5,
    select = class
  )
)

3. random forest

library(randomForest)

# Seed the RNG before the fit so the forest is reproducible, matching the
# seeded steps elsewhere in this script.
set.seed(1234)

# Fit a random forest on the training set. Missing values are handled by
# na.roughfix, and importance = TRUE requests variable-importance measures.
fit.forest <- randomForest(class ~ ., data = df.train,
                           na.action = na.roughfix, importance = TRUE)
fit.forest

# Variable importance from the fitted forest (importance measure type 2).
importance(fit.forest, type = 2)

# Classify the validation set and cross-tabulate actual vs. predicted class,
# first as counts and then as proportions.
forest.pred <- predict(fit.forest, df.validate)
forest.perf <- table(df.validate$class, forest.pred,
                     dnn = c("actual", "predicted"))
forest.perf
prop.table(forest.perf)

4. Support vector machine

library(e1071)

# Fit a support vector machine on the training set with default settings.
fit.svm <- svm(class ~ ., data = df.train)
fit.svm

# Score only the validation rows without missing values; the same filtered
# data frame supplies the actual classes so both vectors stay aligned.
validate.complete <- na.omit(df.validate)
svm.pred <- predict(fit.svm, validate.complete)
svm.perf <- table(validate.complete$class, svm.pred,
                  dnn = c("Actual", "Predicted"))
svm.perf

The SVM has two tuning parameters, gamma and cost (both must be > 0).

# Grid search over gamma (10^-6 .. 10^1) and cost (10^-10 .. 10^10).
# Seed the RNG first so the resampling inside tune.svm, and therefore the
# selected parameters, are reproducible.
set.seed(1234)
tuned <- tune.svm(class ~ ., data = df.train,
                  gamma = 10^(-6:1), cost = 10^(-10:10))

tuned

# Refit with fixed parameter values.
# NOTE(review): gamma and cost are hard-coded here rather than read from
# `tuned`; check them against the printed tuning result.
fit.svmtuned <- svm(class ~ ., data = df.train, gamma = 0.01, cost = 1)

rattle

# Load the Pima Indians diabetes data from the UCI repository.
# Note: this reassigns loc, ds and url used earlier for the breast cancer data.
loc <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
ds <- "pima-indians-diabetes/pima-indians-diabetes.data"
url <- paste0(loc, ds)

# The file is read without a header row, so column names are assigned here.
diabetes <- read.table(url, sep = ",", header = FALSE)
names(diabetes) <- c("npregant", "plasma", "bp", "triceps",
                     "insulin", "bmi", "pedigree", "age", "class")

# Recode the class column (values 0 and 1) as a labelled factor.
diabetes$class <- factor(diabetes$class, levels = c(0, 1),
                         labels = c("normal", "diabetic"))

All of the code and data above come from "R in Action", 2nd edition.

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

友情链接 更多精彩内容