# w2----------------
# Q2.1---------
rm(list = ls())
N <- 1000
x <- rexp(N,rate=0.5)
hist(x,freq = F)
xx <- seq(0,15,by=0.1)#沒(méi)記住怎么加的
lines(xx,dexp(xx,rate = 0.5),col='red')#這個(gè)叫theorical
lines(density(x),col='blue')#沒(méi)記住怎么加的,這個(gè)叫estimate,要分開(kāi)
?density()
lines(density(x,kernel = "rectangular"),col='orange')
# 怎么直接畫(huà)KDE
plot(density(x), lwd=3, col=2)
rug(x)
?rug#將數(shù)據(jù)的地毯表示(一維圖)添加到圖中。
density(x)$bw
ud <- density(x,bw=3.789586-3)#under-smoothing
ov <- density(x,bw=3.789586+3)#over-smoothing
plot(ud)
lines(ov)
# Q2.2---------
eps <- 0.95
N <- 100
u <- seq(0,1,length=N)
x <- numeric(N) # 讓showing的是generated sample,所以是x不是y
for(i in 1:N){#這個(gè)就是這樣定義的
? if(u[i]>eps){
? ? x[i] <- rt(1,df=3)
? }else{
? ? x[i] <- rnorm(1)
? }
}
hist(x)
# Q2.3---------
n <- 1000
set.seed(4060)
# (a)
x1 <- rnorm(n,mean=2,sd=1)
f1 <- density(x1)
hist(x1,freq = F)
xx1 <- seq(-1,7,by=0.1)
lines(xx1,dnorm(xx1,mean = 2,sd=1),col='red')
lines(f1,col='blue')
f1 <- density(x1,adjust =1.3)
lines(f1,col='orange')
mse <- mean((dt(density(x2)$x,df=3)-f2)^2)
lines(density(x2,adjust = 1.5),col='orange')
# (b)
x2 <- rt(n,df=3)
f2 <- density(x2)$y
hist(x2,freq = F,ylim = c(0,0.6))#這里要控制y的范圍!?。?/p>
xx2 <- seq(-15,15,by=0.1)
lines(xx2,dt(xx2,df=3),col='red')
lines(density(x2),col='blue')
hist(x2,freq = F,ylim = c(0,0.6),breaks = 21)
xx2 <- seq(-15,15,by=0.1)
lines(xx2,dt(xx2,df=3),col='red')
lines(density(x2),col='blue')
# (b)
x3 <- rexp(n,rate = 2)
f3 <- density(x3)$y
hist(x3,freq = F,ylim = c(0,1.5))
lines(density(x3),col='blue')
xx3 <- seq(0,3.5,length=1000)
lines(xx3,dexp(xx3,rate=3),col='red')
# Q2.4---------
library(KernSmooth)
dat <-? faithful
# (a)
bw = apply(faithful, 2, sd)/2
fx <- bkde2D(dat,bandwidth = bw)
plot(dat,xlim=c(1,6),ylim=c(35,105))
contour(fx$x1, fx$x2, fx$fhat,add = T)#沒(méi)記住
persp(fx$x1, fx$x2, fx$fhat, shade=.15, theta=-30, phi=15)
# 總結(jié)
# h越小,bw越小,adjust越小,KDE越squiggly,越高&narrow
# h越da,bw越大,adjust越大,KDE越smooth,越矮
# (b)
lp.fit = locpoly(faithful$eruptions, faithful$waiting, bandwidth=1)#沒(méi)記住
# 只問(wèn)local polynomial就這么寫(xiě),bw越小越高,這個(gè)不是默認(rèn)值,自己找一個(gè)
plot(faithful, pch=20)
lines(lp.fit, lwd=3, col=1)
?locpoly
# Q2.5---------
# 這里直接不會(huì)
# (a)
n = 1000
set.seed(4060)
Mu = c(1,4)#自己設(shè)一個(gè)
Sigma = c(2,1)#自己設(shè)一個(gè)
X1 = rnorm(N, mean=Mu[1], sd=Sigma[1])
X2 = rnorm(N, mean=Mu[2], sd=Sigma[2])
X = cbind(X1,X2)
# (b)
library('MASS')
bw = c(.5, .5) * 4
f.kde2d = kde2d(X1, X2, h=bw) # xy是說(shuō)兩個(gè)而已,不是x和y
names(f.kde2d)
plot(X,xlim=c(-6,12),ylim=c(0,8))
contour(f.kde2d$x,f.kde2d$y,f.kde2d$z,add = T)#這里是要3個(gè)的,或者直接寫(xiě)f.kde2d就行
# (c)不考
# w3----------------
rm(list = ls())
# Q5.1 MC integration-----
# 求e^(-x) 在 (2,4)上的積分(0.117)沒(méi)記住
# g(x)=e^(-x); x~f(x)=1/2=UNIF(2,4); 2*E[g(x)]=所求
# x~f(x)=1/2=UNIF(2,4)很重要?。。。?!
M <- 1000
a <- 2
b <- 4
x <- runif(M,a,b)
theta.est <- (sum(exp(-x))/M)*2;theta.est
# Q5.2-------
N <- 1000
x <- 1
a <- x
b <- -x
t <- runif(N,min=b,max = a)
prob <- ((sum((1/sqrt(2*pi))*exp(-0.5*(t^2))))/N)*2
rs <- 0.5+prob/2
# check:
pnorm(1)
# evaluate variance的話我們加一個(gè)MC求出estimator
M <- 999
v <- numeric(M)
for(i in 1:M){
? t <- runif(N,min=b,max = a)
? prob <- ((sum((1/sqrt(2*pi))*exp(-0.5*(t^2))))/N)*2
? rs <- 0.5+prob/2
? v[i] <- rs
}
mean(v)#check
var(v)
quantile(v,c(0.025,0.975))
# Q5.3-------
# (a)
n <- 1000
x <- rnorm(n)
is <- order(x)
xk <- x[is]
k <- 0
xnew <- xk[c((k+1):(n-k))]
length(xnew)
mn <- numeric(n/2)
for(k in 0:(n/2-1)){
? xnew <- xk[c((k+1):(n-k))]
? mn[k+1] <- mean(xnew)
}
mn
# (b)
k <- 1 #這里k是定下的
M = 1000
n = 20 #sample size
tm <- numeric(M)
mn <- numeric(M)
for (m in 1:M) {
? x <- rnorm(n,mean=15,sd=3)
? mn[m] <- mean(x)
? xs <- sort(x)
? xnew <- xs[c((k+1):(n-k))]
? tm[m] <- mean(xnew)
}
mse1 <- mean((mn-15)^2)
mse2 <- mean((tm-15)^2)
b1 <- mean(mn-15)
b2 <- mean(tm-15)
mse1
var(mn)+b1
mse2
var(tm)+b2
# Q5.4-------
# (a)
N <- 50
M <- 100
a <- 7
b <- 3
x <- rep(c(1:5), N/5)
m <- 0.5
s <- 1.2
rseed=0
LSvec = RMvec = matrix(0,2,M)
set.seed(rseed)
library(MASS)
library(VGAM)
# (b)
for(m in 1:M){
? e <- rlnorm(N,m,s)
? y <- a+b*x+e
? mylm <- lm(y~x)
? myrlm <- rlm(y~x,method="MM")
}
# Q5.5-------
M <- 10000
N <- 50
x <- rnorm(N,mean = 0,sd=2)
s1 <- s2 <- numeric(M)
?var()
for(m in 1:M){
? x <- rnorm(N,mean = 0,sd=2)
? s1[m] <- sd(x)
? s2[m] <- sqrt(mean((x-mean(x))^2))#這個(gè)是誘有偏的,記住
}
mean(s1)
mean(s2)
boxplot(s1,s2)
M <- 10000
N <- 100
x <- rnorm(N,mean = 0,sd=2)
s1 <- s2 <- numeric(M)
?var()
for(m in 1:M){
? x <- rnorm(N,mean = 0,sd=2)
? s1[m] <- sd(x)
? s2[m] <- ((N-1)*s1[m])/N
}
sd(s1)
sd(s2)
boxplot(s1,s2)
# w4----------------
rm(list = ls())
iris <- iris
x <- subset(iris$Sepal.Length,iris$Species=='setosa')
xbar <- mean(x,na.rm = T)
n <- length(x)
B <- 100
xb <- numeric(B)
for(b in 1:B){
? #idx <- sample(1:n,n,replace = T)
? tst <- sample(x,n,replace = T)#所以這是一樣的
? #xnew <- x[idx]
? xb[b] <- mean(tst)
}
mean(xb)
mean(xb)-xbar#bias
#estimate of SE:
sd(xb)
# Q5.9-------
cars <- cars
x <- cars$speed
y <- cars$dist
B <- 10000
n <- nrow(cars)
lm1 <- lm(y~x)
summary(lm1)
lm1$coefficients[2]
slp <- itc <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? ynew <- y[idx]
? xnew <- x[idx]
? lmb <- lm(ynew~xnew)
? slp[b] <- lmb$coefficients[2]
? itc[b] <- lmb$coefficients[1]
}
# Bs SE estimate:
sd(slp)
sd(itc)
#Bs estimate of bias:
mean(slp)-coef(lm1)[2]
#naive bootstrapping 95%Ci
quantile(slp,c(0.25,0.975))
# MC matrix-------
# 讓matrix里的每一個(gè)值都是MC的mean estimate
M <- 100
mu <- 10
sigma <- 3
Ns <- c(20,30,50,100,200,500)
mt <- matrix(NA,nrow = M,ncol = length(Ns))
for (j in 1:length(Ns)) {
? for (m in 1:M) {
? ? x <- rnorm(Ns[j],mean = mu,sd=sigma)
? ? mt[m,j] <- mean(x)
? }
}
apply(mt,2,mean)
boxplot(mt,names=Ns,xlab='sample size N')
#計(jì)算theoretic CI:
lb <- mu-1.96*sigma/sqrt(Ns)#lower bound
ub <- mu+1.96*sigma/sqrt(Ns)#upper bound
#計(jì)算emprical/pracital CI:
lb <- apply(means,2,mean)-1.96*sigma/sqrt(Ns)#lower bound
ub <- apply(means,2,mean)+1.96*sigma/sqrt(Ns)#upper bound
# Q5.6-------
M <- 100
ndf <- c(2,4,10)
n <- 100
mt <- matrix(NA,nrow = M,ncol = 3)
for(j in 1:3){
? for (i in 1:M) {
? ? x <- rchisq(n,df=ndf[j])
? ? mt[i,j] <- mean(x)
? }
}
boxplot(mt[,1],mt[,2],mt[,3])
#可以看出來(lái)的是df=E(x),沒(méi)記住
sd <- apply(mt,2,sd)#the estimates of standard error of xbar
# w5----------------
# 20-21 的CA1 Q1------
set.seed(6040)
M <- 1000
N <- 1000
r <- numeric(M)
for(m in 1:M){
? x <- sample(1:N,N,replace = T)
? xnew <- unique(x)
? r[m] <- length(xnew)/N
}
mean(r)
# 20-21 的CA1 Q2------
set.seed(6015)
M <- 1000
N <- 100
a <- 3
b <- 2
mn <- numeric(M)
for(m in 1:M){
? x <- rgamma(N,shape = a,rate = b)
? mn[m] <- mean(x)
}
# (a)
mean(mn)
# (b)
# 不驚奇,因?yàn)閠rue value是1.5,MN越大,那么這個(gè)值越接近1.5
# (c)
sd(mn)
# 20-21 的CA1 Q3------
# 看好了,只有feet和girth
set.seed(4060)
dat <- trees
B <- 100
slp <- numeric(B)
n <- nrow(dat)
for(b in 1:B){
? idx <- sample(c(1:n),n,replace = T)
? ynew <- dat$Height[idx]
? xnew1 <- dat$Girth[idx]
? lmb <- lm(ynew~xnew1+xnew2)
? slp[b] <- lmb$coefficient[2]
}
lmb <- lm(dat$Height~dat$Girth+dat$Volume)
lmb$coefficients[2]
# (a)
boxplot(slp)
# (b)
mean(slp)
# (c)
sd(slp)
# (d)
# Bs的empirical CI
lso <- lm(y~x)$coef#這個(gè)就是y~x的斜率
2*lso[2] - quantile(slp, probs=c(0.975,0.025))#這個(gè)沒(méi)寫(xiě)出來(lái)
# w6----------------
mtcars <- mtcars
n <- nrow(mtcars)
dat <- mtcars[sample(1:n,n),]
y = dat$mpg
x <- dat[,-1]
glm1 <- glm(y~.,data = x)
yhat <- glm1$fitted.values#取fitted value
plot(y,yhat)
boxplot((y-yhat)^2)
mean((y-yhat)^2)
train.idx <- c(1:floor(n/2))
train.idx <- c(1:16)
glm.train <- glm(y[train.idx]~.,data = x[train.idx,])
o1 <- glm(y~.,data = x,subset = train.idx)#一樣的
ytrain <- glm.train$fitted.values
ytst <- predict(glm.train,newdata = x[-train.idx,])
mse.train <- mean((glm.train$residuals)^2)
mse.tst <- mean((y[-train.idx]-ytst)^2)
boxplot((glm.train$residuals)^2,(y-ytst)^2)
boxplot((ytrain-y[train.idx])^2)
# 加上Bs:
B <- 100
mseb.train <- mseb.tst <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? glm.t <- glm(y~.,data=x,subset = idx)
? mseb.train[b] <- mean((glm.t$residuals)^2)
? ytst <- predict(glm.t,newdata=x[-idx,])
? mseb.tst[b] <- mean((y[-idx]-ytst)^2)
}
boxplot(mseb.train,mseb.tst)
# k-fold CV
K <- 5
msecv.train <- msecv.tst <- numeric(K)
folds <- cut(1:n,K,labels = F)
table(folds)
for(k in 1:K){
? idx <- which(folds!=k)
? glm.t <- glm(y~.,data = x,subset = idx)
? msecv.train[k] <- mean((glm.t$residuals)^2)
? ytst <- predict(glm.t,newdata = x[-idx,])
? msecv.tst[k] <- mean((y[-idx]-ytst)^2)
}
boxplot(msecv.train,msecv.tst)
B <- 10
mseb.train <- mseb.tst <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? glm.t <- glm(y~.,data=x,subset = idx)
? mseb.train[b] <- mean((glm.t$residuals)^2)
? ytst <- predict(glm.t,newdata=x[-idx,])
? mseb.tst[b] <- mean((y[-idx]-ytst)^2)
}
boxplot(mseb.train,mseb.tst)
# Bs的話用了32-32*0.632個(gè),大約是10個(gè)做test
# CV里K=3的話那么大約用32/3=10個(gè)做test
# 所以我們期望K=3時(shí)兩對(duì)error是一樣的,但是k=5,CV的
# K-fold更accurate,不那么widespread,如果增加B widespread會(huì)變好,但是會(huì)得到很多outliers
# Q5.8--------------
rm(list = ls())
library(bootstrap)
dat <- law
cor(dat$LSAT,dat$GPA)
B <- 1000
n <- nrow(dat)
cr <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? x1 <- dat$LSAT[idx]
? x2 <- dat$GPA[idx]
? cr[b] <- cor(x1,x2)
}
mean(cr)
# bias
mean(cr)-cor(dat$LSAT,dat$GPA)
# native CI
quantile(cr,c(0.025,0.975))
# Q5.9--------------
rm(list = ls())
dat <- cars
# (a)
lm1 <- lm(dat$dist~dat$speed)
lm1$coefficients
# (b)
B <- 10000
n <- nrow(dat)
slp <- numeric(B)
itc <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? xb <- dat$speed[idx]
? yb <- dat$dist[idx]
? lm <- lm(yb~xb)
? slp[b] <- lm$coefficient[2]
? itc[b] <- lm$coefficient[1]
}
lm1$coefficient
mean(slp)
mean(itc)
# (c)
# 不會(huì)
# 這個(gè)題就是在說(shuō)讓看看兩個(gè)estimator的分布,分布的話用hist就行了
hist(slp)
hist(itc)
# (d)
# 不會(huì)
# 記住怎么看joint distr------
library(ellipse)
plot(itc,slp,cex=0.5,pch=20)
e <- ellipse(lm1)
plot(e,t='l')
bcoef <- cbind(itc,slp)
points(bcoef,pch=20,col=8,cex=.5)
points(itc,slp,pch=20,col=1,cex=.5)#是一樣的
sd(slp)
sd(itc)
# 用命令直接Bs--------
library(boot)
#自己定義一個(gè)func,但是這個(gè)func的第一個(gè)argument必須是data,第二個(gè)必須是bs的idx:
my.mean <- function(x, ib){
? return( mean(x[ib]) )
}
B = 100
x = cars$dist
ob = boot(x, my.mean, R=B)
# ob里的original就是true value,ie mean(x)
# Q5.10 這個(gè)是nls怎么用----------
rm(list = ls())
x = mtcars$disp
y = mtcars$mpg
plot(x,y,pch=20)#有inflexion(屈折),所以用nonlinear可能會(huì)更好
# nonlinear regression比如polynomial/exp
theta <- c(3,-0.01)
nls1 <- nls(y~exp(a+b*x),start = list(a=3,b=-0.01))
fitted(nls1)
plot(fitted(nls1),y)
plot(fitted(nls1)-y)#這個(gè)是residual,應(yīng)該沒(méi)有pattern才對(duì)
hist(fitted(nls1)-y)
# (a)這是怎么著nls的參數(shù)--------
summary(nls1)
summary(nls1)$parameter[1,1]
summary(nls1)$parameter[2,1]
plot(x,y,pch=20)
is = order(x)#返回的事從小到大的位置
#20 19 18 26 28? 3 ......
x[20]
min(x)
lines(x[is], fitted(nls1)[is], pch=20, cex=2, col=4, lwd=2)
?order
# (b)
z <- density(x)
plot(z)
# (c)
set.seed(1)
B = 100
n <- length(x)
theta1 <- numeric(B)
theta2 <- numeric(B)
for(b in 1:B){
? idx <- sample(1:n,n,replace = T)
? xnew <- x[idx]
? ynew <- y[idx]
? bnl <- nls(ynew~exp(a+b*xnew),start = list(a=3,b=-0.01))
? theta1[b] <- summary(bnl)$parameter[1,1]
? theta2[b] <- summary(bnl)$parameter[2,1]
}
mean(theta1)
mean(theta2)
sd(theta1)
sd(theta2)
#se:
sd(theta1)
# nonparametric 95% confidence intervals 就是native
# Q2.11----------
#? coerce強(qiáng)迫
rm(list = ls())
library(MASS)
x = Boston[,c('crim','indus','rm','tax')]
y = Boston$medv
lm1 <- lm(y~.,data = x)
# (a)
n <- nrow(x)
a <- c(1:floor(n/2))
lma <- lm(y~.,data = x,subset = a)
preds <- predict(lma,newdata = x[-a,])
length(preds)
rmsea <- sqrt(mean((y[-a]-preds)^2))
# (b)--------leave-one-out CV
preds <- resi <- numeric(n)
for(i in 1:n){
? tot <- c(1:n)
? b <- tot[-i]
? lmb <- lm(y~.,data = x,subset = b)
? preds[i] <- predict(lmb,newdata = x[i,])
? resi[i] <- y[i]-preds[i]
? rmseb <- sqrt(mean((resi)^2))
}
# (c)
K <- 5
folds <- cut(1:n,K,labels = F)
rmse <- numeric(K)
for(k in 1:K){
? train.idx <- which(folds!=k)
? tst.idx <- which(folds==k)
? lmc <- lm(y~.,data = x,subset = train.idx)
? predsc <- predict(lmc,newdata = x[tst.idx,])
? rmse[k] <- sqrt(mean((y[tst.idx]-predsc)^2))
}
# 但是這里我們忘記打亂了,so:
dat <- Boston[sample(1:n,n),]
x = dat[,c('crim','indus','rm','tax')]
y = dat$medv
K <- 5
folds <- cut(1:n,K,labels = F)
rmse <- numeric(K)
for(k in 1:K){
? train.idx <- which(folds!=k)
? tst.idx <- which(folds==k)
? lmc <- lm(y~.,data = x,subset = train.idx)
? predsc <- predict(lmc,newdata = x[tst.idx,])
? rmse[k] <- sqrt(mean((y[tst.idx]-predsc)^2))
}
rmse
# 重復(fù)R次的CV---------
# R里的每一次都要打亂一次
R <- 10
K <- 5
folds <- cut(1:n,K,labels = F)
rmse <- numeric(K*R)
for (r in 1:R) {
? is <- sample(1:n,n)
? x <- x[is,]
? y <- y[is]
? for(k in 1:K){
? ? train.idx <- which(folds!=k)
? ? tst.idx <- which(folds==k)
? ? lmc <- lm(y~.,data = x,subset = train.idx)
? ? predsc <- predict(lmc,newdata = x[tst.idx,])
? ? rmse <- c(rmse,sqrt(mean((y[tst.idx]-predsc)^2)))#這是在nested for loop里怎feed in
? }
}
rmse
# w7----------------
rm(list = ls())
# 課堂例題 section3 P8的--------
attach(pressure)
x <- temperature
y <- pressure
plot(x,y,pch=20,cex=0.5)#很明顯這不是一個(gè)linear pattern
lm1 <- lm(y~x)
lines(x,fitted(lm1))
lmf <- lm(y~x+I(x^2)+I(x^3)+I(x^4)+I(x^5))
plot(x,y,pch=20,cex=0.5)
lines(x,fitted(lmf))
nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))
summary(nls1)
lines(x,fitted(nls1),col='red')
cc <- coef(nls1)
coefficients(nls1)
curve(exp(cc[1]+cc[2]*x),col='blue',add=T)#和lines一樣
nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))
summary(nls1)
nlstst <- nls(y~exp(a+b*x),start = list(a=0,b=0.02),trace = T)
summary(nlstst)
crit <- function(theta,x,y){
? return( sum( (y-x*theta)^2 ) )
}
thbar = 1.8# theta的true value
#x = rep(c(1,3,7,8), len=100)
#y = x*thbar + rnorm(x)
optim.out = optim(par=c(1), fn=crit, x=x, y=y, method='L-BFGS-B',lower=c(-1), upper=c(0))
optim.out # $par是theta的值
curve(exp(1e-07*x),col='blue',add=T)
# grid search----------
# nls(y~exp(a+b*x),start = list(a=0,b=0.02))
# 現(xiàn)在假設(shè)一點(diǎn)都不知道ab的值
x <- temperature
y <- pressure
L <- 10
a.grid <- seq(-1,1,length=L)
b.grid <- seq(0.01,0.5,length=L)
crit <- matrix(NA,ncol=L,nrow=L)
for(i in 1:L){
? for(j in 1:L){
? ? crit[i,j] <- sum( (y-exp(a.grid[i]+b.grid[j]*x))^2 )
? }
}
persp(a.grid,b.grid,crit,theta=45,col = 'lightblue')
b.grid <- seq(0.01,0.02,length=L)
crit <- matrix(NA,ncol=L,nrow=L)
for(i in 1:L){
? for(j in 1:L){
? ? crit[i,j] <- sum( (y-exp(a.grid[i]+b.grid[j]*x))^2 )
? }
}
persp(a.grid,b.grid,crit,theta=45,col = 'lightblue')
contour(a.grid,b.grid,crit)
#什么樣的圖才是好的grid search---------
# BONUS: a nicer example of a criterion surface plot...
rm(list=ls())
set.seed(4060)
n = 100
x = runif(n,-1,1)
y = .2*x+.3*x^2
plot(x,y)
L = 100
ag = seq(-1,1,length=L)
bg = seq(-1,1,length=L)
crit = matrix(NA,nrow=L,ncol=L)
for(i in 1:L){
? for(j in 1:L){
? ? crit[i,j] = sum( (y-ag[i]*x-bg[j]*x^2)^2 )
? }
}
persp(ag,bg,crit,col='lightblue',theta=45)
contour(ag,bg,crit)
# how to fit any model using optim()----------
remove(list = ls())
x <- temperature
y <- pressure
crit <- function(th,x,y){
? a <- th[1]
? b <- th[2]
? return(sum(y-exp(a+b*x))^2)
}
range(x)
range(y)
?optim
opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y)
# 參數(shù)par:Initial values for the parameters to be optimized over.
opt$par
opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,
? ? ? ? ? ? method = 'L-BFGS-B', lower=c(0.01,0.02), upper=c(0.1,0.03))
opt$par
opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,
? ? ? ? ? ? method = 'L-BFGS-B', lower=c(0,0.02), upper=c(0.1,0.03))
opt$par
opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,
? ? ? ? ? ? method = 'L-BFGS-B', lower=c(-1,0.02), upper=c(0.1,0.03))
opt$par
opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,
? ? ? ? ? ? method = 'L-BFGS-B', lower=c(-0.6,0.02), upper=c(0.1,0.03))
opt$par
nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))
summary(nls1)
yhat <- exp(opt$par[1]+opt$par[2]*x)
plot(x,y)
lines(x,fitted(nls1),col='red')
lines(x,yhat,col='blue')
opt1 <- optim(par = c(0,0.02),fn=crit,x=x,y=y)
yhat1 <- exp(opt1$par[1]+opt1$par[2]*x)
lines(x,yhat1,col='orange')#這個(gè)接近nls
# 那么我們應(yīng)不應(yīng)該加第三個(gè)參數(shù)c呢?
crit2 <- function(th,x,y){
? # Must have theta as 1st parameter and return a single value...
? # Here we implement a simple linear regression model
? # for the sake fo the example:
? a = th[1]
? b = th[2]
? c = th[3] ## additional scaling parameter
? return( sum( (y-c*exp(a+b*x))^2 ) )
}
opt2 <- optim(par = c(0,0.02,1),fn=crit2,x=x,y=y)
opt2$par
yhat2 <- opt2$par[3]*exp(opt2$par[1] + (opt2$par[2])*x)#因?yàn)槭莝cale parameter
# The result is not great:因?yàn)楦患觕的一摸一樣,所以不用加這個(gè)paremeter了
plot(x, y, pch=20,
? ? xlab="Temperature (Celsius)",
? ? ylab="Vapor pressure (ml of mercury)",
? ? main="Example: polynomial regression")
lines(x, yhat2, col='navy')
lines(x, fitted(nls1), col='blue')
lines(x, yhat, col=2, lwd=3)
# w8----------------
#1D overfitting
rm(list = ls())
dat <- na.omit(airquality)
x <- dat$Temp
y <- dat$Ozone
plot(x,y)
dat <- data.frame(x,y)#為了方便寫(xiě)subset和predict,是data.frame不是as.data.frame!!!!
x <- data.frame(x)#這樣不行?。。?!
n <- nrow(x)
train.idx <- which(y<100)#which返回的也是位置
#tst.idx <- which(y>=100)
lmt <- lm(y~.,data = dat,subset = train.idx)
preds <- predict(lmt,newdata = dat[-train.idx,])
sqrt(mean( (preds-y[-train.idx])^2 ))
# multivariate overfitting
rm(list = ls())
dat <- mtcars
n <- nrow(dat)
R <- 20
K <- 2
fit <- numeric(K*R)
mse <- numeric(K*R)
fit.l <- numeric(K*R)
mse.l <- numeric(K*R)
folds <- cut(1:n,K,labels = F)
set.seed(4060)
library('glmnet')
for( r in 1:R){
? dat <- dat[sample(1:n,n),]
? x <- dat[,-1]
? y <- dat$mpg
? xm <- as.matrix(x)#全部都用這個(gè)就行,防止x有categorical
? #glm可以自己識(shí)別categorical
? for(k in 1:K){
? ? train.idx <- which(folds!=k)
? ? glm1 <- glm(y~.,data = x,subset = train.idx)
? ? yh <- fitted(glm1)
? ? fit <- c(fit,mean((yh-y[train.idx])^2))
? ? preds <- predict(glm1,newdata=x[-train.idx,])
? ? mse <- c(mse,mean((preds-y[-train.idx])^2))
? ? cv.lb = cv.glmnet(xm[train.idx,],y[train.idx],grouped=FALSE)
? ? # 10-fold cv
? ? # 上 是怎么找lasso的lambda(高階 參數(shù),不參與model,只是scale)
? ? lasso =? glmnet(xm[train.idx,],y[train.idx],lambda=cv.lb$lambda.min)
? ? yh.l = predict(lasso,newx=xm[train.idx,])#怎么求yhat
? ? fit.l[k+(r-1)*K] = mean((yh.l-y[train.idx])^2)
? ? yp.l = predict(lasso,newx=xm[-train.idx,])# test的prediction
? ? mse.l[k+(r-1)*K] = mean((yp.l-y[-train.idx])^2)#MSE
? }
}
boxplot(fit,mse,fit.l,mse.l,
? ? ? ? names=c("glm fit","glm error","lasso fit","lasso error"),
? ? ? ? ylim=c(0,300))
boxplot(fit,mse,fit.l,mse.l,
? ? ? ? names=c("glm fit","glm error","lasso fit","lasso error"),
? ? ? ? ylim=c(0,25))
#現(xiàn)在這里有categorical variable了
rm(list = ls( ))
library(ISLR)
set.seed(6026)
dat = na.omit(Hitters)
n = nrow(dat)
dat$Salary = log(dat$Salary)# 原始dta太skewed了,取log讓data更normal
REPS = 50
train.error = test.error = numeric(REPS)
lasso.train.error = lasso.test.error = numeric(REPS)
for(j in 1:REPS){
? dat = dat[sample(1:n, n, replace=FALSE),]
? i.train = c(1:200)# 用前200個(gè)作為training,后63個(gè)當(dāng)test
? train = dat[i.train,]
? test = dat[-i.train,]
? fit = glm(Salary~., data=train)
? pred = predict(fit, newdata=test)
? train.error[j] = mean(fit$residuals^2)
? test.error[j] = mean((pred-test$Salary)^2)
? xm = model.matrix(Salary~., data=train)[,-1]
? newxm = model.matrix(Salary~., data=test)[,-1]
? y = train$Salary
? lam = cv.glmnet(xm, y, data=train)
? lasso = glmnet(xm, y, data=train, lambda=lam$lambda.min)
? lasso.pred = predict(lasso, newx=newxm)
? lasso.fit = predict(lasso, newx=xm)
? lasso.train.error[j] = mean((lasso.fit-train$Salary)^2)
? lasso.test.error[j] = mean((lasso.pred-test$Salary)^2)
}
summary(fit)
coef(lasso) #怎么看被lasso mute down了-----------
par(font=2, font.axis=2)
boxplot(train.error, test.error,
? ? ? ? main="MLB player salary prediction",
? ? ? ? names=c("Training error", "Test error"),
? ? ? ? col=c('pink','cyan'))
# 另一個(gè)例子Prestige example
rm(list = ls( ))
library(car)? # contains dataset Prestige
library('glmnet')
pairs(Prestige)
y = Prestige$income
x = Prestige[,c("education","prestige","women")]
class(x) # note x is of class "data.frame" as opposed to "matrix"
lmo = lm(y~., data=x) # this works though (check out ?lm)
summary(lmo)
# how to predict from this fit:
newx = data.frame(education=seq(11,16,length=5),prestige=seq(14,80,length=5),women=seq(10,90,length=5))
lmo.pred = predict(lmo, newdata=newx)
# 只predict一個(gè):
newx = data.frame(education=c(13),prestige=c(61),women=c(12))
lmo.pred = predict(lmo, newdata=newx)
xm = model.matrix(y~., data=x)[,-1]
lasso.cv = cv.glmnet(xm, y, alpha=1)#Cross-validation for glmnet
lasso = glmnet(xm, y, alpha=1, lambda=lasso.cv$lambda.min)
# let's compare LASSO with our earlier linear regression output:
cbind(coef(lasso), coef(lmo))
# how to predict from this fit:
# ?predict.glmnet
class(newx)
lasso.pred = predict(lasso, newx=as.matrix(newx))
# NB:
cbind(lasso.pred, lmo.pred) # close enough to the lm() predictions
mean((lasso.pred-lmo.pred)^2)
# percentage error-----------
mean((lasso.pred-lmo.pred)/lmo.pred)*100 # less than 1% error...
#只有-0.1155595,所以origional model是夠好的
# 另一個(gè)model: Blood Pressure example
library(glmnet)
setwd('/Users/apple/Desktop/22-r/ST4060/ST4060txt-csv')
dat = read.table(file="Blood Pressure.txt",header=TRUE)
str(dat)
dat$PatientID = NULL # getting rid of this variable
# GLM fit
glm.fit = glm(Systolic~., data=dat)
mean(glm.fit$residuals^2)
# LASSO - note: fitting these models is rather cody...
alpha = 1
# prepare the data:
xm = model.matrix(Systolic~., data=dat)[,-1]
y = dat$Systolic
# compute best lambda:
lam = cv.glmnet(xm, y, alpha=alpha)
# c) fit model using best lambda:
lasso = glmnet(xm, y, lambda=lam$lambda.min)
# d) recover fitted values:
lasso.fit = predict(lasso, newx=xm)
# e) calculate RSS or MSE from LASSO
mean((lasso.fit-y)^2)
rm(list = ls( ))
dat = na.omit(airquality)
x = dat$Temp
y = dat$Ozone
plot(x,y)
nl.crit <- function(x, y, a, b){
? return( mean( (y - a*exp(b*x))^2 ) )
}
# GRID SEARCH:----------
Pa = Pb = 100
a.grid = seq(0,2,length=Pa)
b.grid = seq(0,1,length=Pb)
crit.values = matrix(NA, nrow=Pa, ncol=Pb)
for(i in 1:Pa){
? for(j in 1:Pb){
? ? crit.values[i,j] = nl.crit(x,y,a.grid[i],b.grid[j])
? }
}
ij.best = which.min(crit.values)
ij.best = arrayInd(ij.best, .dim=dim(crit.values))
# location of the minimum of the surface
# grid search solutions:
a.best = a.grid[ij.best[1]]
b.best = b.grid[ij.best[2]]
fitted = a.best * exp(b.best*x)
nlso = nls(y~a*exp(b*x), start=list(a=a.best, b=b.best))
a.best
b.best
plot(x, y, pch=20, xlab="Temperature (degrees F)",
? ? ylab="Ozone (ppb)",
? ? xlim=c(55,105))
points(x, fitted(nlso), pch=20, col='orange')
# 直接用optim也能smooth---------
dat = na.omit(airquality)
x = dat$Temp
y = dat$Ozone
#### Pick a nonlinear model and fit it using optim()
# First, write a function that evaluates the criterion:
criterion <- function(param,x,y){
? # The function has to be defined this way
? # (with param as its 1st argument)
? # if we are going to apply optim() to it... cf. ?optim
? a = param[1]
? b = param[2]
? return( mean( (y - a*exp(b*x))^2 ) )
}
# Then, apply optim() to it:
oo = optim(par=list(a=1, b=0.01), fn=criterion, x=x, y=y)
# now reconstruct fitted values from optim() output:
a.hat = oo$par[1]
b.hat = oo$par[2]
y.hat = a.hat * exp(b.hat*x)
plot(x,y)
is <- order(x)
lines(x[is], y.hat[is], pch=21, col=2)
# w9----------------
# B-spline--------
# using dataset ahmd$mx from library MortalityLaws, plot the crude force of mortality curve.
rm(list = ls())
library('MortalityLaws')
age <- c(0:110)
mM <- ahmd$mx[,4]
plot(dat$Age,mM,t='b',pch=20)#Crude force of mortality (2010)
plot(age,log(mM))
library('splines')
BM3 <- bs(age)
BM6 <- bs(age,df=6)
matplot(age,BM3,t='b')#Plot Columns of Matrices
matplot(age,BM6,t='b')
BMtst <- bs(age,knots = F)#
# Compute the design matrix for 3 dof’s and quote the projection for age 40 onto
# it. Then reconstruct the B-spline approximation to the crude male force data
# using linear regression. Plot the output over the data points and criticize.
age <- c(0:110)
mM <- ahmd$mx[,4]
BM <- bs(age)# compute bases
lm1 <- lm(mM~BM)
y <- numeric(length(age))#一個(gè)vector
for (i in 1:length(age)) {
? y[i] <- as.numeric(sum(lm1$coef[2:4]*BM[i,])+lm1$coef[1])
}
lm1$coefficients
plot(age,mM,pch=20)
lines(age,y,t='l',col=4)# 這兩條線是一樣的
lines(age,fitted(lm1),t='l',col=2)
y-fitted(lm1)
# 更好的B-spline:
BM <- bs(age,df=6)# compute bases
lm1 <- lm(mM~BM)
y <- numeric(length(age))
for (i in 1:length(age)) {
? y[i] <- as.numeric(sum(lm1$coef[2:7]*BM[i,])+lm1$coef[1])
}
plot(age,mM,pch=20)
lines(age,y,t='l',col=4)
lines(age,fitted(lm1),t='l',col=2)
# Continuing from previous exercise, compute the B-spline as a power series,
# i.e. compute the traditional cubic spline representation
# S(x)=β0+β1x+β2x2+β3x3+∑j=1Jαj(x?6?1xj)3+
# where (x?6?1xj)3+=max((x?6?1xj)3,0), using J=3 interior knots at 40, 60 and 80.
# Compare with the fitted values from the regression onto the design matrix
# obtained from splines::bs() for those knots.
x <- c(0:110)
mM <- ahmd$mx[,4]
BM <- bs(x,knots = c(40,60,80))#也叫design matrix
lm2 <- lm(mM~BM)
x <- 41
pmax((x-40)^3,0)
x <- 39
pmax((x-40)^3,0)
k40 <- pmax((x-40)^3,0)
k60 <- pmax((x-60)^3,0)
k80 <- pmax((x-80)^3,0)
lm3 <- lm(mM~x+I(x^2)+I(x^3)+k40+k60+k80+0)
plot(x,mM,pch=20)
lines(x,fitted(lm3),col='red')
lines(x,fitted(lm2),col='blue')
par(mfrow=c(1,2))
matplot(BM)# original basis for B-spline
POW <- cbind(x,x^2,x^3,k40,k60,k80)
matplot(POW)
# w10----------------
# loess是多個(gè)x,lowess只有一個(gè)x
rm(list = ls())
dat = na.omit(airquality)
t = dat$Temp
y = dat$Ozone
plot(t,y,xlim = c(50,110) )
range(t)
newx <- c(95:100)
abline(v=newx,col='blue')
lo <- loess(y~t)
summary(lo)
# check output values are in the same order as input values:
summary(c(lo$x - x)) # ok, they are
summary(c(lo$y - y)) #都是相同order
pred.lo <- predict(lo,newdata = data.frame(t=newx))# 這個(gè)t是喂進(jìn)去的
points(newx,pred.lo,col='red')#預(yù)測(cè)不了出了x的range 的點(diǎn)
low <- lowess(t,y)
summary(low)
summary(c(low$x - t))? ? ? # NO, THEY ARE NOT
summary(c(low$x - sort(t))) # coz lowess() sorted the input values!
sqrt( mean( (y-fitted(nlso))^2 ) )
sqrt( mean( (y-fitted(lo))^2 ) )
sqrt( mean( (y[order(x)]-low$y)^2 ) )
pred.low <- approx(low$x,low$y,xout = newx,rule = 2)
# lowess沒(méi)有predict,所以我們用了linear interpolation的方法
# rule=2用的是極限值,rule=1就是返回這個(gè)值
pred.low
pred.low$y
points(newx,pred.low$y,col='red',pch=20,cex=.8)
# 這個(gè)也是不夠好,因?yàn)槌隽朔秶竽鼐褪侵本€了,這是因?yàn)閍pproox
# 是linear interpolation,而且用的是極限值,所以說(shuō)當(dāng)右側(cè)沒(méi)有點(diǎn)
# 之后自然是一條直線
tp <- approx(lo$x,y=lo$fitted,xout = newx,rule = 2)
# 對(duì)loess用linear interpolation
tp$y
points(newx,tp$y,pch=20,col='orange')
# 這也不是最好的,畢竟都是在linear interpolation
# 所以用spline可以解決
# Using B-splines------------
library('splines')
KN <- quantile(t,c(0.15,0.40, 0.60, 0.70, 0.80, 0.90))
BM <- bs(t,knots = KN)
bl <- lm(y~BM)# 這種以后就不寫(xiě)了
ny = predict(BM, newx)
pred.bspline = predict(bl, newdata=as.data.frame(ny)) # nope 不匹配
# b-spline 的正確predict方式
B.spline = lm(y~.,data=data.frame(BM))# 不用lm(y~BM),所以下次lm就寫(xiě). 不寫(xiě)別的way
# 這樣pred才能work well
pred.bspline = predict(B.spline, newdata=data.frame(ny))
pred.bspline
points(newx,pred.bspline,col='red')
# P-spline--------
rm(list = ls())
dat = na.omit(airquality)
t = dat$Temp
y = dat$Ozone
plot(t,y,xlim = c(50,110) )
newx <- c(95:100)
abline(v=newx,col='blue')
ps <- smooth.spline(t,y,cv=T)
# how to predict from the P-spline:
pred.pspline = predict(ps, x=newx)#這個(gè)x=是個(gè)argument
KN <- quantile(t,c(0.15,0.40, 0.60, 0.70, 0.80, 0.90))
BM <- bs(t,knots = KN)
b.s <- lm(y~.,data = data.frame(BM))
ny = predict(BM, newx)
pred.bspline <- predict(b.s,newdata =data.frame(ny) )
points(t, fitted(b.s), pch=20, col='brown', cex=1.5)
points(t, fitted(ps), pch=15, col='navy')
legend("topleft", legend=c("data","NLS fit","B-spline","P-spline"),
? ? ? col=c(8,4,2,'navy'), pch=c(20,20,20,15), bty='n')
points(newx,pred.bspline, col='brown', pch=14)# 這幾個(gè)點(diǎn)follow general pattern
points(pred.pspline, col=3, pch=14)
plot(t,y,xlim = c(50,110) )
points(t, fitted(b.s), pch=20, col='brown', cex=1.5)
points(t, fitted(ps), pch=15, col='navy')
# 這里P比B好
# 控制B-spline
x <- t
KN = quantile(x, c(0.1, 0.4, 0.7))
BM = bs(x, knots=KN)
KN = quantile(x, c(0.05, 0.10, 0.20, 0.23, 0.45, 0.65, 0.95))
BM = bs(x, knots=KN)
KN = quantile(x, c(0.15,0.45, 0.65, 0.95))
BM = bs(x, knots=KN)
KN = quantile(x, c(0.15,0.45, 0.65, 0.95, 0.99)) # 這個(gè)很dramastic
BM = bs(x, knots=KN)
KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.91))? # 這個(gè)比較合
BM = bs(x, knots=KN)
KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.92))
BM = bs(x, knots=KN)
KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.93))
BM = bs(x, knots=KN)
# konts不同那么end處的estimate是很不一樣的,B-spline很難控制end
B.spline.df = lm(y~., data=as.data.frame(BM))
newBM = predict(BM, newx)
pred.bspline = predict(B.spline.df, newdata=as.data.frame(newBM))
plot(x, y, pch=20, col=8,
? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",
? ? xlim=c(55,105))
points(x, fitted(B.spline.df), pch=20, col='red')
lines(newx, pred.bspline, col='navy', pch=8, t='b')
# 那么用P-spline控制
# NOTE: control degrees of freedom in P-splines
P.spline3 = smooth.spline(x, y, df=3)# df控制一個(gè)knot用多少data points
P.spline5 = smooth.spline(x, y, df=5)
P.spline10 = smooth.spline(x, y, df=10)# df越小線越平
pred3 = predict(P.spline3, x=newx)
pred5 = predict(P.spline5, x=newx)
pred10 = predict(P.spline10, x=newx)
#
plot(x, y, pch=20, col=8,
? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",
? ? xlim=c(55,105))
points(x, fitted(P.spline3), pch=15, col='navy',cex=.5)
points(x, fitted(P.spline5), pch=15, col='blue',cex=.5)
points(x, fitted(P.spline10), pch=15, col='cyan',cex=.5)
points(pred3, col='navy', pch=20)
points(pred5, col='blue', pch=20)
points(pred10, col='cyan', pch=20)
# 其中有一條線與B-spline長(zhǎng)得很像,所以控制h may be a good way:
# NOTE: control smoothness (ie penalty) parameter in P-splines
P.spline10a = smooth.spline(x, y, df=10)
P.spline10b = smooth.spline(x, y, df=10, spar=.9)# spar=smoothing parameter
P.spline10c = smooth.spline(x, y, df=10, spar=.1)
#
plot(x, y, pch=20, col=8,
? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",
? ? xlim=c(55,105))
points(x, fitted(P.spline10a), pch=15, col='navy',cex=.5)# 這個(gè)是默認(rèn)的
points(x, fitted(P.spline10b), pch=15, col='blue',cex=.5)# h=0.9,很平
points(x, fitted(P.spline10c), pch=15, col='red',cex=.5)# 很snesitive,h=0.1
# plot the latter as a curve (requires re-ordering the points):
is = order(x)# 就是怎么畫(huà)圖
xs = x[is]
lines(xs, fitted(P.spline10c)[is], pch=15, col='red')
# Q4.1----------
rm(list=ls())
setwd('/Users/apple/Desktop/22-r/ST4060/ST4060txt-csv')
library(splines)
LT <- read.table('irl_lifetable_2005.txt',header = T )
head(LT)
SLT = LT[c(1:106),]
mx = SLT[,8]
x = SLT$Age
onls = nls(mx~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))
ABc = summary(onls)$coef[,1]#這是true value,怎么也算不出來(lái)的
set.seed(1)
x = seq(0, 110, by=1)
mx = ABc[1]+ABc[2]*ABc[3]^x? # this is the theorical model
mxn = mx
s1 = which(x<86)
s2 = which(x>85)
mxn[s1] = pmax(0.005,mx[s1]+rnorm(length(s1),0,.03))
mxn[s2] = mx[s2]+rnorm(length(s2),0,.06)# mxn has noise in it
dat = cbind(x,mx,mxn)
# (a)
plot(x,mx)#這是data 的true value,怎么也算不出來(lái)的
points(x,mxn,cex=0.8,pch=20)#這是加上noise的data,相當(dāng)于我們的觀察值,ie這是要用的東西
nls.ns = nls(mxn~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))
lines(x,fitted(nls.ns),col='red')
ABc.ns = summary(nls.ns)$coef[,1]#取nls后的系數(shù)
# (b)
ps <- smooth.spline(x,mxn)
?smooth.spline
summary(ps)
lines(x,fitted(ps),col='blue')
sy = ps$y #人家說(shuō)Repeat the previous step, but on the smoothed curve obtained
#from a generic P-spline.所以才有的這一步!?。?!
nls.ps = nls(sy~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))
ABc.ps = summary(nls.ps)$coef[,1]
# MSE:
(ABc-ABc.ns)^2
(ABc-ABc.ps)^2
(ABc-ABc.ns)^2-(ABc-ABc.ps)^2 #還是ps后小一點(diǎn)
# percentage errors:
((ABc-ABc.ns)/ABc)
((ABc-ABc.ps)/ABc)
((ABc-ABc.ns)/ABc)-((ABc-ABc.ps)/ABc) #還是ps后小一點(diǎn)
# 以上is a way of doing a simulation using a real life data?
# 這里的啟示是:當(dāng)你fit a complicated model的時(shí)候,可以smooth the data (這里是p-spline)
# first to reduce the level of noise which a nonlinesr procedure would be very
# sensitive to
# Q4.2 ----------------------
rm(list = ls())
dat <- read.csv('insdata.csv')
age = dat$Age
mF = dat$mF
# (a)
sp <- smooth.spline(age,mF,cv=T)
plot(age,mF,pch=20,cex=0.8)
lines(age,fitted(sp),col='red')
# (b)
par = sp$spar
sp2 <- smooth.spline(age,mF,cv=F,spar=par/2)
lines(age,fitted(sp2),col='blue')
# spar=smoothing parameter
# df控制一個(gè)knot用多少data points
# (c)
sp$x-sp2$x # check p1$x和p2$x是一樣的
# (d)
sp$x-age#也要檢查
mean((mF-sp$y)^2)
mean((mF-sp2$y)^2)
sqrt(mean((mF-sp$y)^2))
sqrt(mean((mF-sp2$y)^2))
# 看數(shù)字的話第二個(gè)更好,因?yàn)镽MSE更小,但是這個(gè)第二個(gè)對(duì)應(yīng)的是上邊那個(gè)圖里
# 彎彎曲曲的藍(lán)線,這并不是我們想要的。
# 所以要搞清楚我們這里想要的是什么,在這里我們想要一個(gè)smooth curve,而不是一個(gè)好的fit
# 所以應(yīng)該是RMSE大的更好
sd(mF-sp$y) #var
sd(mF-sp2$y)#var
mean(mF-sp$y) #bias
mean(mF-sp2$y)#bias
# 可以看到bias是很小的,所以mse的大部分都是由var貢獻(xiàn)的,所以如果apline是smoother
# 的話,mse大
# (e)
KN <- quantile(age,c(0.25,0.5,0.75))
BM <- bs(age,knots = KN)
matplot(age,BM,t='b')#不要掉了age?。。。。。軸?。。?!
# (f)
round(BM[which(age==60),],4)#這是 coordinates
abline(v=60, lty=3)
abline(h=BM[which(age==60),], lty=2)#怎么indicate
# (g)
bs <- lm(mF~.,data=data.frame(BM))
bs$coefficients
# (h)
mean((bs$residuals)^2)
mean((mF-sp$y)^2)
mean((mF-sp2$y)^2)
plot(age,mF)
lines(sp,col='red')
lines(sp2,col='blue')
lines(age,fitted(bs),col='orange')
# b-spline is very sensitive to the tail end, that's because the way of
# arranging the knots
# This B-spline seems comparable to the 1st P-spline, and
# not as effective as the second one.
# (i)
# first, see that we're missing some ages:
plot(age)
# interpolation grid:
all.ages = seq(min(age),max(age),by=1)
plot(all.ages)
lip <- approx(sp$x,sp$y,xout = all.ages)#全寫(xiě)上沒(méi)事
sd(lip$y)
lo <- lowess(age,mF)
lilo <- approx(lo$x,lo$y,xout = all.ages)
lpred-lilo$y
sd(lilo$y)
# 問(wèn)----------
plot(age,mF)
lines(all.ages,lip$y,col='red')
lines(all.ages,lpred,col='blue')
# Q4.3 ----------------------
rm(list = ls())
library('car')
library(splines)
dat <- Prestige
x = Prestige$income
y = Prestige$prestige
# B-spline:
BM <- bs(x,df=6)
bs <- lm(y~.,data = data.frame(BM))
bmse <- mean((bs$residuals)^2)
# P-spline:
ps <- smooth.spline(x,y)
ps$x-x
# For the P-spline, we need to interpolate!
range(x)
range(ps$x)
# range相同,rule=1/2都是一樣的,xout的range出了x,那么就用rule=2
yp <- approx(ps$x,ps$y,xout=x)$y
yp2 <- approx(ps$x,ps$y,xout=x,rule = 2)$y
pmse <- mean((y-yp)^2)
# w11----------------
rm(list = ls())
x = iris[,c(2:4)]
y = iris[,5]
K <- 2
ko <- kmeans(x,K)
?kmeans
is <- c(1,3)
plot(x[,is],col=c(1,2,4)[ko$cluster],pch=20,cex=2)
z <- apply(x,2,scale)
(x[1,1]-mean(x$Sepal.Width))/sd(x$Sepal.Width)-z[1,1]
koz <- kmeans(z,K)
plot(x[,is],col=c(1,2,4)[koz$cluster],pch=20,cex=2)
pairs(iris[,1:4],pch=20)
cor(iris[,1:4])
dat <- EuStockMarkets
plot(EuStockMarkets, lwd=1.5)
pca <- prcomp(EuStockMarkets)
?prcomp #Principal Components Analysis
par(mfcol=c(4,1), mar=c(1,3,0,1))
# 分解four dimensional set of time series into time series
for(i in 1:4){
? plot(pca$x[,i], t='l', lwd=2)
}
summary(pca)
??EuStockMarkets
names(pca)
pca$x
plot(pca)
plot(prcomp(iris[,1:4]))
plot(prcomp(iris[,1:4], scale.=TRUE))
# Logistic regression based classification--------------
rm(list = ls())
library(ISLR)
dat <- Smarket
head(Smarket)
?Smarket
glm1 <- glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,
? ? ? ? ? ? data=dat, family=binomial)
summary(glm1)
n <- nrow(dat)
itrain <- which(dat$Year<2005)
glm1.cv <- glm(Direction~Lag1+Lag2,
? ? ? ? ? ? ? data=dat,subset = itrain, family=binomial)
summary(glm1.cv)
po <- predict(glm1.cv,type = 'response')
summary(po)
glm.pred= rep("Down", length(po))
glm.pred[po>.5] = "Up"
tb <- table(glm.pred,dat$Direction[itrain])
tb
sum(tb)
length(itrain)
1-sum(diag(tb))/sum(tb)# overall error rate
Direction.2005 <- dat$Direction[!itrain]
po.2005 <- predict(glm1.cv,type ='response',newdata = dat[-itrain,])
length(po.2005)
length(po.2005)+length(po)
nrow(dat)
glm.pred.2005= rep("Down", length(po.2005))
glm.pred.2005[po.2005>.5] = "Up"
tb.test = table(glm.pred.2005,dat$Direction[-itrain])
1-sum(diag(tb.test))/sum(tb.test)
# Logistic regression based classification
# another example using iris--------------------------------------
# 這里化簡(jiǎn)到二維問(wèn)題
rm(list = ls())
iris <- iris
n <- nrow(iris)
n
x <- iris[sample(1:n,n),]
is.var <- (x$Species=='virginica') #這是在判斷,返回T/F
# is.var <- which(x$Species=='virginica') #返回位置,但是我們要T/F
x$Species <- is.var
glmo <- glm(Species~., x, family=binomial)
glmo$fitted.values - predict(glmo, type='response')
# 反正就看別xy都是同一個(gè)vector就行了:
a <- x$Sepal.Length
b <- x[,-1]
lm1 <- lm(a~.,data = b)
lm2 <- lm(a~.,data = x)
lm3 <- lm(Sepal.Length~.,data = x)
n = nrow(x)
K = 10
train.acc = test.acc = numeric(K)
folds = cut(1:n, K, labels=FALSE)
k = 1
# rm(k)
thr = .8
for(k in 1:K){
? itrain = which(folds!=k)
? glmo = glm(Species~., data=x,
? ? ? ? ? ? family=binomial,
? ? ? ? ? ? subset=itrain)
? tb1 = table(glmo$fitted.values>thr, x$Species[itrain])
? train.acc[k] = sum(diag(tb1))/sum(tb1)
? # prediction
? p.test = predict(glmo, x[-itrain,], type='response')
? tb2 = table(p.test>thr, x$Species[-itrain])
? test.acc[k] = sum(diag(tb2))/sum(tb2)
}
mean(train.acc)?
mean(test.acc)
tb1
tb2
# Illustration of hierarchical clustering ------------------------------------------------
# 分層聚類
data(eurodist)
hc = hclust(eurodist, method="ward")
plot(hc)
hc = hclust(eurodist, method="single")
plot(hc)
hc = hclust(eurodist, method="complete")
plot(hc)
# another Clustering examples
# partition-based clustering------------------------------------------------
rm(list = ls())
x = iris[,1:4]
plot(x[,1:2], pch=20)
plot(x[,1:2], pch=20, col=c(1,2,4)[iris$Species])
plot(x[,c(3,4)], pch=20,col=c(1,2,4)[iris$Species])
# hierarchical clustering
# 這里與下邊無(wú)關(guān)
xs = apply(x, 2, scale)
ho = hclust(dist(x), method="ward.D")
hc = cutree(ho, k=3)
plot(x[,1:2], pch=20, col=hc)