# R notes, ST4060, 17/02/2022

# w2----------------

# Q2.1---------

rm(list = ls())

# Q2.1: simulate N draws from Exp(rate = 0.5) and compare the histogram,
# the true density and kernel density estimates (KDE).
N <- 1000

x <- rexp(N,rate=0.5)

hist(x,freq = F)

xx <- seq(0,15,by=0.1)# evaluation grid for the true density curve

lines(xx,dexp(xx,rate = 0.5),col='red')# the "theoretical" curve

lines(density(x),col='blue')# the "estimated" curve (KDE) -- keep the two distinct

?density()

lines(density(x,kernel = "rectangular"),col='orange')

# how to plot the KDE directly

plot(density(x), lwd=3, col=2)

rug(x)

?rug# adds a rug representation (1-d plot) of the data to the plot

density(x)$bw

# NOTE(review): 3.789586-3 is plain arithmetic (= 0.789586) and 3.789586+3
# = 6.789586; both exceed the default bandwidth, so the "under-smoothing"
# label is suspect -- possibly 3.789586e-3 was intended. TODO confirm.
ud <- density(x,bw=3.789586-3)# labelled under-smoothing in the notes

ov <- density(x,bw=3.789586+3)# over-smoothing

plot(ud)

lines(ov)

# Q2.2---------

# Q2.2: simulate from a contaminated normal -- with probability (1 - eps)
# draw from t(3), otherwise from N(0, 1).
eps <- 0.95

N <- 100

u <- seq(0, 1, length = N)
# NOTE(review): u is a deterministic grid, so exactly the last 5% of draws
# come from the t distribution; runif(N) would give a true random mixture.
# TODO confirm which was intended by the exercise.

x <- numeric(N) # x holds the generated sample (this is what we show)

# BUG FIX: the original loop body contained mis-encoded '?' characters in
# place of indentation, which broke parsing; cleaned up here.
for (i in 1:N) {
  if (u[i] > eps) {
    x[i] <- rt(1, df = 3)
  } else {
    x[i] <- rnorm(1)
  }
}

hist(x)

# Q2.3---------

n <- 1000

set.seed(4060)

# (a)

x1 <- rnorm(n,mean=2,sd=1)

f1 <- density(x1)

hist(x1,freq = F)

xx1 <- seq(-1,7,by=0.1)

lines(xx1,dnorm(xx1,mean = 2,sd=1),col='red')

lines(f1,col='blue')

f1 <- density(x1,adjust =1.3)

lines(f1,col='orange')

# BUG FIX: in the original notes these lines ran BEFORE x2/f2 existed (they
# are created in part (b) below), so the script errored when run top-to-bottom.
# Define them first so the MSE of the KDE can be computed here.
x2 <- rt(n, df = 3)
f2 <- density(x2)$y
# MSE of the KDE against the true t(3) density, evaluated on the KDE's x-grid:
mse <- mean((dt(density(x2)$x, df = 3) - f2)^2)
lines(density(x2, adjust = 1.5), col = 'orange')

# (b)

# (b) Student-t sample vs true density
x2 <- rt(n,df=3)

f2 <- density(x2)$y

hist(x2,freq = F,ylim = c(0,0.6))# must control the y-range here!

xx2 <- seq(-15,15,by=0.1)

lines(xx2,dt(xx2,df=3),col='red')

lines(density(x2),col='blue')

hist(x2,freq = F,ylim = c(0,0.6),breaks = 21)

xx2 <- seq(-15,15,by=0.1)

lines(xx2,dt(xx2,df=3),col='red')

lines(density(x2),col='blue')

# (c) Exponential sample vs true density
# (this part was mislabelled "(b)" a second time in the original notes)

x3 <- rexp(n,rate = 2)

f3 <- density(x3)$y

hist(x3,freq = F,ylim = c(0,1.5))

lines(density(x3),col='blue')

xx3 <- seq(0,3.5,length=1000)

# BUG FIX: the theoretical curve must use the same rate the data were
# generated with (rate = 2), not rate = 3.
lines(xx3,dexp(xx3,rate=2),col='red')

# Q2.4---------

library(KernSmooth)

dat <-? faithful

# (a)

bw = apply(faithful, 2, sd)/2

fx <- bkde2D(dat,bandwidth = bw)

plot(dat,xlim=c(1,6),ylim=c(35,105))

contour(fx$x1, fx$x2, fx$fhat,add = T)#沒(méi)記住

persp(fx$x1, fx$x2, fx$fhat, shade=.15, theta=-30, phi=15)

# 總結(jié)

# h越小,bw越小,adjust越小,KDE越squiggly,越高&narrow

# h越da,bw越大,adjust越大,KDE越smooth,越矮

# (b)

lp.fit = locpoly(faithful$eruptions, faithful$waiting, bandwidth=1)#沒(méi)記住

# 只問(wèn)local polynomial就這么寫(xiě),bw越小越高,這個(gè)不是默認(rèn)值,自己找一個(gè)

plot(faithful, pch=20)

lines(lp.fit, lwd=3, col=1)

?locpoly

# Q2.5---------

# 這里直接不會(huì)

# (a)

n = 1000

set.seed(4060)

# (a) simulate a bivariate sample with independent normal coordinates
Mu = c(1,4)    # chosen means
Sigma = c(2,1) # chosen standard deviations

# BUG FIX: the original used N (stale value 100 from an earlier exercise),
# so only 100 points were generated instead of the intended n = 1000.
X1 = rnorm(n, mean=Mu[1], sd=Sigma[1])

X2 = rnorm(n, mean=Mu[2], sd=Sigma[2])

X = cbind(X1,X2)

# (b) 2D kernel density estimate
library('MASS')

bw = c(.5, .5) * 4

f.kde2d = kde2d(X1, X2, h=bw) # h supplies the two bandwidths, one per coordinate

names(f.kde2d)

plot(X,xlim=c(-6,12),ylim=c(0,8))

contour(f.kde2d$x,f.kde2d$y,f.kde2d$z,add = T)# needs all three components, or just pass f.kde2d

# (c) not examined

# w3----------------

rm(list = ls())

# Q5.1 MC integration-----

# 求e^(-x) 在 (2,4)上的積分(0.117)沒(méi)記住

# g(x)=e^(-x); x~f(x)=1/2=UNIF(2,4); 2*E[g(x)]=所求

# x~f(x)=1/2=UNIF(2,4)很重要?。。。?!

M <- 1000

a <- 2

b <- 4

x <- runif(M,a,b)

theta.est <- (sum(exp(-x))/M)*2;theta.est

# Q5.2-------

N <- 1000

x <- 1

a <- x

b <- -x

t <- runif(N,min=b,max = a)

prob <- ((sum((1/sqrt(2*pi))*exp(-0.5*(t^2))))/N)*2

rs <- 0.5+prob/2

# check:

pnorm(1)

# evaluate variance的話我們加一個(gè)MC求出estimator

M <- 999

v <- numeric(M)

for(i in 1:M){

? t <- runif(N,min=b,max = a)

? prob <- ((sum((1/sqrt(2*pi))*exp(-0.5*(t^2))))/N)*2

? rs <- 0.5+prob/2

? v[i] <- rs

}

mean(v)#check

var(v)

quantile(v,c(0.025,0.975))

# Q5.3-------

# (a)

n <- 1000

x <- rnorm(n)

is <- order(x)

xk <- x[is]

k <- 0

xnew <- xk[c((k+1):(n-k))]

length(xnew)

mn <- numeric(n/2)

for(k in 0:(n/2-1)){

? xnew <- xk[c((k+1):(n-k))]

? mn[k+1] <- mean(xnew)

}

mn

# (b)

k <- 1 #這里k是定下的

M = 1000

n = 20 #sample size

tm <- numeric(M)

mn <- numeric(M)

for (m in 1:M) {

? x <- rnorm(n,mean=15,sd=3)

? mn[m] <- mean(x)

? xs <- sort(x)

? xnew <- xs[c((k+1):(n-k))]

? tm[m] <- mean(xnew)

}

mse1 <- mean((mn-15)^2)

mse2 <- mean((tm-15)^2)

b1 <- mean(mn-15)# bias of the sample mean

b2 <- mean(tm-15)# bias of the trimmed mean

mse1

# BUG FIX: MSE = variance + bias^2, so the bias terms must be squared
# in the decomposition checks below.
var(mn)+b1^2

mse2

var(tm)+b2^2

# Q5.4-------

# (a)

N <- 50

M <- 100

a <- 7

b <- 3

x <- rep(c(1:5), N/5)

m <- 0.5

s <- 1.2

rseed=0

LSvec = RMvec = matrix(0,2,M)

set.seed(rseed)

library(MASS)

library(VGAM)

# (b)

for(m in 1:M){

? e <- rlnorm(N,m,s)

? y <- a+b*x+e

? mylm <- lm(y~x)

? myrlm <- rlm(y~x,method="MM")

}

# Q5.5-------

M <- 10000

N <- 50

x <- rnorm(N,mean = 0,sd=2)

s1 <- s2 <- numeric(M)

?var()

for(m in 1:M){

? x <- rnorm(N,mean = 0,sd=2)

? s1[m] <- sd(x)

? s2[m] <- sqrt(mean((x-mean(x))^2))#這個(gè)是誘有偏的,記住

}

mean(s1)

mean(s2)

boxplot(s1,s2)

M <- 10000

N <- 100

x <- rnorm(N,mean = 0,sd=2)

s1 <- s2 <- numeric(M)

?var()

for(m in 1:M){

? x <- rnorm(N,mean = 0,sd=2)

? s1[m] <- sd(x)

? s2[m] <- ((N-1)*s1[m])/N

}

sd(s1)

sd(s2)

boxplot(s1,s2)

# w4----------------

rm(list = ls())

iris <- iris

x <- subset(iris$Sepal.Length,iris$Species=='setosa')

xbar <- mean(x,na.rm = T)

n <- length(x)

B <- 100

xb <- numeric(B)

for(b in 1:B){

? #idx <- sample(1:n,n,replace = T)

? tst <- sample(x,n,replace = T)#所以這是一樣的

? #xnew <- x[idx]

? xb[b] <- mean(tst)

}

mean(xb)

mean(xb)-xbar#bias

#estimate of SE:

sd(xb)

# Q5.9-------

cars <- cars

x <- cars$speed

y <- cars$dist

B <- 10000

n <- nrow(cars)

lm1 <- lm(y~x)

summary(lm1)

lm1$coefficients[2]

slp <- itc <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? ynew <- y[idx]

? xnew <- x[idx]

? lmb <- lm(ynew~xnew)

? slp[b] <- lmb$coefficients[2]

? itc[b] <- lmb$coefficients[1]

}

# Bs SE estimate:

sd(slp)

sd(itc)

# Bootstrap estimate of bias:

mean(slp)-coef(lm1)[2]

# naive bootstrap 95% CI
# BUG FIX: the lower probability must be 0.025 (not 0.25) for a 95% interval.
quantile(slp,c(0.025,0.975))

# MC matrix-------

# 讓matrix里的每一個(gè)值都是MC的mean estimate

M <- 100

mu <- 10

sigma <- 3

Ns <- c(20,30,50,100,200,500)

mt <- matrix(NA,nrow = M,ncol = length(Ns))

for (j in 1:length(Ns)) {

? for (m in 1:M) {

? ? x <- rnorm(Ns[j],mean = mu,sd=sigma)

? ? mt[m,j] <- mean(x)

? }

}

apply(mt,2,mean)

boxplot(mt,names=Ns,xlab='sample size N')

# theoretical CI:

lb <- mu-1.96*sigma/sqrt(Ns)# lower bound

ub <- mu+1.96*sigma/sqrt(Ns)# upper bound

# empirical/practical CI:
# BUG FIX: the matrix of MC means is called mt; there is no object `means`.

lb <- apply(mt,2,mean)-1.96*sigma/sqrt(Ns)# lower bound

ub <- apply(mt,2,mean)+1.96*sigma/sqrt(Ns)# upper bound

# Q5.6-------

M <- 100

ndf <- c(2,4,10)

n <- 100

mt <- matrix(NA,nrow = M,ncol = 3)

for(j in 1:3){

? for (i in 1:M) {

? ? x <- rchisq(n,df=ndf[j])

? ? mt[i,j] <- mean(x)

? }

}

boxplot(mt[,1],mt[,2],mt[,3])

#可以看出來(lái)的是df=E(x),沒(méi)記住

sd <- apply(mt,2,sd)#the estimates of standard error of xbar

# w5----------------

# 20-21 的CA1 Q1------

set.seed(6040)

M <- 1000

N <- 1000

r <- numeric(M)

for(m in 1:M){

? x <- sample(1:N,N,replace = T)

? xnew <- unique(x)

? r[m] <- length(xnew)/N

}

mean(r)

# 20-21 的CA1 Q2------

set.seed(6015)

M <- 1000

N <- 100

a <- 3

b <- 2

mn <- numeric(M)

for(m in 1:M){

? x <- rgamma(N,shape = a,rate = b)

? mn[m] <- mean(x)

}

# (a)

mean(mn)

# (b)

# 不驚奇,因?yàn)閠rue value是1.5,MN越大,那么這個(gè)值越接近1.5

# (c)

sd(mn)

# 20-21 的CA1 Q3------

# 看好了,只有feet和girth

set.seed(4060)

dat <- trees

B <- 100

slp <- numeric(B)

n <- nrow(dat)

for(b in 1:B){

  idx <- sample(c(1:n),n,replace = T)

  ynew <- dat$Height[idx]

  xnew1 <- dat$Girth[idx]

  # BUG FIX: xnew2 was never defined in the original, so lm() errored;
  # the reference fit below uses Girth AND Volume as regressors.
  xnew2 <- dat$Volume[idx]

  lmb <- lm(ynew~xnew1+xnew2)

  slp[b] <- lmb$coefficient[2]

}

lmb <- lm(dat$Height~dat$Girth+dat$Volume)

lmb$coefficients[2]

# (a)

boxplot(slp)

# (b)

mean(slp)

# (c)

sd(slp)

# (d)

# Bootstrap empirical ("basic") CI: 2*theta.hat - (upper, lower) quantiles.
# BUG FIX: the original used lm(y~x) with x and y left over from a previous
# (cars) exercise; the point estimate must come from the trees regression.
lso <- lm(dat$Height ~ dat$Girth + dat$Volume)$coef# slope estimates of the reference fit

2*lso[2] - quantile(slp, probs=c(0.975,0.025))

# w6----------------

mtcars <- mtcars

n <- nrow(mtcars)

dat <- mtcars[sample(1:n,n),]

y = dat$mpg

x <- dat[,-1]

glm1 <- glm(y~.,data = x)

yhat <- glm1$fitted.values#取fitted value

plot(y,yhat)

boxplot((y-yhat)^2)

mean((y-yhat)^2)

train.idx <- c(1:floor(n/2))

train.idx <- c(1:16)

glm.train <- glm(y[train.idx]~.,data = x[train.idx,])

o1 <- glm(y~.,data = x,subset = train.idx)#一樣的

ytrain <- glm.train$fitted.values

ytst <- predict(glm.train,newdata = x[-train.idx,])

mse.train <- mean((glm.train$residuals)^2)

mse.tst <- mean((y[-train.idx]-ytst)^2)

boxplot((glm.train$residuals)^2,(y-ytst)^2)

boxplot((ytrain-y[train.idx])^2)

# 加上Bs:

B <- 100

mseb.train <- mseb.tst <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? glm.t <- glm(y~.,data=x,subset = idx)

? mseb.train[b] <- mean((glm.t$residuals)^2)

? ytst <- predict(glm.t,newdata=x[-idx,])

? mseb.tst[b] <- mean((y[-idx]-ytst)^2)

}

boxplot(mseb.train,mseb.tst)

# k-fold CV

K <- 5

msecv.train <- msecv.tst <- numeric(K)

folds <- cut(1:n,K,labels = F)

table(folds)

for(k in 1:K){

? idx <- which(folds!=k)

? glm.t <- glm(y~.,data = x,subset = idx)

? msecv.train[k] <- mean((glm.t$residuals)^2)

? ytst <- predict(glm.t,newdata = x[-idx,])

? msecv.tst[k] <- mean((y[-idx]-ytst)^2)

}

boxplot(msecv.train,msecv.tst)

B <- 10

mseb.train <- mseb.tst <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? glm.t <- glm(y~.,data=x,subset = idx)

? mseb.train[b] <- mean((glm.t$residuals)^2)

? ytst <- predict(glm.t,newdata=x[-idx,])

? mseb.tst[b] <- mean((y[-idx]-ytst)^2)

}

boxplot(mseb.train,mseb.tst)

# Bs的話用了32-32*0.632個(gè),大約是10個(gè)做test

# CV里K=3的話那么大約用32/3=10個(gè)做test

# 所以我們期望K=3時(shí)兩對(duì)error是一樣的,但是k=5,CV的

# K-fold更accurate,不那么widespread,如果增加B widespread會(huì)變好,但是會(huì)得到很多outliers

# Q5.8--------------

rm(list = ls())

library(bootstrap)

dat <- law

cor(dat$LSAT,dat$GPA)

B <- 1000

n <- nrow(dat)

cr <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? x1 <- dat$LSAT[idx]

? x2 <- dat$GPA[idx]

? cr[b] <- cor(x1,x2)

}

mean(cr)

# bias

mean(cr)-cor(dat$LSAT,dat$GPA)

# native CI

quantile(cr,c(0.025,0.975))

# Q5.9--------------

rm(list = ls())

dat <- cars

# (a)

lm1 <- lm(dat$dist~dat$speed)

lm1$coefficients

# (b)

B <- 10000

n <- nrow(dat)

slp <- numeric(B)

itc <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? xb <- dat$speed[idx]

? yb <- dat$dist[idx]

? lm <- lm(yb~xb)

? slp[b] <- lm$coefficient[2]

? itc[b] <- lm$coefficient[1]

}

lm1$coefficient

mean(slp)

mean(itc)

# (c)

# 不會(huì)

# 這個(gè)題就是在說(shuō)讓看看兩個(gè)estimator的分布,分布的話用hist就行了

hist(slp)

hist(itc)

# (d)

# 不會(huì)

# 記住怎么看joint distr------

library(ellipse)

plot(itc,slp,cex=0.5,pch=20)

e <- ellipse(lm1)

plot(e,t='l')

bcoef <- cbind(itc,slp)

points(bcoef,pch=20,col=8,cex=.5)

points(itc,slp,pch=20,col=1,cex=.5)#是一樣的

sd(slp)

sd(itc)

# 用命令直接Bs--------

library(boot)

#自己定義一個(gè)func,但是這個(gè)func的第一個(gè)argument必須是data,第二個(gè)必須是bs的idx:

my.mean <- function(x, ib){
  # Bootstrap statistic in the form required by boot::boot():
  # first argument is the data, second is the vector of resampled indices.
  resampled <- x[ib]
  mean(resampled)
}

B = 100

x = cars$dist

ob = boot(x, my.mean, R=B)

# ob里的original就是true value,ie mean(x)

# Q5.10 這個(gè)是nls怎么用----------

rm(list = ls())

x = mtcars$disp

y = mtcars$mpg

plot(x,y,pch=20)#有inflexion(屈折),所以用nonlinear可能會(huì)更好

# nonlinear regression比如polynomial/exp

theta <- c(3,-0.01)

nls1 <- nls(y~exp(a+b*x),start = list(a=3,b=-0.01))

fitted(nls1)

plot(fitted(nls1),y)

plot(fitted(nls1)-y)#這個(gè)是residual,應(yīng)該沒(méi)有pattern才對(duì)

hist(fitted(nls1)-y)

# (a)這是怎么著nls的參數(shù)--------

summary(nls1)

summary(nls1)$parameter[1,1]

summary(nls1)$parameter[2,1]

plot(x,y,pch=20)

is = order(x)#返回的事從小到大的位置

#20 19 18 26 28? 3 ......

x[20]

min(x)

lines(x[is], fitted(nls1)[is], pch=20, cex=2, col=4, lwd=2)

?order

# (b)

z <- density(x)

plot(z)

# (c)

set.seed(1)

B = 100

n <- length(x)

theta1 <- numeric(B)

theta2 <- numeric(B)

for(b in 1:B){

? idx <- sample(1:n,n,replace = T)

? xnew <- x[idx]

? ynew <- y[idx]

? bnl <- nls(ynew~exp(a+b*xnew),start = list(a=3,b=-0.01))

? theta1[b] <- summary(bnl)$parameter[1,1]

? theta2[b] <- summary(bnl)$parameter[2,1]

}

mean(theta1)

mean(theta2)

sd(theta1)

sd(theta2)

#se:

sd(theta1)

# nonparametric 95% confidence intervals 就是native

# Q2.11----------

#? coerce強(qiáng)迫

rm(list = ls())

library(MASS)

x = Boston[,c('crim','indus','rm','tax')]

y = Boston$medv

lm1 <- lm(y~.,data = x)

# (a)

n <- nrow(x)

a <- c(1:floor(n/2))

lma <- lm(y~.,data = x,subset = a)

preds <- predict(lma,newdata = x[-a,])

length(preds)

rmsea <- sqrt(mean((y[-a]-preds)^2))

# (b)--------leave-one-out CV

preds <- resi <- numeric(n)

for(i in 1:n){

? tot <- c(1:n)

? b <- tot[-i]

? lmb <- lm(y~.,data = x,subset = b)

? preds[i] <- predict(lmb,newdata = x[i,])

? resi[i] <- y[i]-preds[i]

? rmseb <- sqrt(mean((resi)^2))

}

# (c)

K <- 5

folds <- cut(1:n,K,labels = F)

rmse <- numeric(K)

for(k in 1:K){

? train.idx <- which(folds!=k)

? tst.idx <- which(folds==k)

? lmc <- lm(y~.,data = x,subset = train.idx)

? predsc <- predict(lmc,newdata = x[tst.idx,])

? rmse[k] <- sqrt(mean((y[tst.idx]-predsc)^2))

}

# 但是這里我們忘記打亂了,so:

dat <- Boston[sample(1:n,n),]

x = dat[,c('crim','indus','rm','tax')]

y = dat$medv

K <- 5

folds <- cut(1:n,K,labels = F)

rmse <- numeric(K)

for(k in 1:K){

? train.idx <- which(folds!=k)

? tst.idx <- which(folds==k)

? lmc <- lm(y~.,data = x,subset = train.idx)

? predsc <- predict(lmc,newdata = x[tst.idx,])

? rmse[k] <- sqrt(mean((y[tst.idx]-predsc)^2))

}

rmse

# 重復(fù)R次的CV---------

# R里的每一次都要打亂一次

R <- 10

K <- 5

folds <- cut(1:n,K,labels = F)

rmse <- numeric(K*R)

for (r in 1:R) {

? is <- sample(1:n,n)

? x <- x[is,]

? y <- y[is]

? for(k in 1:K){

? ? train.idx <- which(folds!=k)

? ? tst.idx <- which(folds==k)

? ? lmc <- lm(y~.,data = x,subset = train.idx)

? ? predsc <- predict(lmc,newdata = x[tst.idx,])

? ? rmse <- c(rmse,sqrt(mean((y[tst.idx]-predsc)^2)))#這是在nested for loop里怎feed in

? }

}

rmse

# w7----------------

rm(list = ls())

# 課堂例題 section3 P8的--------

attach(pressure)

x <- temperature

y <- pressure

plot(x,y,pch=20,cex=0.5)#很明顯這不是一個(gè)linear pattern

lm1 <- lm(y~x)

lines(x,fitted(lm1))

lmf <- lm(y~x+I(x^2)+I(x^3)+I(x^4)+I(x^5))

plot(x,y,pch=20,cex=0.5)

lines(x,fitted(lmf))

nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))

summary(nls1)

lines(x,fitted(nls1),col='red')

cc <- coef(nls1)

coefficients(nls1)

curve(exp(cc[1]+cc[2]*x),col='blue',add=T)#和lines一樣

nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))

summary(nls1)

nlstst <- nls(y~exp(a+b*x),start = list(a=0,b=0.02),trace = T)

summary(nlstst)

crit <- function(theta,x,y){
  # Least-squares criterion for the no-intercept linear model y = theta * x;
  # written with theta first so it can be passed to optim().
  resid <- y - x * theta
  sum(resid^2)
}

thbar = 1.8# theta的true value

#x = rep(c(1,3,7,8), len=100)

#y = x*thbar + rnorm(x)

optim.out = optim(par=c(1), fn=crit, x=x, y=y, method='L-BFGS-B',lower=c(-1), upper=c(0))

optim.out # $par是theta的值

curve(exp(1e-07*x),col='blue',add=T)

# grid search----------

# nls(y~exp(a+b*x),start = list(a=0,b=0.02))

# 現(xiàn)在假設(shè)一點(diǎn)都不知道ab的值

x <- temperature

y <- pressure

L <- 10

a.grid <- seq(-1,1,length=L)

b.grid <- seq(0.01,0.5,length=L)

crit <- matrix(NA,ncol=L,nrow=L)

for(i in 1:L){

? for(j in 1:L){

? ? crit[i,j] <- sum( (y-exp(a.grid[i]+b.grid[j]*x))^2 )


? }

}

persp(a.grid,b.grid,crit,theta=45,col = 'lightblue')

b.grid <- seq(0.01,0.02,length=L)

crit <- matrix(NA,ncol=L,nrow=L)

for(i in 1:L){

? for(j in 1:L){

? ? crit[i,j] <- sum( (y-exp(a.grid[i]+b.grid[j]*x))^2 )


? }

}

persp(a.grid,b.grid,crit,theta=45,col = 'lightblue')

contour(a.grid,b.grid,crit)

#什么樣的圖才是好的grid search---------

# BONUS: a nicer example of a criterion surface plot...

rm(list=ls())

set.seed(4060)

n = 100

x = runif(n,-1,1)

y = .2*x+.3*x^2

plot(x,y)

L = 100

ag = seq(-1,1,length=L)

bg = seq(-1,1,length=L)

crit = matrix(NA,nrow=L,ncol=L)

for(i in 1:L){

? for(j in 1:L){

? ? crit[i,j] = sum( (y-ag[i]*x-bg[j]*x^2)^2 )

? }

}

persp(ag,bg,crit,col='lightblue',theta=45)

contour(ag,bg,crit)

# how to fit any model using optim()----------

remove(list = ls())

x <- temperature

y <- pressure

crit <- function(th,x,y){
  # Sum-of-squares criterion for the exponential model y = exp(a + b*x),
  # in the form required by optim(): parameter vector first, scalar return.
  a <- th[1]
  b <- th[2]
  # BUG FIX: the original computed sum(y - exp(a+b*x))^2, i.e. the SQUARE OF
  # THE SUM of residuals (positive and negative residuals cancel); the
  # least-squares criterion is the sum of SQUARED residuals.
  return(sum((y - exp(a + b*x))^2))
}

range(x)

range(y)

?optim

opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y)

# 參數(shù)par:Initial values for the parameters to be optimized over.

opt$par

opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,

? ? ? ? ? ? method = 'L-BFGS-B', lower=c(0.01,0.02), upper=c(0.1,0.03))

opt$par

opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,

? ? ? ? ? ? method = 'L-BFGS-B', lower=c(0,0.02), upper=c(0.1,0.03))

opt$par

opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,

? ? ? ? ? ? method = 'L-BFGS-B', lower=c(-1,0.02), upper=c(0.1,0.03))

opt$par

opt <- optim(par = c(0,0.02),fn=crit,x=x,y=y,

? ? ? ? ? ? method = 'L-BFGS-B', lower=c(-0.6,0.02), upper=c(0.1,0.03))

opt$par

nls1 <- nls(y~exp(a+b*x),start = list(a=0,b=0.02))

summary(nls1)

yhat <- exp(opt$par[1]+opt$par[2]*x)

plot(x,y)

lines(x,fitted(nls1),col='red')

lines(x,yhat,col='blue')

opt1 <- optim(par = c(0,0.02),fn=crit,x=x,y=y)

yhat1 <- exp(opt1$par[1]+opt1$par[2]*x)

lines(x,yhat1,col='orange')#這個(gè)接近nls

# 那么我們應(yīng)不應(yīng)該加第三個(gè)參數(shù)c呢?

crit2 <- function(th,x,y){
  # Least-squares criterion for the scaled exponential model
  # y = c * exp(a + b*x), with th = (a, b, c) as required by optim()
  # (parameter vector first, single numeric value returned).
  a <- th[1]
  b <- th[2]
  scl <- th[3] # additional scaling parameter
  fitted_vals <- scl * exp(a + b * x)
  sum((y - fitted_vals)^2)
}

opt2 <- optim(par = c(0,0.02,1),fn=crit2,x=x,y=y)

opt2$par

yhat2 <- opt2$par[3]*exp(opt2$par[1] + (opt2$par[2])*x)#因?yàn)槭莝cale parameter

# The result is not great:因?yàn)楦患觕的一摸一樣,所以不用加這個(gè)paremeter了

plot(x, y, pch=20,

? ? xlab="Temperature (Celsius)",

? ? ylab="Vapor pressure (ml of mercury)",

? ? main="Example: polynomial regression")

lines(x, yhat2, col='navy')

lines(x, fitted(nls1), col='blue')

lines(x, yhat, col=2, lwd=3)

# w8----------------

#1D overfitting

rm(list = ls())

dat <- na.omit(airquality)

x <- dat$Temp

y <- dat$Ozone

plot(x,y)

dat <- data.frame(x,y)#為了方便寫(xiě)subset和predict,是data.frame不是as.data.frame!!!!

x <- data.frame(x)#這樣不行?。。?!

n <- nrow(x)

train.idx <- which(y<100)#which返回的也是位置

#tst.idx <- which(y>=100)

lmt <- lm(y~.,data = dat,subset = train.idx)

preds <- predict(lmt,newdata = dat[-train.idx,])

sqrt(mean( (preds-y[-train.idx])^2 ))

# multivariate overfitting

rm(list = ls())

dat <- mtcars

n <- nrow(dat)

R <- 20

K <- 2

fit <- numeric(K*R)

mse <- numeric(K*R)

fit.l <- numeric(K*R)

mse.l <- numeric(K*R)

folds <- cut(1:n,K,labels = F)

set.seed(4060)

library('glmnet')

for( r in 1:R){

? dat <- dat[sample(1:n,n),]

? x <- dat[,-1]

? y <- dat$mpg

? xm <- as.matrix(x)#全部都用這個(gè)就行,防止x有categorical

? #glm可以自己識(shí)別categorical

? for(k in 1:K){

? ? train.idx <- which(folds!=k)

? ? glm1 <- glm(y~.,data = x,subset = train.idx)

? ? yh <- fitted(glm1)

? ? fit <- c(fit,mean((yh-y[train.idx])^2))

? ? preds <- predict(glm1,newdata=x[-train.idx,])

? ? mse <- c(mse,mean((preds-y[-train.idx])^2))

? ? cv.lb = cv.glmnet(xm[train.idx,],y[train.idx],grouped=FALSE)

? ? # 10-fold cv

? ? # 上 是怎么找lasso的lambda(高階 參數(shù),不參與model,只是scale)

? ? lasso =? glmnet(xm[train.idx,],y[train.idx],lambda=cv.lb$lambda.min)

? ? yh.l = predict(lasso,newx=xm[train.idx,])#怎么求yhat

? ? fit.l[k+(r-1)*K] = mean((yh.l-y[train.idx])^2)

? ? yp.l = predict(lasso,newx=xm[-train.idx,])# test的prediction

? ? mse.l[k+(r-1)*K] = mean((yp.l-y[-train.idx])^2)#MSE

? }

}

boxplot(fit,mse,fit.l,mse.l,

? ? ? ? names=c("glm fit","glm error","lasso fit","lasso error"),

? ? ? ? ylim=c(0,300))

boxplot(fit,mse,fit.l,mse.l,

? ? ? ? names=c("glm fit","glm error","lasso fit","lasso error"),

? ? ? ? ylim=c(0,25))

#現(xiàn)在這里有categorical variable了

rm(list = ls( ))

library(ISLR)

set.seed(6026)

dat = na.omit(Hitters)

n = nrow(dat)

dat$Salary = log(dat$Salary)# 原始dta太skewed了,取log讓data更normal

REPS = 50

train.error = test.error = numeric(REPS)

lasso.train.error = lasso.test.error = numeric(REPS)

for(j in 1:REPS){

? dat = dat[sample(1:n, n, replace=FALSE),]


? i.train = c(1:200)# 用前200個(gè)作為training,后63個(gè)當(dāng)test

? train = dat[i.train,]

? test = dat[-i.train,]

? fit = glm(Salary~., data=train)

? pred = predict(fit, newdata=test)


? train.error[j] = mean(fit$residuals^2)

? test.error[j] = mean((pred-test$Salary)^2)


? xm = model.matrix(Salary~., data=train)[,-1]

? newxm = model.matrix(Salary~., data=test)[,-1]

? y = train$Salary

? lam = cv.glmnet(xm, y, data=train)

? lasso = glmnet(xm, y, data=train, lambda=lam$lambda.min)

? lasso.pred = predict(lasso, newx=newxm)


? lasso.fit = predict(lasso, newx=xm)

? lasso.train.error[j] = mean((lasso.fit-train$Salary)^2)

? lasso.test.error[j] = mean((lasso.pred-test$Salary)^2)

}

summary(fit)

coef(lasso) #怎么看被lasso mute down了-----------

par(font=2, font.axis=2)

boxplot(train.error, test.error,

? ? ? ? main="MLB player salary prediction",

? ? ? ? names=c("Training error", "Test error"),

? ? ? ? col=c('pink','cyan'))

# 另一個(gè)例子Prestige example

rm(list = ls( ))

library(car)? # contains dataset Prestige

library('glmnet')

pairs(Prestige)

y = Prestige$income

x = Prestige[,c("education","prestige","women")]

class(x) # note x is of class "data.frame" as opposed to "matrix"

lmo = lm(y~., data=x) # this works though (check out ?lm)

summary(lmo)

# how to predict from this fit:

newx = data.frame(education=seq(11,16,length=5),prestige=seq(14,80,length=5),women=seq(10,90,length=5))

lmo.pred = predict(lmo, newdata=newx)

# 只predict一個(gè):

newx = data.frame(education=c(13),prestige=c(61),women=c(12))

lmo.pred = predict(lmo, newdata=newx)

xm = model.matrix(y~., data=x)[,-1]

lasso.cv = cv.glmnet(xm, y, alpha=1)#Cross-validation for glmnet

lasso = glmnet(xm, y, alpha=1, lambda=lasso.cv$lambda.min)

# let's compare LASSO with our earlier linear regression output:

cbind(coef(lasso), coef(lmo))

# how to predict from this fit:

# ?predict.glmnet

class(newx)

lasso.pred = predict(lasso, newx=as.matrix(newx))

# NB:

cbind(lasso.pred, lmo.pred) # close enough to the lm() predictions

mean((lasso.pred-lmo.pred)^2)

# percentage error-----------

mean((lasso.pred-lmo.pred)/lmo.pred)*100 # less than 1% error...

#只有-0.1155595,所以origional model是夠好的

# 另一個(gè)model: Blood Pressure example

library(glmnet)

setwd('/Users/apple/Desktop/22-r/ST4060/ST4060txt-csv')

dat = read.table(file="Blood Pressure.txt",header=TRUE)

str(dat)

dat$PatientID = NULL # getting rid of this variable

# GLM fit

glm.fit = glm(Systolic~., data=dat)

mean(glm.fit$residuals^2)

# LASSO - note: fitting these models is rather cody...

alpha = 1

# prepare the data:

xm = model.matrix(Systolic~., data=dat)[,-1]

y = dat$Systolic

# compute best lambda:

lam = cv.glmnet(xm, y, alpha=alpha)

# c) fit model using best lambda:

lasso = glmnet(xm, y, lambda=lam$lambda.min)

# d) recover fitted values:

lasso.fit = predict(lasso, newx=xm)

# e) calculate RSS or MSE from LASSO

mean((lasso.fit-y)^2)

rm(list = ls( ))

dat = na.omit(airquality)

x = dat$Temp

y = dat$Ozone

plot(x,y)

nl.crit <- function(x, y, a, b){
  # Mean squared error of the exponential model y = a * exp(b * x),
  # used as the objective surface for the grid search below.
  pred <- a * exp(b * x)
  mean((y - pred)^2)
}

# GRID SEARCH:----------

Pa = Pb = 100

a.grid = seq(0,2,length=Pa)

b.grid = seq(0,1,length=Pb)

crit.values = matrix(NA, nrow=Pa, ncol=Pb)

for(i in 1:Pa){

? for(j in 1:Pb){

? ? crit.values[i,j] = nl.crit(x,y,a.grid[i],b.grid[j])

? }

}

ij.best = which.min(crit.values)

ij.best = arrayInd(ij.best, .dim=dim(crit.values))

# location of the minimum of the surface

# grid search solutions:

a.best = a.grid[ij.best[1]]

b.best = b.grid[ij.best[2]]

fitted = a.best * exp(b.best*x)

nlso = nls(y~a*exp(b*x), start=list(a=a.best, b=b.best))

a.best

b.best

plot(x, y, pch=20, xlab="Temperature (degrees F)",

? ? ylab="Ozone (ppb)",

? ? xlim=c(55,105))

points(x, fitted(nlso), pch=20, col='orange')

# 直接用optim也能smooth---------

dat = na.omit(airquality)

x = dat$Temp

y = dat$Ozone

#### Pick a nonlinear model and fit it using optim()

# First, write a function that evaluates the criterion:

criterion <- function(param,x,y){
  # MSE criterion for the model y = a * exp(b*x), written for optim():
  # the parameter vector must be the FIRST argument and the return value
  # a single number (cf. ?optim).
  a <- param[1]
  b <- param[2]
  mean((y - a * exp(b * x))^2)
}

# Then, apply optim() to it:

oo = optim(par=list(a=1, b=0.01), fn=criterion, x=x, y=y)

# now reconstruct fitted values from optim() output:

a.hat = oo$par[1]

b.hat = oo$par[2]

y.hat = a.hat * exp(b.hat*x)

plot(x,y)

is <- order(x)

lines(x[is], y.hat[is], pch=21, col=2)

# w9----------------

# B-spline--------

# using dataset ahmd$mx from library MortalityLaws, plot the crude force of mortality curve.

rm(list = ls())

library('MortalityLaws')

age <- c(0:110)

mM <- ahmd$mx[,4]

# BUG FIX: there is no `dat` in scope after rm(list = ls()); plot against age.
plot(age,mM,t='b',pch=20)# Crude force of mortality (2010)

plot(age,log(mM))

library('splines')

BM3 <- bs(age)      # default cubic B-spline basis (3 columns)

BM6 <- bs(age,df=6) # basis with 6 degrees of freedom

matplot(age,BM3,t='b')# Plot Columns of Matrices

matplot(age,BM6,t='b')

# NOTE(review): the original had bs(age, knots = F); FALSE is coerced to the
# numeric 0, i.e. a single interior knot at age 0 -- almost certainly not
# intended. knots = NULL (the default) means no interior knots.
BMtst <- bs(age,knots = NULL)

# Compute the design matrix for 3 dof’s and quote the projection for age 40 onto

# it. Then reconstruct the B-spline approximation to the crude male force data

# using linear regression. Plot the output over the data points and criticize.

age <- c(0:110)

mM <- ahmd$mx[,4]

BM <- bs(age)# compute bases

lm1 <- lm(mM~BM)

y <- numeric(length(age))#一個(gè)vector

for (i in 1:length(age)) {

? y[i] <- as.numeric(sum(lm1$coef[2:4]*BM[i,])+lm1$coef[1])

}

lm1$coefficients

plot(age,mM,pch=20)

lines(age,y,t='l',col=4)# 這兩條線是一樣的

lines(age,fitted(lm1),t='l',col=2)

y-fitted(lm1)

# 更好的B-spline:

BM <- bs(age,df=6)# compute bases

lm1 <- lm(mM~BM)

y <- numeric(length(age))

for (i in 1:length(age)) {

? y[i] <- as.numeric(sum(lm1$coef[2:7]*BM[i,])+lm1$coef[1])

}

plot(age,mM,pch=20)

lines(age,y,t='l',col=4)

lines(age,fitted(lm1),t='l',col=2)

# Continuing from previous exercise, compute the B-spline as a power series,

# i.e. compute the traditional cubic spline representation

# S(x)=β0+β1x+β2x2+β3x3+∑j=1Jαj(x?6?1xj)3+

# where (x?6?1xj)3+=max((x?6?1xj)3,0), using J=3 interior knots at 40, 60 and 80.

# Compare with the fitted values from the regression onto the design matrix

# obtained from splines::bs() for those knots.

x <- c(0:110)

mM <- ahmd$mx[,4]

BM <- bs(x,knots = c(40,60,80))# the design matrix

lm2 <- lm(mM~BM)

# quick scalar checks of the truncated cubic (x - 40)^3_+ on either side of the knot:
pmax((41-40)^3,0)

pmax((39-40)^3,0)

# BUG FIX: the original left x as the SCALAR 39 at this point, so the power
# basis k40/k60/k80 and lm3 below were built from a single value and the fit
# errored; restore the full age vector before building the basis.
x <- c(0:110)

k40 <- pmax((x-40)^3,0)

k60 <- pmax((x-60)^3,0)

k80 <- pmax((x-80)^3,0)

# traditional cubic spline as a power series, interior knots at 40/60/80:
lm3 <- lm(mM~x+I(x^2)+I(x^3)+k40+k60+k80+0)

plot(x,mM,pch=20)

lines(x,fitted(lm3),col='red')

lines(x,fitted(lm2),col='blue')

par(mfrow=c(1,2))

matplot(BM)# original basis for B-spline

POW <- cbind(x,x^2,x^3,k40,k60,k80)

matplot(POW)

# w10----------------

# loess是多個(gè)x,lowess只有一個(gè)x

rm(list = ls())

dat = na.omit(airquality)

t = dat$Temp

y = dat$Ozone

plot(t,y,xlim = c(50,110) )

range(t)

newx <- c(95:100)

abline(v=newx,col='blue')

lo <- loess(y~t)

summary(lo)

# check output values are in the same order as input values:
# BUG FIX: the predictor in this section is t, not x (x is not defined here).

summary(c(lo$x - t)) # ok, they are

summary(c(lo$y - y)) # same order as the input

pred.lo <- predict(lo,newdata = data.frame(t=newx))# column name must match the fitted predictor

points(newx,pred.lo,col='red')# loess cannot predict outside the range of t

low <- lowess(t,y)

summary(low)

summary(c(low$x - t))       # NO, THEY ARE NOT

summary(c(low$x - sort(t))) # because lowess() sorted the input values!

# NOTE(review): the original also computed sqrt(mean((y-fitted(nlso))^2)), but
# no nls fit `nlso` exists in this section after rm(list = ls()); that line is
# dropped as it could never run.
sqrt( mean( (y-fitted(lo))^2 ) )

sqrt( mean( (y[order(t)]-low$y)^2 ) )

pred.low <- approx(low$x,low$y,xout = newx,rule = 2)

# lowess沒(méi)有predict,所以我們用了linear interpolation的方法

# rule=2用的是極限值,rule=1就是返回這個(gè)值

pred.low

pred.low$y

points(newx,pred.low$y,col='red',pch=20,cex=.8)

# 這個(gè)也是不夠好,因?yàn)槌隽朔秶竽鼐褪侵本€了,這是因?yàn)閍pproox

# 是linear interpolation,而且用的是極限值,所以說(shuō)當(dāng)右側(cè)沒(méi)有點(diǎn)

# 之后自然是一條直線

tp <- approx(lo$x,y=lo$fitted,xout = newx,rule = 2)

# 對(duì)loess用linear interpolation

tp$y

points(newx,tp$y,pch=20,col='orange')

# 這也不是最好的,畢竟都是在linear interpolation

# 所以用spline可以解決

# Using B-splines------------

library('splines')

KN <- quantile(t,c(0.15,0.40, 0.60, 0.70, 0.80, 0.90))

BM <- bs(t,knots = KN)

bl <- lm(y~BM)# 這種以后就不寫(xiě)了

ny = predict(BM, newx)

pred.bspline = predict(bl, newdata=as.data.frame(ny)) # nope 不匹配

# b-spline 的正確predict方式

B.spline = lm(y~.,data=data.frame(BM))# 不用lm(y~BM),所以下次lm就寫(xiě). 不寫(xiě)別的way

# 這樣pred才能work well

pred.bspline = predict(B.spline, newdata=data.frame(ny))

pred.bspline

points(newx,pred.bspline,col='red')

# P-spline--------

rm(list = ls())

dat = na.omit(airquality)

t = dat$Temp

y = dat$Ozone

plot(t,y,xlim = c(50,110) )

newx <- c(95:100)

abline(v=newx,col='blue')

ps <- smooth.spline(t,y,cv=T)

# how to predict from the P-spline:

pred.pspline = predict(ps, x=newx)#這個(gè)x=是個(gè)argument

KN <- quantile(t,c(0.15,0.40, 0.60, 0.70, 0.80, 0.90))

BM <- bs(t,knots = KN)

b.s <- lm(y~.,data = data.frame(BM))

ny = predict(BM, newx)

pred.bspline <- predict(b.s,newdata =data.frame(ny) )

points(t, fitted(b.s), pch=20, col='brown', cex=1.5)

points(t, fitted(ps), pch=15, col='navy')

legend("topleft", legend=c("data","NLS fit","B-spline","P-spline"),

? ? ? col=c(8,4,2,'navy'), pch=c(20,20,20,15), bty='n')

points(newx,pred.bspline, col='brown', pch=14)# 這幾個(gè)點(diǎn)follow general pattern

points(pred.pspline, col=3, pch=14)

plot(t,y,xlim = c(50,110) )

points(t, fitted(b.s), pch=20, col='brown', cex=1.5)

points(t, fitted(ps), pch=15, col='navy')

# 這里P比B好

# 控制B-spline

x <- t

KN = quantile(x, c(0.1, 0.4, 0.7))

BM = bs(x, knots=KN)

KN = quantile(x, c(0.05, 0.10, 0.20, 0.23, 0.45, 0.65, 0.95))

BM = bs(x, knots=KN)

KN = quantile(x, c(0.15,0.45, 0.65, 0.95))

BM = bs(x, knots=KN)

KN = quantile(x, c(0.15,0.45, 0.65, 0.95, 0.99)) # 這個(gè)很dramastic

BM = bs(x, knots=KN)

KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.91))? # 這個(gè)比較合

BM = bs(x, knots=KN)

KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.92))

BM = bs(x, knots=KN)

KN = quantile(x, c(0.15,0.45, 0.65, 0.70, 0.85, 0.93))

BM = bs(x, knots=KN)

# konts不同那么end處的estimate是很不一樣的,B-spline很難控制end

B.spline.df = lm(y~., data=as.data.frame(BM))

newBM = predict(BM, newx)

pred.bspline = predict(B.spline.df, newdata=as.data.frame(newBM))

plot(x, y, pch=20, col=8,

? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",

? ? xlim=c(55,105))

points(x, fitted(B.spline.df), pch=20, col='red')

lines(newx, pred.bspline, col='navy', pch=8, t='b')

# 那么用P-spline控制

# NOTE: control degrees of freedom in P-splines

P.spline3 = smooth.spline(x, y, df=3)# df控制一個(gè)knot用多少data points

P.spline5 = smooth.spline(x, y, df=5)

P.spline10 = smooth.spline(x, y, df=10)# df越小線越平

pred3 = predict(P.spline3, x=newx)

pred5 = predict(P.spline5, x=newx)

pred10 = predict(P.spline10, x=newx)

#

plot(x, y, pch=20, col=8,

? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",

? ? xlim=c(55,105))

points(x, fitted(P.spline3), pch=15, col='navy',cex=.5)

points(x, fitted(P.spline5), pch=15, col='blue',cex=.5)

points(x, fitted(P.spline10), pch=15, col='cyan',cex=.5)

points(pred3, col='navy', pch=20)

points(pred5, col='blue', pch=20)

points(pred10, col='cyan', pch=20)

# 其中有一條線與B-spline長(zhǎng)得很像,所以控制h may be a good way:

# NOTE: control smoothness (ie penalty) parameter in P-splines

P.spline10a = smooth.spline(x, y, df=10)

P.spline10b = smooth.spline(x, y, df=10, spar=.9)# spar=smoothing parameter

P.spline10c = smooth.spline(x, y, df=10, spar=.1)

#

plot(x, y, pch=20, col=8,

? ? xlab="Temperature (degrees F)", ylab="Ozone (ppb)",

? ? xlim=c(55,105))

points(x, fitted(P.spline10a), pch=15, col='navy',cex=.5)# 這個(gè)是默認(rèn)的

points(x, fitted(P.spline10b), pch=15, col='blue',cex=.5)# h=0.9,很平

points(x, fitted(P.spline10c), pch=15, col='red',cex=.5)# 很snesitive,h=0.1

# plot the latter as a curve (requires re-ordering the points):

is = order(x)# 就是怎么畫(huà)圖

xs = x[is]

lines(xs, fitted(P.spline10c)[is], pch=15, col='red')

# Q4.1----------

rm(list=ls())

setwd('/Users/apple/Desktop/22-r/ST4060/ST4060txt-csv')

library(splines)

LT <- read.table('irl_lifetable_2005.txt',header = T )

head(LT)

SLT = LT[c(1:106),]

mx = SLT[,8]

x = SLT$Age

onls = nls(mx~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))

ABc = summary(onls)$coef[,1]#這是true value,怎么也算不出來(lái)的

set.seed(1)

x = seq(0, 110, by=1)

mx = ABc[1]+ABc[2]*ABc[3]^x? # this is the theorical model

mxn = mx

s1 = which(x<86)

s2 = which(x>85)

mxn[s1] = pmax(0.005,mx[s1]+rnorm(length(s1),0,.03))

mxn[s2] = mx[s2]+rnorm(length(s2),0,.06)# mxn has noise in it

dat = cbind(x,mx,mxn)

# (a)
# Plot the noise-free ("true") model values -- in practice unobservable.
plot(x,mx)
# Overlay the noisy values mxn: these play the role of our observations,
# i.e. the data we actually work with.
points(x,mxn,cex=0.8,pch=20)

# Fit the model A + B*c^x directly to the noisy data.
nls.ns = nls(mxn~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))

lines(x,fitted(nls.ns),col='red')

# Extract the coefficient estimates from the direct NLS fit.
ABc.ns = summary(nls.ns)$coef[,1]

# (b)
# "Repeat the previous step, but on the smoothed curve obtained from a
# generic P-spline" -- hence first smooth the noisy data:
ps <- smooth.spline(x,mxn)

?smooth.spline

summary(ps)

lines(x,fitted(ps),col='blue')

# ...then refit NLS to the smoothed values instead of the raw data.
sy = ps$y

nls.ps = nls(sy~A+B*c^x,start=list(A=.0003, B=.00002, c=1.108))

ABc.ps = summary(nls.ps)$coef[,1]

# Squared errors of each estimate against the "true" ABc
# (per-parameter squared errors, labelled "MSE" in the exercise):
(ABc-ABc.ns)^2

(ABc-ABc.ps)^2

(ABc-ABc.ns)^2-(ABc-ABc.ps)^2 # still slightly smaller after P-spline smoothing

# percentage errors:
((ABc-ABc.ns)/ABc)

((ABc-ABc.ps)/ABc)

((ABc-ABc.ns)/ABc)-((ABc-ABc.ps)/ABc) # again slightly smaller after smoothing

# The above is a way of doing a simulation using real-life data.
# Take-away: when fitting a complicated model, it can help to smooth the
# data first (here with a P-spline) to reduce the level of noise, which a
# nonlinear fitting procedure would be very sensitive to.

# Q4.2 ----------------------

rm(list = ls())

# Insurance mortality data: female mortality rates mF by age.
dat <- read.csv('insdata.csv')

age = dat$Age

mF = dat$mF

# (a)
# P-spline with smoothing parameter chosen by cross-validation.
sp <- smooth.spline(age,mF,cv=T)

plot(age,mF,pch=20,cex=0.8)

lines(age,fitted(sp),col='red')

# (b)
# Refit with HALF the CV-chosen smoothing parameter: a wigglier fit.
par = sp$spar

sp2 <- smooth.spline(age,mF,cv=F,spar=par/2)

lines(age,fitted(sp2),col='blue')

# spar = smoothing parameter
# df = equivalent degrees of freedom (controls overall smoothness)

# (c)
sp$x-sp2$x # check sp$x and sp2$x are identical (all differences zero)

# (d)
sp$x-age # also check the fitted x values match the input ages

mean((mF-sp$y)^2)

mean((mF-sp2$y)^2)

sqrt(mean((mF-sp$y)^2))

sqrt(mean((mF-sp2$y)^2))

# Judging by the numbers alone, the second fit looks better (smaller RMSE),
# but it corresponds to the wiggly blue curve in the plot above,
# which is not what we want here.
# So be clear about the goal: here we want a smooth curve, not a close fit,
# hence the fit with the LARGER RMSE is actually preferable.

sd(mF-sp$y) # variance component of the error

sd(mF-sp2$y)# variance component

mean(mF-sp$y) # bias component

mean(mF-sp2$y)# bias component

# The bias is very small, so most of the MSE is contributed by the
# variance -- which is why the smoother spline has the larger MSE.

# (e)
# Cubic B-spline basis with interior knots at the age quartiles.
KN <- quantile(age,c(0.25,0.5,0.75))

BM <- bs(age,knots = KN)

matplot(age,BM,t='b')# do not forget to pass age as the x-axis!!!

# (f)
# Basis-function coordinates of the observation at age 60.
round(BM[which(age==60),],4)

abline(v=60, lty=3)

abline(h=BM[which(age==60),], lty=2)# how to indicate them on the plot

# (g)
# Regress mF on the B-spline basis columns.
# NOTE(review): this assignment masks splines::bs() with an lm object
# for the rest of the session -- consider a different name.
bs <- lm(mF~.,data=data.frame(BM))

bs$coefficients

# (h)
# Compare training MSEs: B-spline regression vs the two P-splines.
mean((bs$residuals)^2)

mean((mF-sp$y)^2)

mean((mF-sp2$y)^2)

plot(age,mF)

lines(sp,col='red')

lines(sp2,col='blue')

lines(age,fitted(bs),col='orange')

# The B-spline is very sensitive at the tail end; that's because of the way
# the knots are arranged.

# This B-spline seems comparable to the 1st P-spline, and
# not as effective as the second one.

# (i)

# First, see that we're missing some ages:
plot(age)

# Interpolation grid covering every age in one-year steps:
all.ages = seq(min(age),max(age),by=1)
plot(all.ages)

# Linear interpolation of the CV P-spline fit onto the full grid
# (spelling out all arguments is fine):
lip <- approx(sp$x,sp$y,xout = all.ages)
sd(lip$y)

# FIX: 'lpred' was used below without ever being defined; take it to be
# the P-spline's own prediction on the interpolation grid.
lpred <- predict(sp, x = all.ages)$y

# Lowess fit, interpolated onto the same grid:
lo <- lowess(age,mF)
lilo <- approx(lo$x,lo$y,xout = all.ages)
lpred-lilo$y   # compare the two smoothers on the grid
sd(lilo$y)

# Overlay both interpolated curves on the data:
plot(age,mF)
lines(all.ages,lip$y,col='red')
lines(all.ages,lpred,col='blue')

# Q4.3 ----------------------

rm(list = ls())

library('car')
library(splines)

# Prestige data: model prestige score as a function of income.
dat <- Prestige
x = Prestige$income
y = Prestige$prestige

# B-spline:
BM <- bs(x,df=6)
# FIX: the fitted model was named 'bs', masking splines::bs(); renamed.
bso <- lm(y~.,data = data.frame(BM))
bmse <- mean((bso$residuals)^2)

# P-spline:
ps <- smooth.spline(x,y)
ps$x-x   # smooth.spline sorts/uniquifies x, so fitted x differ from input

# For the P-spline, we need to interpolate back onto the original x!
range(x)
range(ps$x)

# When the ranges coincide, rule=1 and rule=2 give identical output;
# rule=2 is needed only when xout falls outside the range of x.
yp <- approx(ps$x,ps$y,xout=x)$y
yp2 <- approx(ps$x,ps$y,xout=x,rule = 2)$y
pmse <- mean((y-yp)^2)

# w11 ----------------
# k-means clustering and PCA demos on built-in data.

rm(list = ls())

x = iris[,c(2:4)]
y = iris[,5]

# k-means with K=2 clusters on the raw (unscaled) measurements.
K <- 2
ko <- kmeans(x,K)
# ?kmeans  # interactive help call, commented out for scripted runs

is <- c(1,3)
plot(x[,is],col=c(1,2,4)[ko$cluster],pch=20,cex=2)

# Standardise each column, then cluster again:
z <- apply(x,2,scale)
# sanity check: manual standardisation of the first cell matches scale():
(x[1,1]-mean(x$Sepal.Width))/sd(x$Sepal.Width)-z[1,1]
koz <- kmeans(z,K)
plot(x[,is],col=c(1,2,4)[koz$cluster],pch=20,cex=2)

pairs(iris[,1:4],pch=20)
cor(iris[,1:4])

# PCA on the European stock indices (4-dimensional time series):
dat <- EuStockMarkets
plot(EuStockMarkets, lwd=1.5)
pca <- prcomp(EuStockMarkets)
# ?prcomp  # Principal Components Analysis

par(mfcol=c(4,1), mar=c(1,3,0,1))
# Decompose the 4-dimensional set of time series into component series.
# FIX: the loop body line began with a garbled '?' character that broke
# parsing.
for(i in 1:4){
  plot(pca$x[,i], t='l', lwd=2)
}
summary(pca)
# ??EuStockMarkets  # interactive help search, commented out

names(pca)
pca$x
plot(pca)
plot(prcomp(iris[,1:4]))
plot(prcomp(iris[,1:4], scale.=TRUE))

# Logistic regression based classification --------------

rm(list = ls())

library(ISLR)

dat <- Smarket
head(Smarket)
# ?Smarket  # interactive help, commented out for scripted runs

# Full-data logistic regression of market direction on lags + volume.
glm1 <- glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,
            data=dat, family=binomial)
summary(glm1)

n <- nrow(dat)

# Train on pre-2005 observations only.
itrain <- which(dat$Year<2005)
glm1.cv <- glm(Direction~Lag1+Lag2,
               data=dat, subset = itrain, family=binomial)
summary(glm1.cv)

# Fitted probabilities on the training set.
po <- predict(glm1.cv,type = 'response')
summary(po)

glm.pred= rep("Down", length(po))
glm.pred[po>.5] = "Up"
tb <- table(glm.pred,dat$Direction[itrain])
tb
sum(tb)
length(itrain)
1-sum(diag(tb))/sum(tb)# overall (training) error rate

# BUG FIX: itrain is an integer index vector, so '!itrain' is all-FALSE
# and selected nothing; negative indexing selects the 2005 test rows.
Direction.2005 <- dat$Direction[-itrain]

# Test-set (year 2005) predicted probabilities.
po.2005 <- predict(glm1.cv,type ='response',newdata = dat[-itrain,])
length(po.2005)
length(po.2005)+length(po)
nrow(dat)

glm.pred.2005= rep("Down", length(po.2005))
glm.pred.2005[po.2005>.5] = "Up"
tb.test = table(glm.pred.2005,dat$Direction[-itrain])
1-sum(diag(tb.test))/sum(tb.test)   # test error rate

# Logistic regression based classification
# another example using iris --------------------------------------
# Reduce to a two-class problem: virginica vs the rest.

rm(list = ls())

iris <- iris
n <- nrow(iris)
n

# Shuffle the rows so the sequential CV folds below are random.
x <- iris[sample(1:n,n),]

is.var <- (x$Species=='virginica') # logical test, returns TRUE/FALSE
# is.var <- which(x$Species=='virginica') # would return positions; we need TRUE/FALSE
x$Species <- is.var

glmo <- glm(Species~., x, family=binomial)
# in-sample 'response' predictions coincide with the fitted values:
glmo$fitted.values - predict(glmo, type='response')

# Equivalent formula specifications -- just make sure the response and a
# covariate are not the same vector:
a <- x$Sepal.Length
b <- x[,-1]
lm1 <- lm(a~.,data = b)
lm2 <- lm(a~.,data = x)
lm3 <- lm(Sepal.Length~.,data = x)

# K-fold cross-validation of classification accuracy.
# FIX: the loop body lines began with garbled '?' characters (and a stray
# trailing '?' after mean(train.acc)) that broke parsing.
n = nrow(x)
K = 10
train.acc = test.acc = numeric(K)
folds = cut(1:n, K, labels=FALSE)
k = 1
thr = .8   # classification threshold on predicted probability

for(k in 1:K){
  itrain = which(folds!=k)
  glmo = glm(Species~., data=x,
             family=binomial,
             subset=itrain)
  # training accuracy on fold k's training set:
  tb1 = table(glmo$fitted.values>thr, x$Species[itrain])
  train.acc[k] = sum(diag(tb1))/sum(tb1)
  # prediction on the held-out fold:
  p.test = predict(glmo, x[-itrain,], type='response')
  tb2 = table(p.test>thr, x$Species[-itrain])
  test.acc[k] = sum(diag(tb2))/sum(tb2)
}

mean(train.acc)
mean(test.acc)
tb1
tb2

# Illustration of hierarchical clustering ------------------------------------------------
# Compare linkage methods on the European inter-city distance matrix.

data(eurodist)

# FIX: method="ward" was renamed in R; use "ward.D" (cf. also "ward.D2"),
# consistent with the hclust call later in this file.
hc = hclust(eurodist, method="ward.D")
plot(hc)

hc = hclust(eurodist, method="single")
plot(hc)

hc = hclust(eurodist, method="complete")
plot(hc)

# another Clustering examples

# partition-based clustering------------------------------------------------

rm(list = ls())

x = iris[,1:4]

plot(x[,1:2], pch=20)

# Colour by the true species labels for reference.
plot(x[,1:2], pch=20, col=c(1,2,4)[iris$Species])

plot(x[,c(3,4)], pch=20,col=c(1,2,4)[iris$Species])

# hierarchical clustering

# (this part is unrelated to what follows)

# NOTE(review): xs holds the standardised data but is never used below --
# dist(x) clusters the UNscaled data; possibly dist(xs) was intended, confirm.
xs = apply(x, 2, scale)

ho = hclust(dist(x), method="ward.D")

# Cut the dendrogram into 3 clusters and colour points by cluster.
hc = cutree(ho, k=3)

plot(x[,1:2], pch=20, col=hc)

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容