這個系列,我們陸續(xù)的學(xué)習(xí)一些散點圖的技巧,其實像前面的火山圖,PCA圖等均是散點圖,只不過對象和數(shù)據(jù)類型不太一樣而已。
今天我們學(xué)習(xí)如何給散點圖添加擬合曲線以及置信區(qū)間。下面這個圖是paper中的一個散點圖。

我們還是隨機生成了一組測試數(shù)據(jù)
library(Hmisc)
library(ggplot2)
data <- data.frame(gene1= sort(runif(50, min = 3, max = 7))[c(1:10,sample(11:41),42:50)],
? ? ? ? ? ? gene2=sort(runif(50, min = 21, max = 26), decreasing = T)[c(1:10,sample(11:41),42:50)])
runif函數(shù)是隨機生成min-max之間符合均勻分布的隨機數(shù)。
# 計算相關(guān)系數(shù)和p值,我們用的是rcorr函數(shù)。
res <- rcorr(data$gene1, data$gene2)
其實計算相關(guān)系數(shù)的包很多,大家隨便找個用就成。從中,我們提取p值和R值。
p_value <- res$P[1,2]
cor_value <- round(res$r[1,2], 2)
ggplot(data,aes(gene1, gene2))+
? geom_point()+
? #theme_bw()+? #4個邊框都有
? theme_classic()+? ? #邊框沒有右上
? geom_smooth(method = "lm", formula = y ~ x, color = "black", fill = "#b2e7fa",alpha = 0.8)+
? theme(
? ? panel.grid = element_blank(),
? ? axis.title = element_text(face = "bold.italic"),
? ? plot.title = element_text(hjust = 0.5)
? ? )+
? labs(title = paste0("R = ", cor_value, ", p = ", p_value))
注:#geom_smooth函數(shù)用來向散點圖中添加擬合曲線,當(dāng)然,這里只是用了lm直線擬合方法,同樣可以選擇一般線性模型glm、一般加性模型gam和曲線loess等。

這個是改成loess擬合的結(jié)果。
ggplot(data,aes(gene1, gene2))+
? geom_point()+
? #theme_bw()+
? theme_classic()+
? #geom_smooth(method = "lm", formula = y ~ x, color = "black", fill = "#b2e7fa",alpha = 0.8)+
? geom_smooth(method = "loess", formula = y ~ x, color = "black", fill = "#b2e7fa",alpha = 0.8)+
? theme(
? ? # 去除網(wǎng)格線:
? ? panel.grid = element_blank(),
? ? # 修改坐標(biāo)軸標(biāo)簽
? ? axis.title = element_text(face = "bold.italic"),
? ? # 標(biāo)題居中:
? ? plot.title = element_text(hjust = 0.5)
? ? )+
? labs(title = paste0("R = ", cor_value, ", p = ", p_value))

如果想像別人paper中,無非就是多幾組數(shù)據(jù),寫個for循環(huán)就行。
# 數(shù)據(jù)新增5列:
# 數(shù)據(jù)新增5列:
data$gene3 <- sort(runif(50, min = 13, max = 26), decreasing = T)[c(1:10,sample(11:41),42:50)]
data$gene4 <- sort(runif(50, min = 12, max = 21), decreasing = T)[c(1:10,sample(11:41),42:50)]
data$gene5 <- sort(runif(50, min = 10, max = 23), decreasing = T)[c(1:10,sample(11:41),42:50)]
data$gene6<- sort(runif(50, min = 9, max = 19), decreasing = T)[c(1:10,sample(11:41),42:50)]
data$gene7 <- sort(runif(50, min = 11, max = 22), decreasing = T)[c(1:10,sample(11:41),42:50)]
p_list <- list()
for (i in 2:ncol(data)) {
? res <- rcorr(data$gene1, data[,i])
? p_value <- signif(res$P[1,2], 2)
? cor_value <- round(res$r[1,2], 2)
? # 每次新建一個繪圖數(shù)據(jù)框:
? data_new <- data[,c(1,i)]
? colnames(data_new) <- c("gene1", "gene")
? head(data_new)?
? p<-ggplot(data_new,aes(gene1, gene))+
? geom_point()+
? #theme_bw()+
? theme_classic()+
? ylab(colnames(data)[i])+
? geom_smooth(method = "lm", formula = y ~ x, color = "black", fill = "#b2e7fa",alpha = 0.8)+
? #geom_smooth(method = "lm", formula = y ~ x, fill = "#b2e7fa", color = "#00aeef", alpha = 0.8)+
? theme(
? ? # 去除網(wǎng)格線:
? ? panel.grid = element_blank(),
? ? # 修改坐標(biāo)軸標(biāo)簽
? ? axis.title = element_text(face = "bold.italic"),
? ? # 標(biāo)題居中:
? ? plot.title = element_text(hjust = 0.5)
? ? )+
? labs(title = paste0("R = ", cor_value, ", p = ", p_value))
? p_list[[i-1]] <- p
}
library(cowplot)
library(patchwork)
p <- plot_grid(p_list[[1]], p_list[[2]], p_list[[3]], p_list[[4]], p_list[[5]], p_list[[6]], ncol = 3)
ggsave("plot.pdf", plot = p, height = 6, width = 9)
