前面學(xué)習(xí)了一些箱圖的畫圖技巧,這次我們試著重復(fù)一個paper中的圖。從這個paper中來看,是在普通箱圖的基礎(chǔ)上又分成了Tumor和Normal2個group,并且每個group進行了統(tǒng)計學(xué)的顯著性檢驗。另外按照mean/median進行了降序排序。但是并不是每個project都有2組。利用統(tǒng)計的方法是wilcoxon rank-sum test。

library(ggplot2)
library(ggpubr)
library(tidyverse)
library(latex2exp)
library(rstatix)
data <- read.csv("hsa-let-7a-5p.TCGA_PanCancer_Expression_Data.csv", header = T)
head(data)

data_new <- data %>%
group_by(project) %>%
mutate(median = median(expr), group_max = max(expr)) %>%? #按project來計算均值
arrange(desc(median))? ? #按median降序排序
# 調(diào)整因子順序,因為geom_boxplot在畫圖的時候,對于字符變量,會按照ASCII排序,所以要指定他們的順序。
data_new$project <- factor(data_new$project, levels = unique(data_new$project))
ggplot(data_new,aes(project, expr, fill = group))+
? geom_boxplot(outlier.shape = 21, outlier.fill = "white")+
? scale_fill_manual(values = c("Tumor" = "#d6503a", "Normal" = "#5488ef"))+
? theme_bw()+
? theme(axis.text = element_text(face = "bold"),
? ? ? ? axis.text.x = element_text(angle = 45, hjust = 1))
從圖中來看,實現(xiàn)了2個group的劃分,也實現(xiàn)了從大到小的排序。

下面我們來添加如何添加統(tǒng)計檢驗的結(jié)果,對于每個project對于tumor和normal進行顯著性檢驗。
如果只針對含有normal和tumor的project的話,用上個帖子的stat_compare_means就可以實現(xiàn)。
data_new1 <- data_new %>% group_by(project) %>% mutate(group_number = length(unique(group)))? ?#這里我們主要計算每個project含有g(shù)roup的數(shù)量是1還是2
data_new2 <- data_new1[which(data_new1$group_number == 2),]? #挑選group數(shù)量是2的project,也就是挑選了含有tumor和normal的project
ggplot(data_new2, aes(x = project, y =expr, fill = group))+
geom_boxplot(outlier.shape = 21, outlier.fill = "white")+
scale_fill_manual(name = NULL, values = c("Tumor" = "#d6503a", "Normal" = "#5488ef"))+
theme_bw()+
theme(axis.text = element_text(face = "bold"),
? ? ? ? axis.text.x = element_text(angle = 45, hjust = 1),
? ? ? ? axis.line = element_line(colour = "black"),
? ? ? ? panel.border = element_blank(),
? ? ? ? panel.background = element_blank())+
? stat_compare_means(data = data_new2,aes(x=project,y=expr,group=group),label = "p.signif")

但是需要注意的是并不是每一個都含有normal,所以我們可能需要首先計算一下顯著性檢驗,然后手動添加上去這個label。
data_new3 <- unique(data_new2[,c(2, 7,8)])
# 此數(shù)據(jù)用于添加顯著性檢驗的label:
stat_data <- data_new2 %>%
? group_by(project) %>%
? wilcox_test(expr ~ group) %>%
? adjust_pvalue(method = "fdr") %>%
? add_significance('p')%>%
add_xy_position(x = "project")
stat_data2 <- merge(stat_data, data_new3, by = "row.names", all = T)? #主要為了加入group的最大值,來控制label的Y軸位置。
ggplot(data_new, aes(x = project, y =expr, fill = group))+
?geom_boxplot(outlier.shape = 21, outlier.fill = "white")+
?scale_fill_manual(name = NULL, values = c("Tumor" = "#d6503a", "Normal" = "#5488ef"))+
?theme_bw()+
?theme(axis.text = element_text(face = "bold"),
? ? ? ? axis.text.x = element_text(angle = 45, hjust = 1),
? ? ? ? axis.line = element_line(colour = "black"),
? ? ? ? panel.border = element_blank(),
? ? ? ? panel.background = element_blank(),
? ? ? ? legend.position = c(0.99, 0.99),
? ? ? ? legend.justification = c(1,1))+
annotate(geom = "text", x=stat_data2$x, y = stat_data2$group_max+0.25, label = as.character(stat_data2$p.signif))? #來手動的添加統(tǒng)計檢驗的文本上去

最后修改一下坐標注釋等。
ggplot(data_new, aes(x = project, y =expr, fill = group))+
? geom_boxplot(outlier.shape = 21, outlier.fill = "white")+
? scale_fill_manual(name = NULL, values = c("Tumor" = "#d6503a", "Normal" = "#5488ef"))+
? theme_bw()+
? labs(y = TeX("miRNA Level ($log_{2}$CPM)", bold = T),x="")+
? theme(axis.text = element_text(face = "bold"),
? ? ? ? axis.text.x = element_text(angle = 45, hjust = 1),
? ? ? ? axis.line = element_line(colour = "black"),
? ? ? ? panel.border = element_blank(),
? ? ? ? panel.background = element_blank(),
? ? ? ? legend.position = c(0.99, 0.99),
? ? ? ? legend.justification = c(1,1))+
annotate(geom = "text", x=stat_data2$x, y = stat_data2$group_max+0.25, label = as.character(stat_data2$p.signif))

但是,還有一個中括號,我不太清楚怎么添加比較好。