組學(xué)關(guān)聯(lián)分析之——ggplot繪制九象限圖

Part1. 數(shù)據(jù)及包的載入

rm(list = ls())


#載入相關(guān)的R包;
library(dplyr)
library(ggplot2)
library(ggrepel)


#讀入兩個(gè)差異分析表(分別是轉(zhuǎn)錄組和翻譯組)并合并;
#設(shè)置工作目錄;
setwd("E:/R/demo/九象限圖")
#查看工作目錄中的文件;
dir()
#[1] "protein_exp.csv" "RNA_exp.csv"
#讀入數(shù)據(jù);
RNA =read.csv("RNA_exp.csv",header=T)
protein=read.csv("protein_exp.csv",header=T)

#查看數(shù)據(jù)的維度;
dim(RNA)
head(RNA)

dim(protein)
head(protein)

Part2. 兩類組學(xué)數(shù)據(jù)的整合

#通常組學(xué)的整合取交集
#合并兩個(gè)表格。選擇表頭為"id"的列進(jìn)行合并(取交集);
#suffixes:如果兩個(gè)表列名相同,則會(huì)在列名后加入suffixes(后綴)參數(shù)中對(duì)應(yīng)的后綴;
#all.x=FALSE,all.y=FALSE,表示輸出的是x,y表格的交集;
combine= merge(RNA,protein,
               by.x="id",
               by.y="id",
               suffixes = c("_RNA","_Protein") ,
               all.x=FALSE,
               all.y=FALSE)
#查看合并后表格的維度;
dim(combine)
#[1] 3619 7
#保存合并后的數(shù)據(jù);

Part3. 數(shù)據(jù)分組及相關(guān)性分析

#提取用于作圖的列組成新數(shù)據(jù)框;
data <- data.frame(combine[c(1,3,4,5,6,7)])
#查看前6行;
head(data)


#對(duì)數(shù)據(jù)進(jìn)行分組;
#生成顯著上下調(diào)數(shù)據(jù)標(biāo)簽;

data$part <- case_when(
  # 條件1: DEPs和DEGs都上調(diào)
  (data$log2FC_RNA >= 1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Up",
  
  # 條件2: DEPs和DEGs都下調(diào)
  (data$log2FC_RNA <= -1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Down",
  
  # 條件3: DEPs上調(diào),DEGs下調(diào)
  (data$log2FC_RNA <= -1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Up_DEGs_Down",
  
  # 條件4: DEPs下調(diào),DEGs上調(diào)
  (data$log2FC_RNA >= 1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Down_DEGs_Up",
  
  # 條件5: only_DEG_Up
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA >= 1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Up",
  
  # 條件6: only_DEG_Down
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA <= -1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Down",
  
  # 條件7: only_DEP_Up
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein >= log2(1.5) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Up",
  
  # 條件8: only_DEP_Down
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein <= log2(2/3) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Down",
  
  # 條件9: BothNone
  ((log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5)) | 
     data$Qvalue_Protein >= 0.05) &
    (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA >= 0.05) ~
    "BothNone",
   
  #TRUE ~ "Other" # 默認(rèn)情況
)

head(data)
data %>% count(part)  #統(tǒng)計(jì)分組情況

Part 4. 提取滿足條件的數(shù)據(jù)并進(jìn)行相關(guān)性分析


# 提取滿足條件的數(shù)據(jù)
filtered_data_same_trend <- data[data$part == "DEPs_DEGs_SameTrend_Up" |
                                   data$part == "DEPs_DEGs_SameTrend_Down", ]

filtered_data_cor_opposite<- data[data$part == " DEPs_Down_DEGs_Up" | 
                                    data$part == "DEPs_Up_DEGs_Down", ]
# 查看提取的數(shù)據(jù)
head(filtered_data_same_trend)
head(filtered_data_cor_opposite)


#### 計(jì)算數(shù)據(jù)相關(guān)性  ####
cor_same_trend = round(cor(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein),2)
cor_opposite = round(cor(filtered_data_cor_opposite$log2FC_RNA,filtered_data_cor_opposite$log2FC_Protein),2)


correlation_test <- cor.test(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein, method = "pearson")

cor_same_trend
cor_opposite

Part 5.1 繪圖—— 直角坐標(biāo)系

#### 3.1 繪制直角坐標(biāo)系繪圖 ####
{
# 定義繪制坐標(biāo)軸函數(shù):
draw_axis_line <- function(length_x, length_y, 
                           tick_step = NULL, lab_step = NULL){
  axis_x_begin <- -1*length_x
  axis_x_end <- length_x
  
  axis_y_begin  <- -1*length_y
  axis_y_end    <- length_y
  
  if (missing(tick_step))
    tick_step <- 1
  
  if (is.null(lab_step))
    lab_step <- 2
  
  # axis ticks data
  tick_x_frame <- data.frame(ticks = seq(axis_x_begin, axis_x_end, 
                                         by = tick_step))
  
  tick_y_frame <-  data.frame(ticks = seq(axis_y_begin, axis_y_end, 
                                          by = tick_step))
  
  # axis labels data
  lab_x_frame <- subset(data.frame(lab = seq(axis_x_begin, axis_x_end, 
                                             by = lab_step), zero = 0), 
                        lab != 0)
  
  lab_y_frame <- subset(data.frame(lab = seq(axis_y_begin, axis_y_end,
                                             by = lab_step),zero = 0), 
                        lab != 0)
  
  # set tick length
  tick_x_length = 0.05
  tick_y_length = 0.05
  
  # set zero point
  
  data <- data.frame(x = 0, y = 0)
  p <- ggplot(data = data) +
    
    # draw axis line
    geom_segment(y = 0, yend = 0, 
                 x = axis_x_begin, 
                 xend = axis_x_end,
                 size = 0.5) + 
    geom_segment(x = 0, xend = 0, 
                 y = axis_y_begin, 
                 yend = axis_y_end,
                 size = 0.5) +
    # x ticks
    geom_segment(data = tick_x_frame, 
                 aes(x = ticks, xend = ticks, 
                     y = 0, yend = 0 - tick_x_length)) +
    # y ticks
    geom_segment(data = tick_y_frame, 
                 aes(x = 0, xend = 0 - tick_y_length, 
                     y = ticks, yend = ticks)) + 
    
    # labels
    geom_text(data=lab_x_frame, aes(x=lab, y=zero, label=lab), vjust = 1.5) +
    geom_text(data=lab_y_frame, aes(x=zero, y=lab, label=lab), hjust= 1.5) +
    theme_minimal()+
    theme(panel.grid = element_blank(),axis.text = element_blank())
  return(p)
}

p <- draw_axis_line(8, 4)
p1 <- p + geom_point(data=data, aes(data$log2FC_RNA, data$log2FC_Protein, color = part))+
  scale_color_manual(values = c("DEPs_DEGs_SameTrend_Up" = "#dd8653",
                                "DEPs_DEGs_SameTrend_Down" = "#dd8653",
                                "DEPs_Up_DEGs_Down" = "#59a5d7",
                                "DEPs_Up_DEGs_Up" = "#59a5d7", 
                                "only_DEG_Up" = "#aa65a4", 
                                "only_DEG_Down" = "#aa65a4",
                                "only_DEP_Up" = "#99CC00",
                                "only_DEP_Down" = "#99CC00",
                                "BothNone"="#878787"),
                     values=alpha(0.7),
                     breaks = c("DEPs_DEGs_SameTrend_Up",
                                "DEPs_DEGs_SameTrend_Down",
                                "DEPs_Up_DEGs_Down",
                                "DEPs_Up_DEGs_Up", 
                                "only_DEG_Up" , 
                                "only_DEG_Down",
                                "only_DEP_Up",
                                "only_DEP_Down",
                                "BothNone"))+
  xlab("mRNA:FC(5mM Selenite/CK)")+
  ylab("Protein:FC(5mM Selenite/CK)")+
  theme(legend.position = "bottom")+
  annotate("text", label = "bolditalic(Brain)", parse = TRUE, 
           x = -2, y = 2, size = 4, colour = "black")+
  guides(color = guide_legend(title = "", ncol = 1, byrow = TRUE))
}

Part 5.2 繪圖—— ggplot繪圖


#開始嘗試?yán)L圖;
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))
#添加散點(diǎn);
p1 <- p0+geom_point(size=1.2)+guides(color="none")
p1


#自定義半透明顏色;
mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")
#顏色對(duì)應(yīng)數(shù)據(jù)分組,可自行修改

p2 <- p1 + scale_colour_manual(name="",values=alpha(mycolor,0.7))
p2


#添加輔助線;
p3 <- p2+geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")
p3


#調(diào)整橫軸和縱軸繪圖區(qū)域的范圍;
#設(shè)置y軸范圍(上下兩端的空白區(qū)域設(shè)為1),修改刻度標(biāo)簽;
#expansion函數(shù)設(shè)置坐標(biāo)軸范圍兩端空白區(qū)域的大小;mult為“倍數(shù)”模式,add為“加性”模式;
p4<-p3+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))
p4


#自定義圖表主題,對(duì)圖表做精細(xì)調(diào)整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
#隱藏縱軸,并對(duì)字體樣式、坐標(biāo)軸的粗細(xì)、顏色、刻度長(zhǎng)度進(jìn)行限定;
mytheme<-theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))
#應(yīng)用自定義主題;
p5 <- p4+mytheme
p5


#如果考慮差異的顯著性,則需要進(jìn)一步分組;
#生成至少在一個(gè)組學(xué)顯著上下調(diào)的數(shù)據(jù)標(biāo)簽;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#將作圖數(shù)據(jù)表格拆分成顯著和不顯著兩部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")

#重新進(jìn)行繪圖;
p6 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")
p6


#自定義半透明顏色;
mycolor <- c("#99CC00","#FF9999","#c77cff","gray90")
p7 <- p6 + scale_colour_manual(name="",values=alpha(mycolor,0.7))+mytheme
p7

#添加輔助線并修改坐標(biāo)軸范圍;
p8 <- p7+geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                   limits = c(-3, 3),
                   breaks = c(-3,-2,-1,0,1,2,3),
                   label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  annotate("text", x = 3, y = 2,
           label =  c('DEPs_DEGs_SameTrend 89', 'DEPs_DEGs_Opposite 72' ), 
           parse = TRUE)
p8


#計(jì)算兩個(gè)組學(xué)差異倍數(shù)的相關(guān)性,并取2位小數(shù);
cor = round(cor(data$log2FC_RNA,data$log2FC_Protein),2)
#準(zhǔn)備作為圖形的標(biāo)題;
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")

lab
#[1] "correlation=0.35"
#在圖上添加文字標(biāo)簽;
p8+
  geom_text(x = -5., y = 2.5, label = lab, color="black",size = 2)+
  geom_text(x = 4., y = -2.2, label = lab1, color="black",size=2)+
  geom_text(x = 4., y = -2.5, label = lab2, color="black",size=2)

Part 6.1 繪圖簡(jiǎn)易代碼——不區(qū)分significant和no_significant

mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")



#自定義圖表主題,對(duì)圖表做精細(xì)調(diào)整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2




p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))+
  geom_point(size=1.2)+guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))

Part 6 .2 繪圖簡(jiǎn)易代碼——區(qū)分significant和no_significant

#如果考慮差異的顯著性,則需要進(jìn)一步分組;
#生成至少在一個(gè)組學(xué)顯著上下調(diào)的數(shù)據(jù)標(biāo)簽;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#將作圖數(shù)據(jù)表格拆分成顯著和不顯著兩部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")



lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")


#重新進(jìn)行繪圖;
p1 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  theme_bw()+
  theme(text=element_text(family = "serif",colour ="black",size = 12,face="bold"),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"),
        plot.title = element_text(hjust = 0.5,vjust = 0,size=11))+
  geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  geom_text(x = -4., y = 2.5, label = lab,  color="#000000",size =3.5,fontface="plain")+
  geom_text(x = 4., y = -2.2, label = lab1, color="#000000",size=3.5,fontface="plain")+
  geom_text(x = 4., y = -2.4, label = lab2, color="#000000",size=3.5,fontface="plain")+
  labs(title = "Correlation of Proteome and Transcriptome")

p1  





?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容