Part1. 數(shù)據(jù)及包的載入
rm(list = ls())
#載入相關(guān)的R包;
library(dplyr)
library(ggplot2)
library(ggrepel)
#讀入兩個(gè)差異分析表(分別是轉(zhuǎn)錄組和翻譯組)并合并;
#設(shè)置工作目錄;
setwd("E:/R/demo/九象限圖")
#查看工作目錄中的文件;
dir()
#[1] "protein_exp.csv" "RNA_exp.csv"
#讀入數(shù)據(jù);
RNA =read.csv("RNA_exp.csv",header=T)
protein=read.csv("protein_exp.csv",header=T)
#查看數(shù)據(jù)的維度;
dim(RNA)
head(RNA)
dim(protein)
head(protein)
Part2. 兩類組學(xué)數(shù)據(jù)的整合
#通常組學(xué)的整合取交集
#合并兩個(gè)表格。選擇表頭為"id"的列進(jìn)行合并(取交集);
#suffixes:如果兩個(gè)表列名相同,則會(huì)在列名后加入suffixes(后綴)參數(shù)中對(duì)應(yīng)的后綴;
#all.x=FALSE,all.y=FALSE,表示輸出的是x,y表格的交集;
combine= merge(RNA,protein,
by.x="id",
by.y="id",
suffixes = c("_RNA","_Protein") ,
all.x=FALSE,
all.y=FALSE)
#查看合并后表格的維度;
dim(combine)
#[1] 3619 7
#保存合并后的數(shù)據(jù);
Part3. 數(shù)據(jù)分組及相關(guān)性分析
#提取用于作圖的列組成新數(shù)據(jù)框;
data <- data.frame(combine[c(1,3,4,5,6,7)])
#查看前6行;
head(data)
#對(duì)數(shù)據(jù)進(jìn)行分組;
#生成顯著上下調(diào)數(shù)據(jù)標(biāo)簽;
data$part <- case_when(
# 條件1: DEPs和DEGs都上調(diào)
(data$log2FC_RNA >= 1 & data$log2FC_Protein >= log2(1.5) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_DEGs_SameTrend_Up",
# 條件2: DEPs和DEGs都下調(diào)
(data$log2FC_RNA <= -1 & data$log2FC_Protein <= log2(2/3) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_DEGs_SameTrend_Down",
# 條件3: DEPs上調(diào),DEGs下調(diào)
(data$log2FC_RNA <= -1 & data$log2FC_Protein >= log2(1.5) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_Up_DEGs_Down",
# 條件4: DEPs下調(diào),DEGs上調(diào)
(data$log2FC_RNA >= 1 & data$log2FC_Protein <= log2(2/3) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_Down_DEGs_Up",
# 條件5: only_DEG_Up
(data$Qvalue_RNA <= 0.05 & data$log2FC_RNA >= 1 &
(log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) |
data$Qvalue_Protein > 0.05)) ~
"only_DEG_Up",
# 條件6: only_DEG_Down
(data$Qvalue_RNA <= 0.05 & data$log2FC_RNA <= -1 &
(log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) |
data$Qvalue_Protein > 0.05)) ~
"only_DEG_Down",
# 條件7: only_DEP_Up
(data$Qvalue_Protein <= 0.05 & data$log2FC_Protein >= log2(1.5) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
"only_DEP_Up",
# 條件8: only_DEP_Down
(data$Qvalue_Protein <= 0.05 & data$log2FC_Protein <= log2(2/3) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
"only_DEP_Down",
# 條件9: BothNone
((log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5)) |
data$Qvalue_Protein >= 0.05) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA >= 0.05) ~
"BothNone",
#TRUE ~ "Other" # 默認(rèn)情況
)
head(data)
data %>% count(part) #統(tǒng)計(jì)分組情況
Part 4. 提取滿足條件的數(shù)據(jù)并進(jìn)行相關(guān)性分析
# 提取滿足條件的數(shù)據(jù)
filtered_data_same_trend <- data[data$part == "DEPs_DEGs_SameTrend_Up" |
data$part == "DEPs_DEGs_SameTrend_Down", ]
filtered_data_cor_opposite<- data[data$part == " DEPs_Down_DEGs_Up" |
data$part == "DEPs_Up_DEGs_Down", ]
# 查看提取的數(shù)據(jù)
head(filtered_data_same_trend)
head(filtered_data_cor_opposite)
#### 計(jì)算數(shù)據(jù)相關(guān)性 ####
cor_same_trend = round(cor(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein),2)
cor_opposite = round(cor(filtered_data_cor_opposite$log2FC_RNA,filtered_data_cor_opposite$log2FC_Protein),2)
correlation_test <- cor.test(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein, method = "pearson")
cor_same_trend
cor_opposite
Part 5.1 繪圖—— 直角坐標(biāo)系
#### 3.1 繪制直角坐標(biāo)系繪圖 ####
{
# 定義繪制坐標(biāo)軸函數(shù):
draw_axis_line <- function(length_x, length_y,
tick_step = NULL, lab_step = NULL){
axis_x_begin <- -1*length_x
axis_x_end <- length_x
axis_y_begin <- -1*length_y
axis_y_end <- length_y
if (missing(tick_step))
tick_step <- 1
if (is.null(lab_step))
lab_step <- 2
# axis ticks data
tick_x_frame <- data.frame(ticks = seq(axis_x_begin, axis_x_end,
by = tick_step))
tick_y_frame <- data.frame(ticks = seq(axis_y_begin, axis_y_end,
by = tick_step))
# axis labels data
lab_x_frame <- subset(data.frame(lab = seq(axis_x_begin, axis_x_end,
by = lab_step), zero = 0),
lab != 0)
lab_y_frame <- subset(data.frame(lab = seq(axis_y_begin, axis_y_end,
by = lab_step),zero = 0),
lab != 0)
# set tick length
tick_x_length = 0.05
tick_y_length = 0.05
# set zero point
data <- data.frame(x = 0, y = 0)
p <- ggplot(data = data) +
# draw axis line
geom_segment(y = 0, yend = 0,
x = axis_x_begin,
xend = axis_x_end,
size = 0.5) +
geom_segment(x = 0, xend = 0,
y = axis_y_begin,
yend = axis_y_end,
size = 0.5) +
# x ticks
geom_segment(data = tick_x_frame,
aes(x = ticks, xend = ticks,
y = 0, yend = 0 - tick_x_length)) +
# y ticks
geom_segment(data = tick_y_frame,
aes(x = 0, xend = 0 - tick_y_length,
y = ticks, yend = ticks)) +
# labels
geom_text(data=lab_x_frame, aes(x=lab, y=zero, label=lab), vjust = 1.5) +
geom_text(data=lab_y_frame, aes(x=zero, y=lab, label=lab), hjust= 1.5) +
theme_minimal()+
theme(panel.grid = element_blank(),axis.text = element_blank())
return(p)
}
p <- draw_axis_line(8, 4)
p1 <- p + geom_point(data=data, aes(data$log2FC_RNA, data$log2FC_Protein, color = part))+
scale_color_manual(values = c("DEPs_DEGs_SameTrend_Up" = "#dd8653",
"DEPs_DEGs_SameTrend_Down" = "#dd8653",
"DEPs_Up_DEGs_Down" = "#59a5d7",
"DEPs_Up_DEGs_Up" = "#59a5d7",
"only_DEG_Up" = "#aa65a4",
"only_DEG_Down" = "#aa65a4",
"only_DEP_Up" = "#99CC00",
"only_DEP_Down" = "#99CC00",
"BothNone"="#878787"),
values=alpha(0.7),
breaks = c("DEPs_DEGs_SameTrend_Up",
"DEPs_DEGs_SameTrend_Down",
"DEPs_Up_DEGs_Down",
"DEPs_Up_DEGs_Up",
"only_DEG_Up" ,
"only_DEG_Down",
"only_DEP_Up",
"only_DEP_Down",
"BothNone"))+
xlab("mRNA:FC(5mM Selenite/CK)")+
ylab("Protein:FC(5mM Selenite/CK)")+
theme(legend.position = "bottom")+
annotate("text", label = "bolditalic(Brain)", parse = TRUE,
x = -2, y = 2, size = 4, colour = "black")+
guides(color = guide_legend(title = "", ncol = 1, byrow = TRUE))
}
Part 5.2 繪圖—— ggplot繪圖
#開始嘗試?yán)L圖;
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))
#添加散點(diǎn);
p1 <- p0+geom_point(size=1.2)+guides(color="none")
p1
#自定義半透明顏色;
mycolor <- c("gray80","#FF0000","#FF0000",
"#FFA500", "#FFA500","#99CC00",
"#99CC00","#0020AF","#0020AF")
#顏色對(duì)應(yīng)數(shù)據(jù)分組,可自行修改
p2 <- p1 + scale_colour_manual(name="",values=alpha(mycolor,0.7))
p2
#添加輔助線;
p3 <- p2+geom_hline(yintercept = c(log2(2/3),log2(1.5)),
size = 0.5,
color = "grey40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "grey40",
lty = "dashed")
p3
#調(diào)整橫軸和縱軸繪圖區(qū)域的范圍;
#設(shè)置y軸范圍(上下兩端的空白區(qū)域設(shè)為1),修改刻度標(biāo)簽;
#expansion函數(shù)設(shè)置坐標(biāo)軸范圍兩端空白區(qū)域的大小;mult為“倍數(shù)”模式,add為“加性”模式;
p4<-p3+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))
p4
#自定義圖表主題,對(duì)圖表做精細(xì)調(diào)整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
#隱藏縱軸,并對(duì)字體樣式、坐標(biāo)軸的粗細(xì)、顏色、刻度長(zhǎng)度進(jìn)行限定;
mytheme<-theme_bw()+
theme(text=element_text(family = "sans",colour ="gray30",size = 12),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"))
#應(yīng)用自定義主題;
p5 <- p4+mytheme
p5
#如果考慮差異的顯著性,則需要進(jìn)一步分組;
#生成至少在一個(gè)組學(xué)顯著上下調(diào)的數(shù)據(jù)標(biāo)簽;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)
#將作圖數(shù)據(jù)表格拆分成顯著和不顯著兩部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")
#重新進(jìn)行繪圖;
p6 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
guides(color="none")
p6
#自定義半透明顏色;
mycolor <- c("#99CC00","#FF9999","#c77cff","gray90")
p7 <- p6 + scale_colour_manual(name="",values=alpha(mycolor,0.7))+mytheme
p7
#添加輔助線并修改坐標(biāo)軸范圍;
p8 <- p7+geom_hline(yintercept = c(log(2/3),log2(1.5)),
size = 0.5,
color = "gray40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "gray40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
annotate("text", x = 3, y = 2,
label = c('DEPs_DEGs_SameTrend 89', 'DEPs_DEGs_Opposite 72' ),
parse = TRUE)
p8
#計(jì)算兩個(gè)組學(xué)差異倍數(shù)的相關(guān)性,并取2位小數(shù);
cor = round(cor(data$log2FC_RNA,data$log2FC_Protein),2)
#準(zhǔn)備作為圖形的標(biāo)題;
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")
lab
#[1] "correlation=0.35"
#在圖上添加文字標(biāo)簽;
p8+
geom_text(x = -5., y = 2.5, label = lab, color="black",size = 2)+
geom_text(x = 4., y = -2.2, label = lab1, color="black",size=2)+
geom_text(x = 4., y = -2.5, label = lab2, color="black",size=2)
Part 6.1 繪圖簡(jiǎn)易代碼——不區(qū)分significant和no_significant
mycolor <- c("gray80","#FF0000","#FF0000",
"#FFA500", "#FFA500","#99CC00",
"#99CC00","#0020AF","#0020AF")
#自定義圖表主題,對(duì)圖表做精細(xì)調(diào)整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))+
geom_point(size=1.2)+guides(color="none")+
scale_colour_manual(name="",values=alpha(mycolor,0.7))+
geom_hline(yintercept = c(log2(2/3),log2(1.5)),
size = 0.5,
color = "grey40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "grey40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
theme_bw()+
theme(text=element_text(family = "sans",colour ="gray30",size = 12),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"))
Part 6 .2 繪圖簡(jiǎn)易代碼——區(qū)分significant和no_significant
#如果考慮差異的顯著性,則需要進(jìn)一步分組;
#生成至少在一個(gè)組學(xué)顯著上下調(diào)的數(shù)據(jù)標(biāo)簽;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)
#將作圖數(shù)據(jù)表格拆分成顯著和不顯著兩部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")
#重新進(jìn)行繪圖;
p1 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
guides(color="none")+
scale_colour_manual(name="",values=alpha(mycolor,0.7))+
theme_bw()+
theme(text=element_text(family = "serif",colour ="black",size = 12,face="bold"),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"),
plot.title = element_text(hjust = 0.5,vjust = 0,size=11))+
geom_hline(yintercept = c(log(2/3),log2(1.5)),
size = 0.5,
color = "gray40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "gray40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
geom_text(x = -4., y = 2.5, label = lab, color="#000000",size =3.5,fontface="plain")+
geom_text(x = 4., y = -2.2, label = lab1, color="#000000",size=3.5,fontface="plain")+
geom_text(x = 4., y = -2.4, label = lab2, color="#000000",size=3.5,fontface="plain")+
labs(title = "Correlation of Proteome and Transcriptome")
p1