2022-12-11 斑馬魚8和16 時序GO 分析

Mfuzz 做轉錄變化的時間趨勢分析後,對每個趨勢分組挑一個代表性基因:https://cloud.tencent.com/developer/article/2018571
使用clusterProfiler進行富集分析http://www.itdecent.cn/p/d484003dced5
clusterProfiler進行GO富集去冗余http://www.itdecent.cn/p/e5dc76debde6
針對8天和16天的斑馬魚進行時序分析,由Mfuzz包實現;需要的數據是8天和16天的標準化counts值,以及自己選擇的差異基因。

image.png

if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")  
BiocManager::install("rlang")
library(ggnewscale)
library(clusterProfiler) #用來做富集分析
library(topGO)#畫GO圖用的
library(pathview)  #看KEGG pathway的
library(enrichplot)
library(org.Hs.eg.db)#這個包里存有人的注釋文件
library(org.Dr.eg.db)#這個包里存有斑馬魚的注釋文件
library(ggplot2)
library(DOSE)
library(GO.db)
library(Mfuzz)
library(limma)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggstatsplot)
library(tidyverse)
利用gene_id合併,避免斑馬魚基因名字出現過多重複;將all和lian兩個表進行合併。
# Merge the selected differential genes with the full expression table.
# The join key is gene_id rather than the gene name.
all <- read.csv('T8_T16_ALL_genes_時序分析.csv')  # expression values of all day-8 and day-16 samples
lian <- read.csv('T8_t16_共同差異基因時序分析/T8_16差異基因.csv')  # the differential genes of interest
TT <- merge(lian, all, by = "gene_id")  # keep only genes present in both tables

# row.names = FALSE keeps gene_id as the first column of the file. The default
# would prepend a row-counter column, and the following step (which uses the
# first column as row names) would pick up the counter instead of the gene IDs.
write.csv(
  TT,
  "T8_t16_共同差異基因時序分析/T8_16d 差異基因1500時序分析.csv",
  row.names = FALSE
)
image.png

image.png

手動打開csv,全選所有數據,選擇去除重複。

# Reload the merged table and convert it to a matrix for Mfuzz.
ll <- read.csv("T8_t16_共同差異基因時序分析/T8_16d 差異基因1500時序分析.csv")

# The first column supplies the row names and is then dropped from the data.
row.names(ll) <- ll[[1]]
df <- ll[, -1]

# Convert the data frame to a matrix (A).
A <- as.matrix(df)

去除表達量太低或者在不同時間點間變化太小的基因等步驟

# Mfuzz clusters an ExpressionSet object, so wrap the expression matrix in one.
eset <- new("ExpressionSet", exprs = A)

# Remove genes whose standard deviation across samples is too small.
# The number of genes excluded differs between data sets
# (e.g. "10818 genes excluded." in one run).
eset <- filter.std(eset, min.std = 0)
eset
eset <- standardise(eset)

c <- 7  # number of clusters; adjust as needed
m <- mestimate(eset)  # estimate the fuzzifier m from the data

# Fuzzy c-means starts from a random initialisation. Fixing the seed keeps the
# cluster numbering stable between runs, which matters because later steps
# pick clusters by number (e.g. cluster == 1).
set.seed(123)
cl <- mfuzz(eset, c = c, m = m)  # clustering
cl$size  # number of genes in each cluster

## Cluster cores
# Membership values can also indicate how similar the vectors are to each other.
eset

# Extract the core genes of every cluster, using 0.5 as the min.acore
# membership threshold (a posteriori filtering).
cl.thres <- acore(eset, cl, min.acore = 0.5)

# All core genes of the first cluster.
a <- cl.thres[[1]]
a
# write.csv(a, "a.csv")  # optional export to pick the genes with high scores;
#                        # gene ID conversion can follow the approach used below

# Number of genes assigned to each cluster.
table(cl$cluster)

# Number of genes left in each cluster after the min.acore filter,
# e.g. 1772 3113 1822 1931 2025 2659  786 2375 1640
vapply(cl.thres, nrow, integer(1))

# Preview the first rows of every cluster core.
lapply(cl.thres, function(core) head(core))
# Export the core genes of every cluster, annotated with gene symbols.
# One CSV per cluster with columns ENSEMBL, ense, scores and SYMBOL.
# Iterating over cl.thres itself (instead of a hard-coded 1:7) keeps the loop
# correct when the number of clusters is changed.
for (i in seq_along(cl.thres)) {
  core <- cl.thres[[i]]

  # Nothing to annotate or write for a cluster without core genes.
  if (is.null(core) || nrow(core) == 0) {
    next
  }

  # NOTE(review): sep = " " yields file names such as "all_ 1 .csv"; the
  # expression is kept unchanged so existing output names stay the same.
  out_file <- paste("all_", i, ".csv", sep = " ")

  # Same three columns the original obtained by writing the core to disk and
  # reading it back: row names, first column, second column.
  name <- data.frame(
    ENSEMBL = rownames(core),
    ense = core[[1]],
    scores = core[[2]],
    stringsAsFactors = FALSE
  )

  # Map Ensembl gene IDs to gene symbols; drop = FALSE keeps unmapped IDs.
  symbols <- bitr(
    name$ENSEMBL,
    fromType = "ENSEMBL",
    toType = c("SYMBOL"),
    OrgDb = org.Dr.eg.db,
    drop = FALSE
  )

  annotated <- merge(name, symbols, by = "ENSEMBL")
  write.csv(annotated, file = out_file)
}

做圖

library(RColorBrewer)

# 1000-step colour ramp running from OliveDrab1 through yellow to red.
color.2 <- colorRampPalette(c("OliveDrab1", "Yellow", "#ff0000"))(1000)

# Draw all cluster panels on a 3 x 3 grid into a single PDF page.
pdf("mfuzz_clusters_plot_7_2.pdf", width = 12, height = 7)
mfuzz.plot(
  eset,
  cl,
  mfrow = c(3, 3),
  new.window = FALSE,
  time.labels = colnames(eset),
  colo = color.2
)
dev.off()
image.png

下一步對不同簇下基因進行富集分析

dev.new()  # reopen a plotting window

# Genes of individual clusters and of all clusters together.
gene_id <- cl$cluster[cl$cluster == 1]  # genes assigned to cluster 1
gene_id
gene_id_2 <- cl$cluster[cl$cluster == 2]
gene_id_all <- cl$cluster  # genes of every cluster

write.csv(gene_id, "gene_id.csv")
write.csv(gene_id_all, "gene_id_all.csv")

# Read the cluster-1 table back. It is deliberately not stored in a variable
# called `T`: that would shadow the built-in shorthand for TRUE, which this
# script still relies on further down (`ordered = T`).
cluster1_genes <- read.csv("gene_id.csv")
colnames(cluster1_genes) <- c("gene_id", "row")

# Enrichment analysis needs Entrez IDs, so convert the Ensembl IDs.
columns(org.Dr.eg.db)  # list the ID types available for conversion
ensembl_gene_id <- cluster1_genes$gene_id
id <- bitr(
  ensembl_gene_id,
  fromType = "ENSEMBL",
  toType = c("ENTREZID"),
  OrgDb = org.Dr.eg.db,
  drop = FALSE
)

# drop = FALSE keeps unmapped genes as NA; remove them before enrichment.
ENTREZ_ID <- id$ENTREZID[!is.na(id$ENTREZID)]
image.png

image.png
## GO enrichment for the biological process (BP) ontology
go_bp <- enrichGO(
  gene = ENTREZ_ID,
  OrgDb = org.Dr.eg.db,
  keyType = "ENTREZID",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  readable = TRUE
)
dim(go_bp)

# Remove redundant GO terms. The call is namespace-qualified because this
# script attaches tidyverse after clusterProfiler, and purrr also exports a
# simplify() that would otherwise mask the clusterProfiler method.
egosimp <- clusterProfiler::simplify(
  go_bp,
  cutoff = 0.7,
  by = "p.adjust",
  select_fun = min,
  measure = "Wang"
)

dim(egosimp)


image.png
# write.csv(go_bp@result, "go_bp.csv")

## GO enrichment for the cellular component (CC) ontology
go_cc <- enrichGO(
  gene = ENTREZ_ID,
  OrgDb = org.Dr.eg.db,
  keyType = "ENTREZID",
  ont = "CC",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
# Export the result if needed:
# write.csv(go_cc@result, "go_cc.csv")
dim(go_cc)  # e.g. 33 9

# Simplify the CC result itself (the original passed go_bp here by mistake).
# simplify() is namespace-qualified because purrr, attached via tidyverse
# after clusterProfiler, exports a function of the same name.
go_ccsimp <- clusterProfiler::simplify(
  go_cc,
  cutoff = 0.7,
  by = "p.adjust",
  select_fun = min,
  measure = "Wang"
)

## GO enrichment for the molecular function (MF) ontology
go_MF <- enrichGO(
  gene = ENTREZ_ID,
  OrgDb = org.Dr.eg.db,
  keyType = "ENTREZID",
  ont = "MF",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
# write.csv(go_MF@result, "go_mf.csv")

## GO enrichment across all three ontologies
go_all <- enrichGO(
  gene = ENTREZ_ID,
  OrgDb = org.Dr.eg.db,
  keyType = "ENTREZID",
  ont = "ALL",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  readable = TRUE
)
go_allsimp <- clusterProfiler::simplify(
  go_all,
  cutoff = 0.7,
  by = "p.adjust",
  select_fun = min,
  measure = "Wang"
)
dim(go_all)
dim(go_allsimp)
image.png
# Dot plots of the BP result before and after removing redundant terms.
dotplot(go_bp, showCategory = 10)
dotplot(egosimp, showCategory = 10)

# One panel per ontology. `scales` is spelled out in full instead of relying
# on partial argument matching of `scale`.
# NOTE(review): the title says "Top5" while showCategory is 10 - confirm which
# one is intended.
dotplot(
  go_allsimp,
  title = 'Top5 GO terms of each sub-class',
  showCategory = 10,
  split = "ONTOLOGY"
) +
  facet_grid(ONTOLOGY ~ ., scales = "free")

# Gene-concept network of the simplified BP terms.
cnetplot(egosimp, showCategory = 5)

image.png

image.png

image.png
# Look up the KEGG organism code for zebrafish.
search_kegg_organism("zebrafish", by = "common_name")

# KEGG pathway enrichment for the cluster's genes.
gene_kegg <- enrichKEGG(
  gene = ENTREZ_ID,
  organism = "dre",
  keyType = "kegg",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.5,
  qvalueCutoff = 0.5,
  use_internal_data = FALSE
)
dim(gene_kegg)

dotplot(gene_kegg)
barplot(gene_kegg)

# enrichMap() has been removed from the enrichment packages. Its replacement
# is enrichplot::emapplot(), which needs the pairwise term similarities
# computed first.
enrichplot::emapplot(enrichplot::pairwise_termsim(gene_kegg))
cnetplot(gene_kegg, showCategory = 5)

# Convert Entrez IDs to readable gene symbols before drawing the network.
eKEGG <- setReadable(gene_kegg, OrgDb = org.Dr.eg.db, keyType = "ENTREZID")
cnetplot(eKEGG, showCategory = 5)




image.png

3. 讀入gene_id_all文件(差異基因在不同簇中的匯總表),將ENSEMBL轉為Entrez ID

獲得做圖關鍵的兩列:ENTREZID和CLUSTER

# Read back the gene-to-cluster table written earlier with
# write.csv(gene_id_all, "gene_id_all.csv"). The result is not stored in `T`,
# which would shadow the built-in shorthand for TRUE.
cluster_tbl <- read.csv("gene_id_all.csv")

# As written by write.csv(), the file's header is "X","x": the first column
# holds the gene IDs and the second the cluster number. Name them explicitly
# so the ENSEMBL / CLUSTER lookups below actually find their columns.
colnames(cluster_tbl)[1:2] <- c("ENSEMBL", "CLUSTER")

# Convert Ensembl IDs to Entrez IDs.
ensembl_gene_id <- cluster_tbl$ENSEMBL
id <- bitr(
  ensembl_gene_id,
  fromType = "ENSEMBL",
  toType = c("ENTREZID"),
  OrgDb = org.Dr.eg.db,
  drop = FALSE
)
tt <- merge(cluster_tbl, id, by = "ENSEMBL")

# drop = FALSE keeps unmapped genes as NA; remove them before grouping.
tt <- tt[!is.na(tt$ENTREZID), ]

# One vector of Entrez IDs per cluster, the input format of compareCluster().
gcSample <- split(tt$ENTREZID, tt$CLUSTER)
gcSample

KEGG分析

# KEGG enrichment of every cluster in one call.
YY <- compareCluster(
  gcSample,
  fun = "enrichKEGG",
  organism = "dre",
  pvalueCutoff = 0.05
)

pdf('mfuzz_clusters_plot_7_16.pdf', height = 10, width = 7)
p <- dotplot(YY, showCategory = 8, label_format = 100, font.size = 12)

# print() is explicit so the plot is drawn into the PDF even when the script
# is run with source(), where top-level ggplot objects are not auto-printed.
print(
  p + theme(axis.text.x = element_text(
    angle = 45, vjust = 0.5, hjust = 0.5, size = 13, face = "bold"
  ))
)
dev.off()
image.png

GO分析

# GO (BP) enrichment of every cluster in one call.
xx <- compareCluster(
  gcSample,
  fun = "enrichGO",
  OrgDb = "org.Dr.eg.db",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05
)
dim(xx)

# Remove redundant GO terms. The call is namespace-qualified because this
# script attaches tidyverse after clusterProfiler, and purrr also exports a
# simplify() that would otherwise mask the clusterProfiler method.
ego <- clusterProfiler::simplify(
  xx,
  cutoff = 0.7,
  by = "p.adjust",
  select_fun = min,
  measure = "Wang",
  semData = NULL
)
dim(ego)
image.png

GO多組分析

# Dot plot of the simplified multi-cluster GO result, saved as PDF.
pdf('mfuzz_clusters_plot_7_19.pdf', height = 10, width = 7)
p <- dotplot(ego, showCategory = 8, label_format = 100, font.size = 12)

# print() is explicit so the plot is drawn into the PDF even when the script
# is run with source(), where top-level ggplot objects are not auto-printed.
print(
  p + theme(axis.text.x = element_text(
    angle = 45, vjust = 0.5, hjust = 0.5, size = 13, face = "bold"
  ))
)
dev.off()

image.png

改變x軸標簽與x軸的角度與距離angle = 45,vjust = 0.5, hjust = 0.5

獲取數據,取出每個組最富集的10個條目並存儲起來(並不麻煩)。

# Build a custom ggplot2 dot plot from the multi-cluster GO result, because
# dotplot() is hard to customise.
x <- ego@compareClusterResult

# The ten terms with the smallest p-value in every cluster (ties kept).
y <- x %>%
  group_by(Cluster) %>%
  slice_min(pvalue, n = 10) %>%
  ungroup()

# Keep every row, in any cluster, whose term made it into some top-10 list.
y <- x[x$Description %in% y$Description, ]
test <- as.data.frame(y)

# Fix the term order before plotting. Levels must be unique (the same term
# can occur in several clusters), and TRUE is spelled out because `T` is
# reassigned to a data frame earlier in this script.
test$Description <- factor(
  test$Description,
  levels = unique(test$Description),
  ordered = TRUE
)

# Store the plot in `p`; otherwise the additions below would be applied to
# the dotplot() object left over from the previous section.
p <- ggplot(test, aes(x = Cluster, y = Description)) +
  geom_point(aes(color = p.adjust, size = Count)) +
  scale_color_gradient(low = "red", high = "blue") +
  xlab("Cluster") +  # the x axis shows clusters, not fold enrichment
  theme_bw()
print(p)

# Variant with x and y swapped (coord_flip) and rotated tick labels.
print(
  p +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)) +
    coord_flip()
)

# Variant with a red-to-green colour scale and a larger, bold legend title.
print(
  p +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)) +
    scale_colour_gradient(low = "red", high = "green") +
    theme(legend.title = element_text(size = 15, face = 2))
)
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯繫作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳並發布,文章內容僅代表作者本人觀點,簡書係信息發布平臺,僅提供信息存儲服務。
禁止轉載,如需轉載請通過簡信或評論聯繫作者。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容