R語言GEO數(shù)據(jù)挖掘:步驟四:富集分析KEGG,GO

1.讀取第三部存儲數(shù)據(jù)(基因差異表達(dá)情況)

rm(list = ls())  ## 魔幻操作,一鍵清空~
load(file = 'deg.Rdata')
head(deg)
deg

2.設(shè)定閾值計算基因上調(diào)下調(diào)數(shù)量

## 不同的閾值,篩選到的差異基因數(shù)量就不一樣,后面的超幾何分布檢驗結(jié)果就大相徑庭。
logFC_t=1.5
deg$g=ifelse(deg$P.Value>0.05,'stable',
            ifelse( deg$logFC > logFC_t,'UP',
                    ifelse( deg$logFC < -logFC_t,'DOWN','stable') )
)#P>0.05輸出stable,其中設(shè)定當(dāng)logFC大于1.5為上調(diào)輸出'UP',大于-1.5為下調(diào)輸出'DOWN',,如果都不是則輸出'stable',從而增加了一列g(shù),篩選出了上調(diào)和下調(diào)的基因
table(deg$g)
得出上調(diào)41,下調(diào)88
head(deg)
deg多了提示基因上下調(diào)的一列g(shù)
deg$symbol=rownames(deg)
給deg增加一列基因名

3.ID轉(zhuǎn)換

library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
df <- bitr(unique(deg$symbol), fromType = "SYMBOL",
           toType = c( "ENTREZID"),
           OrgDb = org.Hs.eg.db)
#bitr功能為ID轉(zhuǎn)換,
#bitr(geneID, fromType, toType, OrgDb, drop = TRUE);
#geneid :基因ID輸入 ; fromtype : 輸入ID型;toType:輸出ID型;orgdb :注釋數(shù)據(jù)庫)
head(df)
QQ截圖20190327194950.jpg
DEG=deg#把deg數(shù)據(jù)賦值給DEG數(shù)據(jù)
head(DEG)
得到DEG
DEG=merge(DEG,df,by.y='SYMBOL',by.x='symbol')
#把數(shù)據(jù)DEG,df通過,DEG的'symbol'列,df的'SYMBOL'列連接在一起,轉(zhuǎn)化ID
head(DEG)
save(DEG,file = 'anno_DEG.Rdata')
DEG

4.得出差異基因

gene_up= DEG[DEG$g == 'UP','ENTREZID'] #選出上調(diào)基因ID
gene_down=DEG[DEG$g == 'DOWN','ENTREZID'] #選出下調(diào)基因ID
gene_diff=c(gene_up,gene_down)#得出上下調(diào)基因ID
gene_all=as.character(DEG[ ,'ENTREZID'] )#得出所有基因ID
data(geneList, package="DOSE")#得出geneList數(shù)據(jù)
head(geneList)#查看數(shù)據(jù)
geneList
boxplot(geneList)#畫箱線圖
boxplot(DEG$logFC)#畫箱線圖
geneList=DEG$logFC#把DEG數(shù)據(jù)logFC列值賦值給數(shù)據(jù)geneList
QQ截圖20190327214317.jpg
names(geneList)=DEG$ENTREZID#把ID賦值給geneList數(shù)據(jù)的名字
得到geneList:ID和表達(dá)量的關(guān)系
geneList=sort(geneList,decreasing = T)#把數(shù)據(jù)進(jìn)行排序
排序之后的geneList

5. KEGG pathway analysis

做KEGG數(shù)據(jù)集超幾何分布檢驗分析,重點在結(jié)果的可視化及生物學(xué)意義的理解。

if(T){
  ###   over-representation test
  kk.up <- enrichKEGG(gene         = gene_up,
                      organism     = 'hsa',
                      universe     = gene_all,
                      pvalueCutoff = 0.9,
                      qvalueCutoff =0.9)
  head(kk.up)[,1:6]
  dotplot(kk.up );ggsave('kk.up.dotplot.png')
  kk.down <- enrichKEGG(gene         =  gene_down,
                        organism     = 'hsa',
                        universe     = gene_all,
                        pvalueCutoff = 0.9,
                        qvalueCutoff =0.9)
  head(kk.down)[,1:6]
  dotplot(kk.down );ggsave('kk.down.dotplot.png')
  kk.diff <- enrichKEGG(gene         = gene_diff,
                        organism     = 'hsa',
                        pvalueCutoff = 0.05)
  head(kk.diff)[,1:6]
  dotplot(kk.diff );ggsave('kk.diff.dotplot.png')
  
  kegg_diff_dt <- as.data.frame(kk.diff)
  kegg_down_dt <- as.data.frame(kk.down)
  kegg_up_dt <- as.data.frame(kk.up)
  down_kegg<-kegg_down_dt[kegg_down_dt$pvalue<0.05,];down_kegg$group=-1
  up_kegg<-kegg_up_dt[kegg_up_dt$pvalue<0.05,];up_kegg$group=1
  source('functions.R')
  g_kegg=kegg_plot(up_kegg,down_kegg)
  print(g_kegg)
  
  ggsave(g_kegg,filename = 'kegg_up_down.png')

6. GSEA

  kk_gse <- gseKEGG(geneList     = geneList,
                    organism     = 'hsa',
                    nPerm        = 1000,
                    minGSSize    = 120,
                    pvalueCutoff = 0.9,
                    verbose      = FALSE)
  head(kk_gse)[,1:6]
  gseaplot(kk_gse, geneSetID = rownames(kk_gse[1,]))
  
  down_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore < 0,];down_kegg$group=-1
  up_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore > 0,];up_kegg$group=1
  
  g_kegg=kegg_plot(up_kegg,down_kegg)
  print(g_kegg)
  ggsave(g_kegg,filename = 'kegg_up_down_gsea.png')
  
  
}

7.GO database analysis

做GO數(shù)據(jù)集超幾何分布檢驗分析,重點在結(jié)果的可視化及生物學(xué)意義的理解。

{
  
  g_list=list(gene_up=gene_up,
              gene_down=gene_down,
              gene_diff=gene_diff)
  
  if(F){
    go_enrich_results <- lapply( g_list , function(gene) {
      lapply( c('BP','MF','CC') , function(ont) {
        cat(paste('Now process ',ont ))
        ego <- enrichGO(gene          = gene,
                        universe      = gene_all,
                        OrgDb         = org.Hs.eg.db,
                        ont           = ont ,
                        pAdjustMethod = "BH",
                        pvalueCutoff  = 0.99,
                        qvalueCutoff  = 0.99,
                        readable      = TRUE)
        
        print( head(ego) )
        return(ego)
      })
    })
    save(go_enrich_results,file = 'go_enrich_results.Rdata')
    
  }
  
  
  load(file = 'go_enrich_results.Rdata')
  
  n1= c('gene_up','gene_down','gene_diff')
  n2= c('BP','MF','CC') 
  for (i in 1:3){
    for (j in 1:3){
      fn=paste0('dotplot_',n1[i],'_',n2[j],'.png')
      cat(paste0(fn,'\n'))
      png(fn,res=150,width = 1080)
      print( dotplot(go_enrich_results[[i]][[j]] ))
      dev.off()
    }
  }
  
  
}

把之前的數(shù)據(jù)設(shè)置好之后,后面的富集分析也是傻瓜式的

最后

感謝jimmy的生信技能樹團(tuán)隊!

感謝導(dǎo)師岑洪老師!

感謝健明、孫小潔,慧美等生信技能樹團(tuán)隊的老師一路以來的指導(dǎo)和鼓勵!

特別注明:此文中編碼來自生信技能樹健明老師

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容