1.讀取第三部存儲數(shù)據(jù)(基因差異表達(dá)情況)
rm(list = ls()) ## 魔幻操作,一鍵清空~
load(file = 'deg.Rdata')
head(deg)

deg
2.設(shè)定閾值計算基因上調(diào)下調(diào)數(shù)量
## 不同的閾值,篩選到的差異基因數(shù)量就不一樣,后面的超幾何分布檢驗結(jié)果就大相徑庭。
logFC_t=1.5
deg$g=ifelse(deg$P.Value>0.05,'stable',
ifelse( deg$logFC > logFC_t,'UP',
ifelse( deg$logFC < -logFC_t,'DOWN','stable') )
)#P>0.05輸出stable,其中設(shè)定當(dāng)logFC大于1.5為上調(diào)輸出'UP',大于-1.5為下調(diào)輸出'DOWN',,如果都不是則輸出'stable',從而增加了一列g(shù),篩選出了上調(diào)和下調(diào)的基因
table(deg$g)

得出上調(diào)41,下調(diào)88
head(deg)

deg多了提示基因上下調(diào)的一列g(shù)
deg$symbol=rownames(deg)

給deg增加一列基因名
3.ID轉(zhuǎn)換
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
df <- bitr(unique(deg$symbol), fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Hs.eg.db)
#bitr功能為ID轉(zhuǎn)換,
#bitr(geneID, fromType, toType, OrgDb, drop = TRUE);
#geneid :基因ID輸入 ; fromtype : 輸入ID型;toType:輸出ID型;orgdb :注釋數(shù)據(jù)庫)
head(df)

QQ截圖20190327194950.jpg
DEG=deg#把deg數(shù)據(jù)賦值給DEG數(shù)據(jù)
head(DEG)

得到DEG
DEG=merge(DEG,df,by.y='SYMBOL',by.x='symbol')
#把數(shù)據(jù)DEG,df通過,DEG的'symbol'列,df的'SYMBOL'列連接在一起,轉(zhuǎn)化ID
head(DEG)
save(DEG,file = 'anno_DEG.Rdata')

DEG
4.得出差異基因
gene_up= DEG[DEG$g == 'UP','ENTREZID'] #選出上調(diào)基因ID
gene_down=DEG[DEG$g == 'DOWN','ENTREZID'] #選出下調(diào)基因ID
gene_diff=c(gene_up,gene_down)#得出上下調(diào)基因ID
gene_all=as.character(DEG[ ,'ENTREZID'] )#得出所有基因ID
data(geneList, package="DOSE")#得出geneList數(shù)據(jù)
head(geneList)#查看數(shù)據(jù)

geneList
boxplot(geneList)#畫箱線圖
boxplot(DEG$logFC)#畫箱線圖
geneList=DEG$logFC#把DEG數(shù)據(jù)logFC列值賦值給數(shù)據(jù)geneList

QQ截圖20190327214317.jpg
names(geneList)=DEG$ENTREZID#把ID賦值給geneList數(shù)據(jù)的名字

得到geneList:ID和表達(dá)量的關(guān)系
geneList=sort(geneList,decreasing = T)#把數(shù)據(jù)進(jìn)行排序

排序之后的geneList
5. KEGG pathway analysis
做KEGG數(shù)據(jù)集超幾何分布檢驗分析,重點在結(jié)果的可視化及生物學(xué)意義的理解。
if(T){
### over-representation test
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff =0.9)
head(kk.up)[,1:6]
dotplot(kk.up );ggsave('kk.up.dotplot.png')
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff =0.9)
head(kk.down)[,1:6]
dotplot(kk.down );ggsave('kk.down.dotplot.png')
kk.diff <- enrichKEGG(gene = gene_diff,
organism = 'hsa',
pvalueCutoff = 0.05)
head(kk.diff)[,1:6]
dotplot(kk.diff );ggsave('kk.diff.dotplot.png')
kegg_diff_dt <- as.data.frame(kk.diff)
kegg_down_dt <- as.data.frame(kk.down)
kegg_up_dt <- as.data.frame(kk.up)
down_kegg<-kegg_down_dt[kegg_down_dt$pvalue<0.05,];down_kegg$group=-1
up_kegg<-kegg_up_dt[kegg_up_dt$pvalue<0.05,];up_kegg$group=1
source('functions.R')
g_kegg=kegg_plot(up_kegg,down_kegg)
print(g_kegg)
ggsave(g_kegg,filename = 'kegg_up_down.png')
6. GSEA
kk_gse <- gseKEGG(geneList = geneList,
organism = 'hsa',
nPerm = 1000,
minGSSize = 120,
pvalueCutoff = 0.9,
verbose = FALSE)
head(kk_gse)[,1:6]
gseaplot(kk_gse, geneSetID = rownames(kk_gse[1,]))
down_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore < 0,];down_kegg$group=-1
up_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore > 0,];up_kegg$group=1
g_kegg=kegg_plot(up_kegg,down_kegg)
print(g_kegg)
ggsave(g_kegg,filename = 'kegg_up_down_gsea.png')
}
7.GO database analysis
做GO數(shù)據(jù)集超幾何分布檢驗分析,重點在結(jié)果的可視化及生物學(xué)意義的理解。
{
g_list=list(gene_up=gene_up,
gene_down=gene_down,
gene_diff=gene_diff)
if(F){
go_enrich_results <- lapply( g_list , function(gene) {
lapply( c('BP','MF','CC') , function(ont) {
cat(paste('Now process ',ont ))
ego <- enrichGO(gene = gene,
universe = gene_all,
OrgDb = org.Hs.eg.db,
ont = ont ,
pAdjustMethod = "BH",
pvalueCutoff = 0.99,
qvalueCutoff = 0.99,
readable = TRUE)
print( head(ego) )
return(ego)
})
})
save(go_enrich_results,file = 'go_enrich_results.Rdata')
}
load(file = 'go_enrich_results.Rdata')
n1= c('gene_up','gene_down','gene_diff')
n2= c('BP','MF','CC')
for (i in 1:3){
for (j in 1:3){
fn=paste0('dotplot_',n1[i],'_',n2[j],'.png')
cat(paste0(fn,'\n'))
png(fn,res=150,width = 1080)
print( dotplot(go_enrich_results[[i]][[j]] ))
dev.off()
}
}
}
把之前的數(shù)據(jù)設(shè)置好之后,后面的富集分析也是傻瓜式的