上回說(shuō)到,我廢了九牛二虎之力,終于拿到了百十來(lái)個(gè)GB的GO EVIDENCE 數(shù)據(jù),你猜怎么著,接著往下進(jìn)行的時(shí)候發(fā)現(xiàn),R語(yǔ)言根本讀不了。。。。心疼自己一秒鐘
來(lái)吧,今天接著搞起:

library(GOstats)
ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL)
#報(bào)錯(cuò)
> colnames(ago) <- c('gene_id','go_id','evi')
Error in names(x) <- value :
? 'names' attribute [3] must be the same length as the vector [1]

之前我用write.csv生成的EVIDENCE注釋文件,在讀取時(shí),列與列之間多了',',因此在讀入時(shí)加了sep=',',而這次列與列之間是空格,同理,要加入sep=' '。
ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL,sep=' ')
成功讀入,進(jìn)入下一步
goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))
goFrame <- GOFrame(goframeData)
goAllFrame <- GOAllFrame(goFrame)
library('GSEABase')
gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())
universe <- as.vector(ago$gene_id)
DEGfile <- read.csv('xxxxxx.csv',header = T,row.names = NULL)
#讀入差異表達(dá)基因#
genes = as.vector(DEGfile$gene_id)
params <- GSEAGOHyperGParams(name = 'custom',
? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,
? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,
? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'CC',
? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,
? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,
? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')
Over <- hyperGTest(params)
write.csv(summary(Over),xxxxxx_GO_CC_hyper.csv')
問(wèn)題很多:
1、EVIDENCE注釋文件的gene_id與DESeq2差異表達(dá)的文件中的gene_id要保持一致
sed "s/.1 / /g" swiss_goev_sme > swiss_goev_sme.1
終于成功生成GO注釋文件,以下為全部腳本
> library(GOstats)
> ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sly.1',row.names = NULL,sep=' ')
> colnames(ago) <- c('gene_id','go_id','evi')
> goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))
> goFrame <- GOFrame(goframeData)
> goAllFrame <- GOAllFrame(goFrame)
There were 21 warnings (use warnings() to see them)
> library('GSEABase')
Loading required package: annotate
Loading required package: XML
Attaching package: ‘XML’
The following object is masked from ‘package:graph’:
? ? addNode
Warning messages:
1: package ‘GSEABase’ was built under R version 4.0.3
2: package ‘a(chǎn)nnotate’ was built under R version 4.0.3
> gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())
> universe <- as.vector(ago$gene_id)
> DEGfile <- read.csv('/vol3/agis/zhoushaoqun_group/wangyantao/GO/diffgene_Sly48.48.0_deseq2.csv',header = T,row.names = NULL)
> genes = as.vector(DEGfile[,1])
> params <- GSEAGOHyperGParams(name = 'custom',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'CC',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'MF',
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')
Warning messages:
1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds
2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds
> params1 <- GSEAGOHyperGParams(name = 'custom',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'BP',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')
Warning messages:
1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds
2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds
> params2 <- GSEAGOHyperGParams(name = 'custom',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'MF',
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,
+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')
Warning messages:
1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds
2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds
>
> Over <- hyperGTest(params)
> Over1 <- hyperGTest(params1)
> Over2 <- hyperGTest(params2)
> write.csv(summary(Over),'Sly48_GO_CC_hyper.csv')
> write.csv(summary(Over1),'Sly48_GO_BP_hyper.csv')
> write.csv(summary(Over2),'Sly48_GO_MF_hyper.csv')
> library(ggplot2)
> library(RColorBrewer)
Warning message:
package ‘RColorBrewer’ was built under R version 4.0.3
> display.brewer.all()
> col3 <- brewer.pal(4,'Set3')[c(1,3,4)]
> cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)
> bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)
> mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)
> cc2 <- cc[cc$Pvalue<1e-5,]
> bp2 <- bp[bp$Pvalue<1e-5,]
> mf2 <- mf[mf$Pvalue<1e-5,]
> colnames(cc2)[2] <- c('GOID')
> colnames(bp2)[2] <- c('GOID')
> colnames(mf2)[2] <- c('GOID')
> data <- rbind(mf2,cc2)
> data <- rbind(data,bp2)
> data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))
> p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+
+? geom_bar(stat = 'identity')+coord_flip()+
+? scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+
+? theme_bw()+theme(legend.title=element_blank(),
+? ? ? ? ? ? ? ? ? ? axis.text = element_text(size = 10),
+? ? ? ? ? ? ? ? ? ? axis.title.x = element_text(size = rel(1.5)),
+? ? ? ? ? ? ? ? ? ? axis.title.y = element_text(size = rel(1.5),angle = 90),
+? ? ? ? ? ? ? ? ? ? legend.text = element_text(size = rel(1.5)))+
+? ylab('Number of Genes (log2)') + xlab('Terms')
> ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)
>
> display.brewer.all()
> col3 <- brewer.pal(4,'Set3')[c(1,3,4)]
> cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)
> bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)
> mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)
> cc2 <- cc[cc$Pvalue<1e-2,]
> bp2 <- bp[bp$Pvalue<1e-2,]
> mf2 <- mf[mf$Pvalue<1e-2,]
> colnames(cc2)[2] <- c('GOID')
> colnames(bp2)[2] <- c('GOID')
> colnames(mf2)[2] <- c('GOID')
> data <- rbind(mf2,cc2)
> data <- rbind(data,bp2)
> data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))
> p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+
+? geom_bar(stat = 'identity')+coord_flip()+
+? scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+
+? theme_bw()+theme(legend.title=element_blank(),
+? ? ? ? ? ? ? ? ? ? axis.text = element_text(size = 10),
+? ? ? ? ? ? ? ? ? ? axis.title.x = element_text(size = rel(1.5)),
+? ? ? ? ? ? ? ? ? ? axis.title.y = element_text(size = rel(1.5),angle = 90),
+? ? ? ? ? ? ? ? ? ? legend.text = element_text(size = rel(1.5)))+
+? ylab('Number of Genes (log2)') + xlab('Terms')
> ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)
一開(kāi)始的時(shí)候,pvalue設(shè)置為10的-5次方,結(jié)果并沒(méi)有生產(chǎn)圖片,后面我把pvalue改成10的-2次方,好歹有個(gè)結(jié)果。
經(jīng)過(guò)測(cè)試,以上腳本可用,開(kāi)始批量處理相關(guān)物種差異表達(dá)的GO注釋