2021-07-09:細(xì)數(shù)我在GO注釋時(shí)遇到的那些報(bào)錯(cuò)

上回說(shuō)到,我廢了九牛二虎之力,終于拿到了百十來(lái)個(gè)GB的GO EVIDENCE 數(shù)據(jù),你猜怎么著,接著往下進(jìn)行的時(shí)候發(fā)現(xiàn),R語(yǔ)言根本讀不了。。。。心疼自己一秒鐘

來(lái)吧,今天接著搞起:

library(GOstats)

ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL)

#報(bào)錯(cuò)

> colnames(ago) <- c('gene_id','go_id','evi')

Error in names(x) <- value :

? 'names' attribute [3] must be the same length as the vector [1]


之前我用write.csv生成的EVIDENCE注釋文件,在讀取時(shí),列與列之間多了',',因此在讀入時(shí)加了sep=',',而這次列與列之間是空格,同理,要加入sep=' '。

ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL,sep=' ')

成功讀入,進(jìn)入下一步

goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))

goFrame <- GOFrame(goframeData)

goAllFrame <- GOAllFrame(goFrame)

library('GSEABase')

gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())

universe <- as.vector(ago$gene_id)

DEGfile <- read.csv('xxxxxx.csv',header = T,row.names = NULL)

#讀入差異表達(dá)基因#

genes = as.vector(DEGfile$gene_id)

params <- GSEAGOHyperGParams(name = 'custom',

? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,

? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,

? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'CC',

? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,

? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,

? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')

Over <- hyperGTest(params)

write.csv(summary(Over),xxxxxx_GO_CC_hyper.csv')

問(wèn)題很多:

1、EVIDENCE注釋文件的gene_id與DESeq2差異表達(dá)的文件中的gene_id要保持一致

sed "s/.1 / /g" swiss_goev_sme > swiss_goev_sme.1

終于成功生成GO注釋文件,以下為全部腳本

> library(GOstats)

> ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sly.1',row.names = NULL,sep=' ')

> colnames(ago) <- c('gene_id','go_id','evi')

> goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))

> goFrame <- GOFrame(goframeData)

> goAllFrame <- GOAllFrame(goFrame)

There were 21 warnings (use warnings() to see them)

> library('GSEABase')

Loading required package: annotate

Loading required package: XML

Attaching package: ‘XML’

The following object is masked from ‘package:graph’:

? ? addNode

Warning messages:

1: package ‘GSEABase’ was built under R version 4.0.3

2: package ‘a(chǎn)nnotate’ was built under R version 4.0.3

> gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())

> universe <- as.vector(ago$gene_id)

> DEGfile <- read.csv('/vol3/agis/zhoushaoqun_group/wangyantao/GO/diffgene_Sly48.48.0_deseq2.csv',header = T,row.names = NULL)

> genes = as.vector(DEGfile[,1])

> params <- GSEAGOHyperGParams(name = 'custom',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'CC',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'MF',

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')

Warning messages:

1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

> params1 <- GSEAGOHyperGParams(name = 'custom',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'BP',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')

Warning messages:

1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

> params2 <- GSEAGOHyperGParams(name = 'custom',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneSetCollection = gsc,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? geneIds = genes,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? universeGeneIds = universe,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ontology = 'MF',

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? pvalueCutoff = 0.05,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? conditional = FALSE,

+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? testDirection = 'over')

Warning messages:

1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

>

> Over <- hyperGTest(params)

> Over1 <- hyperGTest(params1)

> Over2 <- hyperGTest(params2)

> write.csv(summary(Over),'Sly48_GO_CC_hyper.csv')

> write.csv(summary(Over1),'Sly48_GO_BP_hyper.csv')

> write.csv(summary(Over2),'Sly48_GO_MF_hyper.csv')

> library(ggplot2)

> library(RColorBrewer)

Warning message:

package ‘RColorBrewer’ was built under R version 4.0.3

> display.brewer.all()

> col3 <- brewer.pal(4,'Set3')[c(1,3,4)]

> cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)

> bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)

> mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)

> cc2 <- cc[cc$Pvalue<1e-5,]

> bp2 <- bp[bp$Pvalue<1e-5,]

> mf2 <- mf[mf$Pvalue<1e-5,]

> colnames(cc2)[2] <- c('GOID')

> colnames(bp2)[2] <- c('GOID')

> colnames(mf2)[2] <- c('GOID')

> data <- rbind(mf2,cc2)

> data <- rbind(data,bp2)

> data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))

> p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+

+? geom_bar(stat = 'identity')+coord_flip()+

+? scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+

+? theme_bw()+theme(legend.title=element_blank(),

+? ? ? ? ? ? ? ? ? ? axis.text = element_text(size = 10),

+? ? ? ? ? ? ? ? ? ? axis.title.x = element_text(size = rel(1.5)),

+? ? ? ? ? ? ? ? ? ? axis.title.y = element_text(size = rel(1.5),angle = 90),

+? ? ? ? ? ? ? ? ? ? legend.text = element_text(size = rel(1.5)))+

+? ylab('Number of Genes (log2)') + xlab('Terms')

> ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)

>

> display.brewer.all()

> col3 <- brewer.pal(4,'Set3')[c(1,3,4)]

> cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)

> bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)

> mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)

> cc2 <- cc[cc$Pvalue<1e-2,]

> bp2 <- bp[bp$Pvalue<1e-2,]

> mf2 <- mf[mf$Pvalue<1e-2,]

> colnames(cc2)[2] <- c('GOID')

> colnames(bp2)[2] <- c('GOID')

> colnames(mf2)[2] <- c('GOID')

> data <- rbind(mf2,cc2)

> data <- rbind(data,bp2)

> data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))

> p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+

+? geom_bar(stat = 'identity')+coord_flip()+

+? scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+

+? theme_bw()+theme(legend.title=element_blank(),

+? ? ? ? ? ? ? ? ? ? axis.text = element_text(size = 10),

+? ? ? ? ? ? ? ? ? ? axis.title.x = element_text(size = rel(1.5)),

+? ? ? ? ? ? ? ? ? ? axis.title.y = element_text(size = rel(1.5),angle = 90),

+? ? ? ? ? ? ? ? ? ? legend.text = element_text(size = rel(1.5)))+

+? ylab('Number of Genes (log2)') + xlab('Terms')

> ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)

一開(kāi)始的時(shí)候,pvalue設(shè)置為10的-5次方,結(jié)果并沒(méi)有生產(chǎn)圖片,后面我把pvalue改成10的-2次方,好歹有個(gè)結(jié)果。

經(jīng)過(guò)測(cè)試,以上腳本可用,開(kāi)始批量處理相關(guān)物種差異表達(dá)的GO注釋

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容