# 轉錄組專題:limma與芯片數據差異表達分析 (Transcriptomics series: limma and differential expression analysis of microarray data)

# Raw input data is a count table
#source("https://bioconductor.org/biocLite.R")
#options(BioC_mirror="http://mirrors.ustc.edu.cn/bioc/")
#biocLite("limma")

# Working directory: output files written with relative paths land here.
setwd("E:/")

library(limma)
library(edgeR)

# Expression matrix: pick the CSV interactively
# (e.g. file = "12_gene_count_matrix.csv").
exprSet <- read.csv(file.choose(), header = TRUE, sep = ",")
head(exprSet)

# Promote the first column to row names and drop it from the table;
# the remaining column names are the sample IDs.
rownames(exprSet) <- exprSet[[1]]
exprSet <- exprSet[, -1]
head(exprSet)

# Group label of every sample, listed in the column order of exprSet:
# 2 ASD, 4 Healthy, 1 ASD, 2 Healthy, 3 ASD.
group_labels <- rep(
  c("ASD", "Healthy", "ASD", "Healthy", "ASD"),
  times = c(2, 4, 1, 2, 3)
)
condition <- factor(group_labels, levels = c("ASD", "Healthy"))
condition

# Design matrix without intercept: one indicator column per group,
# one row per sample (rows named after the expression-matrix columns).
design <- model.matrix(~ 0 + condition)
dimnames(design) <- list(colnames(exprSet), levels(condition))
design

# voom transformation of the counts with quantile normalisation.
# The argument is spelled out in full (normalize.method) instead of relying
# on partial argument matching; plot = TRUE draws the voom diagnostic plot.
v <- voom(exprSet,
          design,
          normalize.method = "quantile",
          plot = TRUE)

# Linear model per gene on the group-means design.
fit <- lmFit(v, design)

# Contrast of interest: ASD versus Healthy.
cont.matrix <- makeContrasts(contrasts = c("ASD-Healthy"), levels = design)

# Order matters: the contrast must be applied to the lmFit() result first and
# eBayes() run afterwards. Running eBayes() before contrasts.fit() leaves the
# contrast fit without moderated statistics, so topTable() cannot be used.
fit2 <- contrasts.fit(fit, cont.matrix)
fit3 <- eBayes(fit2)

# Results for all genes. The contrast matrix has a single column, so the
# coefficient is selected by its name rather than by position 2 (which does
# not exist).
DEG1 <- topTable(fit3, coef = "ASD-Healthy", number = Inf)
DEG2 <- na.omit(DEG1)
head(DEG2); dim(DEG2)

# Save the complete (unfiltered) result table.
# DEG2 is written here because diff_final does not exist yet at this point,
# and a separate file name keeps this table from being overwritten by the
# filtered table saved further down.
write.table(DEG2, "diff_all_limma.txt", row.names = TRUE, quote = FALSE, sep = "\t")

# Thresholds. foldChange is a fold change on the linear scale
# (FC = 2^log2FC), so it is converted to the log2 scale before being
# compared with the logFC column.
p <- 0.05
padj <- 0.1
foldChange <- 1.5
logFC_cutoff <- log2(foldChange)

# Genes passing the effect-size cutoff in either direction.
passes_fc <- abs(DEG2$logFC) > logFC_cutoff

# Filter on the FDR-adjusted p-value
diff_signif1 <- DEG2[DEG2$adj.P.Val < padj & passes_fc, ]
dim(diff_signif1)

# Filter on the raw (uncorrected) p-value
diff_signif2 <- DEG2[DEG2$P.Value < p & passes_fc, ]
dim(diff_signif2)

# Choose which filtered set to carry forward: diff_signif1 (corrected) or
# diff_signif2 (uncorrected). The corrected set is the default.
diff_signif <- diff_signif1

# Sort by logFC and keep the three useful columns, selected by name so the
# result does not depend on topTable()'s column order.
diff_final <- diff_signif[order(diff_signif$logFC),
                          c("logFC", "P.Value", "adj.P.Val")]
head(diff_final); dim(diff_final)

# Save the filtered table
write.table(diff_final, "diff_signif_final_limma.txt", row.names = TRUE, quote = FALSE, sep = "\t")

#save(diff_final, file = 'limma_diff.Rdata')

# Annotation of differentially expressed genes ===============================

# Annotation file (Ensembl ID -> symbol), chosen interactively.
# Per the original note, BioMart exports already carry a header row.
ensembl2symbol <- read.table(file.choose(), header = TRUE, sep = "\t")
head(ensembl2symbol)

# Second annotation file (symbol -> id), chosen interactively; its three
# columns are renamed and only the first two are kept.
symbol2id <- read.table(file = file.choose(), header = TRUE, sep = "\t")
head(symbol2id)
names(symbol2id) <- c("gene_symbol", "gene_id", "gene_symbol2")
symbol2id <- symbol2id[, 1:2]

# Annotate the DEG table (diff_final or DEG2)
#DEG<-read.table(file.choose(),header=T, sep="\t") 
#head(DEG)

# The gene IDs live in the row names of the result table, not in its first
# column (which is logFC). Expose them as an explicit "Ensembl" key column on
# a copy, instead of renaming a statistics column.
deg_table <- diff_final
deg_table$Ensembl <- rownames(deg_table)

#library(tidyr)
#y<-separate(MAT, col=ensembl,into=c("ENSG","dot"),sep="\\.",remove = T);head(y)

# Join with the Ensembl -> symbol annotation; only genes present in both
# tables are kept (all = FALSE).
ensg2id_dif <- merge(deg_table, ensembl2symbol,
                     by.x = "Ensembl", by.y = "Gene.stable.ID.version",
                     all = FALSE, sort = FALSE)
head(ensg2id_dif); dim(ensg2id_dif)

if (nrow(ensg2id_dif) == 0) {
  warning("No gene ID matched 'Gene.stable.ID.version'; check the ID format.",
          call. = FALSE)
}

# Put the annotation columns first, then the Ensembl ID and the statistics.
# Columns are picked by name, so the layout no longer depends on a fixed
# column count.
anno_cols <- setdiff(colnames(ensembl2symbol), "Gene.stable.ID.version")
stat_cols <- setdiff(colnames(deg_table), "Ensembl")
exprSet_new <- ensg2id_dif[, c(anno_cols, "Ensembl", stat_cols), drop = FALSE]
head(exprSet_new); dim(exprSet_new)

write.table(exprSet_new, "dif_note_limma.txt", row.names = FALSE, quote = FALSE, sep = "\t")

# © 著作權歸作者所有,轉載或內容合作請聯系作者
# 【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
# 平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

# 友情鏈接 更多精彩內容