三大R包差異分析
if(!require(stringr))install.packages('stringr')
if(!require(ggplotify))install.packages("ggplotify")
if(!require(patchwork))install.packages("patchwork")
if(!require(cowplot))install.packages("cowplot")
if(!require(DESeq2))BiocManager::install('DESeq2')
if(!require(edgeR))BiocManager::install('edgeR')
if(!require(limma))BiocManager::install('limma')
rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
table(Group)
#> Group
#> normal tumor
#> 9 36
#deseq2----
library(DESeq2)
colData <- data.frame(row.names =colnames(exp),
condition=Group)#行名是表達(dá)矩陣的列名,只包含一列分組信息
#根據(jù)matrix生成deseq需要的輸入數(shù)據(jù),countData所需表達(dá)矩陣,design為colData的condition列
if(!file.exists(paste0(cancer_type,"_dd.Rdata"))){
dds <- DESeqDataSetFromMatrix(
countData = exp,
colData = colData,
design = ~ condition)
dds <- DESeq(dds)
save(dds,file = paste0(cancer_type,"_dd.Rdata"))
}#注意有設(shè)置跳過(guò)
load(file = paste0(cancer_type,"_dd.Rdata"))
# 兩兩比較
res <- results(dds, contrast = c("condition",rev(levels(Group)))) #results提取結(jié)果,提供誰(shuí)是處理誰(shuí)是對(duì)照,順序?yàn)閠umor在前normal在后,level提取水平,rev更換順序
resOrdered <- res[order(res$pvalue),] #按照P值排序,res提取代碼,order排序
DEG <- as.data.frame(resOrdered)#變成數(shù)據(jù)框
DEG = na.omit(DEG)#如果篩選標(biāo)準(zhǔn)較低,表達(dá)量低的基因得到的logFC和pValue可能為NA,這步為去掉NA
head(DEG)
#> baseMean log2FoldChange lfcSE stat pvalue
#> ENSG00000105607.11 2295.7526 -3.312580 0.1838483 -18.01800 1.407287e-72
#> ENSG00000052802.11 7540.8645 -3.770384 0.2190204 -17.21476 2.057882e-66
#> ENSG00000080709.13 468.7633 -5.608766 0.3347435 -16.75541 5.169907e-63
#> ENSG00000179152.17 1256.3050 -2.234469 0.1334827 -16.73976 6.725972e-63
#> ENSG00000042781.11 550.0380 -6.475292 0.3879044 -16.69301 1.473499e-62
#> ENSG00000182551.12 12197.3635 -3.543548 0.2163788 -16.37660 2.810313e-60
#> padj
#> ENSG00000105607.11 4.270836e-68
#> ENSG00000052802.11 3.122630e-62
#> ENSG00000080709.13 5.102995e-59
#> ENSG00000179152.17 5.102995e-59
#> ENSG00000042781.11 8.943551e-59
#> ENSG00000182551.12 1.229040e-56
#添加change列標(biāo)記基因上調(diào)下調(diào)
logFC_cutoff <- with(DEG,mean(abs(log2FoldChange)) + 2*sd(abs(log2FoldChange)) )#95%置信區(qū)間
#logFC_cutoff <- 2
k1 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange < -logFC_cutoff)
k2 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
#>
#> DOWN NOT UP
#> 783 28724 841
head(DEG)
#> baseMean log2FoldChange lfcSE stat pvalue
#> ENSG00000105607.11 2295.7526 -3.312580 0.1838483 -18.01800 1.407287e-72
#> ENSG00000052802.11 7540.8645 -3.770384 0.2190204 -17.21476 2.057882e-66
#> ENSG00000080709.13 468.7633 -5.608766 0.3347435 -16.75541 5.169907e-63
#> ENSG00000179152.17 1256.3050 -2.234469 0.1334827 -16.73976 6.725972e-63
#> ENSG00000042781.11 550.0380 -6.475292 0.3879044 -16.69301 1.473499e-62
#> ENSG00000182551.12 12197.3635 -3.543548 0.2163788 -16.37660 2.810313e-60
#> padj change
#> ENSG00000105607.11 4.270836e-68 NOT
#> ENSG00000052802.11 3.122630e-62 NOT
#> ENSG00000080709.13 5.102995e-59 DOWN
#> ENSG00000179152.17 5.102995e-59 NOT
#> ENSG00000042781.11 8.943551e-59 DOWN
#> ENSG00000182551.12 1.229040e-56 NOT
DESeq2_DEG <- DEG#為了不被后面覆蓋
#edgeR----
library(edgeR)
dge <- DGEList(counts=exp,group=Group)
dge$samples$lib.size <- colSums(dge$counts)
dge <- calcNormFactors(dge)
design <- model.matrix(~0+Group)
rownames(design)<-colnames(dge)
colnames(design)<-levels(Group)
dge <- estimateGLMCommonDisp(dge, design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmFit(dge, design)
fit2 <- glmLRT(fit, contrast=c(-1,1)) #這是結(jié)果
DEG=topTags(fit2, n=nrow(exp))
DEG=as.data.frame(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )
#logFC_cutoff <- 2
k1 = (DEG$PValue < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$PValue < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
head(DEG)
#> logFC logCPM LR PValue FDR change
#> ENSG00000042781.11 -6.287813 2.978966 338.9533 1.078532e-75 3.273130e-71 DOWN
#> ENSG00000080709.13 -5.388444 2.749969 305.3198 2.284579e-68 3.466619e-64 DOWN
#> ENSG00000109929.8 -3.992376 5.734121 296.8176 1.625990e-66 1.644851e-62 NOT
#> ENSG00000213398.6 -4.555608 5.835332 295.3276 3.433673e-66 2.605128e-62 DOWN
#> ENSG00000105607.11 -3.112617 5.091139 292.9179 1.150241e-65 6.981505e-62 NOT
#> ENSG00000120158.10 -3.633224 4.588093 285.4740 4.816671e-64 2.436272e-60 NOT
table(DEG$change)
#>
#> DOWN NOT UP
#> 533 28643 1172
edgeR_DEG <- DEG
#####查看數(shù)據(jù)結(jié)果是否算反了,根據(jù)logFC -6.287813來(lái)看
as.numeric(exp["ENSG00000042781.11",])
boxplot(exp["ENSG00000042781.11",] ~ Group)#根據(jù)箱線圖看tumor低為下調(diào)基因logFC小于零
###limma----
library(limma)
design <- model.matrix(~0+Group)
colnames(design)=levels(Group)
rownames(design)=colnames(exp)
dge <- DGEList(counts=exp)
dge <- calcNormFactors(dge)
v <- voom(dge,design, normalize="quantile")
fit <- lmFit(v, design)
constrasts = paste(rev(levels(Group)),collapse = "-")
cont.matrix <- makeContrasts(contrasts=constrasts,levels = design)
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)
DEG = topTable(fit2, coef=constrasts, n=Inf)
DEG = na.omit(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )
#logFC_cutoff <- 2
k1 = (DEG$P.Value < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$P.Value < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
#>
#> DOWN NOT UP
#> 1060 28915 373
head(DEG)
#> logFC AveExpr t P.Value adj.P.Val
#> ENSG00000042781.11 -6.464608 -0.1996274 -23.43702 9.460362e-28 2.871031e-23
#> ENSG00000080709.13 -5.525461 0.2993289 -21.42402 4.691172e-26 7.118385e-22
#> ENSG00000266964.4 -6.882650 0.1094543 -20.82023 1.604506e-25 1.217338e-21
#> ENSG00000253959.1 -5.171740 -5.6877937 -20.93329 1.271725e-25 1.217338e-21
#> ENSG00000251049.2 -5.861260 -5.4549397 -20.50083 3.111286e-25 1.888426e-21
#> ENSG00000204653.8 -7.427044 0.5359110 -19.20496 4.985070e-24 1.827535e-20
#> B change
#> ENSG00000042781.11 52.55580 DOWN
#> ENSG00000080709.13 48.85594 DOWN
#> ENSG00000266964.4 47.73131 DOWN
#> ENSG00000253959.1 46.44808 DOWN
#> ENSG00000251049.2 45.82235 DOWN
#> ENSG00000204653.8 44.45722 DOWN
limma_voom_DEG <- DEG
tj = data.frame(deseq2 = as.integer(table(DESeq2_DEG$change)),
edgeR = as.integer(table(edgeR_DEG$change)),
limma_voom = as.integer(table(limma_voom_DEG$change)),
row.names = c("down","not","up")
);tj
#> deseq2 edgeR limma_voom
#> down 783 533 1060
#> not 28724 28643 28915
#> up 841 1172 373
save(DESeq2_DEG,edgeR_DEG,limma_voom_DEG,Group,tj,file = paste0(cancer_type,"_DEG.Rdata"))
可視化
rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
if(!require(tinyarray))devtools::install_local("tinyarray-master.zip",upgrade = F)#如果需要更新的話卸載重裝更快
library(ggplot2)
library(tinyarray)
exp[1:4,1:4]
#> TCGA-W5-AA36-01A-11R-A41I-07 TCGA-W5-AA2H-01A-31R-A41I-07
#> ENSG00000000003.13 2504 226
#> ENSG00000000419.11 1272 1146
#> ENSG00000000457.12 504 602
#> ENSG00000000460.15 123 162
#> TCGA-ZU-A8S4-11A-11R-A41I-07 TCGA-WD-A7RX-01A-12R-A41I-07
#> ENSG00000000003.13 4107 9646
#> ENSG00000000419.11 741 1266
#> ENSG00000000457.12 312 1317
#> ENSG00000000460.15 170 451
# cpm 去除文庫(kù)大小的影響(標(biāo)準(zhǔn)化)
dat = log2(cpm(exp)+1)
pca.plot = draw_pca(dat,Group);pca.plot
save(pca.plot,file = paste0(cancer_type,"_pcaplot.Rdata"))
cg1 = rownames(DESeq2_DEG)[DESeq2_DEG$change !="NOT"]
cg2 = rownames(edgeR_DEG)[edgeR_DEG$change !="NOT"]
cg3 = rownames(limma_voom_DEG)[limma_voom_DEG$change !="NOT"]
h1 = draw_heatmap(dat[cg1,],Group,n_cutoff = 2)#n_cutoff參數(shù)截止范圍,使對(duì)比度更鮮明
#選取n_cutoff值可使用以下代碼看取值范圍
#m = t(scale(t(dat[cg1,])))
#range(m)
#boxplot(m)
h2 = draw_heatmap(dat[cg2,],Group,n_cutoff = 2)
h3 = draw_heatmap(dat[cg3,],Group,n_cutoff = 2)
m2d = function(x){
mean(abs(x))+2*sd(abs(x))
}#與上面閾值一樣
v1 = draw_volcano(DESeq2_DEG,pkg = 1,logFC_cutoff = m2d(DESeq2_DEG$log2FoldChange),symmetry = T)#pvalue默認(rèn)參數(shù)0.05,加參數(shù)symmetry = T 可以讓圖居中
v2 = draw_volcano(edgeR_DEG,pkg = 2,logFC_cutoff = m2d(edgeR_DEG$logFC))
v3 = draw_volcano(limma_voom_DEG,pkg = 3,logFC_cutoff = m2d(limma_voom_DEG$logFC))
library(patchwork)#只能拼ggplot2的圖,熱圖可以使用as.ggplot改變格式拼在一起
(h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none")
ggsave(paste0(cancer_type,"_heat_vo.png"),width = 15,height = 10)
三大R包差異基因?qū)Ρ?/h3>
rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
load("TCGA-CHOL_pcaplot.Rdata")
UP=function(df){
rownames(df)[df$change=="UP"]
}
DOWN=function(df){
rownames(df)[df$change=="DOWN"]
}
up = intersect(intersect(UP(DESeq2_DEG),UP(edgeR_DEG)),UP(limma_voom_DEG))
down = intersect(intersect(DOWN(DESeq2_DEG),DOWN(edgeR_DEG)),DOWN(limma_voom_DEG))
dat = log2(cpm(exp)+1)
hp = draw_heatmap(dat[c(up,down),],Group,n_cutoff = 2)
#上調(diào)、下調(diào)基因分別畫(huà)維恩圖
up_genes = list(Deseq2 = UP(DESeq2_DEG),
edgeR = UP(edgeR_DEG),
limma = UP(limma_voom_DEG))
down_genes = list(Deseq2 = DOWN(DESeq2_DEG),
edgeR = DOWN(edgeR_DEG),
limma = DOWN(limma_voom_DEG))
up.plot <- draw_venn(up_genes,"UPgene")
down.plot <- draw_venn(down_genes,"DOWNgene")
#維恩圖拼圖,終于搞定
library(patchwork)
#up.plot + down.plot
# 拼圖
pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
ggsave(paste0(cancer_type,"_heat_ve_pca.png"),width = 15,height = 10)
#如果熱圖按照原順序畫(huà)的沒(méi)有分組,將原矩陣按group順序重新排列再畫(huà)
m = dat[c(up,down),]
m = m[,order(Group)]
Group = sort(Group)
draw_heatmap(m,Group,n_cutoff = 2,cluster_cols=F)
rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
load("TCGA-CHOL_pcaplot.Rdata")
UP=function(df){
rownames(df)[df$change=="UP"]
}
DOWN=function(df){
rownames(df)[df$change=="DOWN"]
}
up = intersect(intersect(UP(DESeq2_DEG),UP(edgeR_DEG)),UP(limma_voom_DEG))
down = intersect(intersect(DOWN(DESeq2_DEG),DOWN(edgeR_DEG)),DOWN(limma_voom_DEG))
dat = log2(cpm(exp)+1)
hp = draw_heatmap(dat[c(up,down),],Group,n_cutoff = 2)
#上調(diào)、下調(diào)基因分別畫(huà)維恩圖
up_genes = list(Deseq2 = UP(DESeq2_DEG),
edgeR = UP(edgeR_DEG),
limma = UP(limma_voom_DEG))
down_genes = list(Deseq2 = DOWN(DESeq2_DEG),
edgeR = DOWN(edgeR_DEG),
limma = DOWN(limma_voom_DEG))
up.plot <- draw_venn(up_genes,"UPgene")
down.plot <- draw_venn(down_genes,"DOWNgene")
#維恩圖拼圖,終于搞定
library(patchwork)
#up.plot + down.plot
# 拼圖
pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
ggsave(paste0(cancer_type,"_heat_ve_pca.png"),width = 15,height = 10)
#如果熱圖按照原順序畫(huà)的沒(méi)有分組,將原矩陣按group順序重新排列再畫(huà)
m = dat[c(up,down),]
m = m[,order(Group)]
Group = sort(Group)
draw_heatmap(m,Group,n_cutoff = 2,cluster_cols=F)
#########生信技能樹(shù)課程筆記