三、差異分析

三大R包差異分析

if(!require(stringr))install.packages('stringr')
if(!require(ggplotify))install.packages("ggplotify")
if(!require(patchwork))install.packages("patchwork")
if(!require(cowplot))install.packages("cowplot")
if(!require(DESeq2))BiocManager::install('DESeq2')
if(!require(edgeR))BiocManager::install('edgeR')
if(!require(limma))BiocManager::install('limma')

rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
table(Group)
#> Group
#> normal  tumor 
#>      9     36

#deseq2----
library(DESeq2)
colData <- data.frame(row.names =colnames(exp), 
                      condition=Group)#行名是表達(dá)矩陣的列名,只包含一列分組信息
#根據(jù)matrix生成deseq需要的輸入數(shù)據(jù),countData所需表達(dá)矩陣,design為colData的condition列
if(!file.exists(paste0(cancer_type,"_dd.Rdata"))){
  dds <- DESeqDataSetFromMatrix(
  countData = exp,
  colData = colData,
  design = ~ condition)
  dds <- DESeq(dds)
  save(dds,file = paste0(cancer_type,"_dd.Rdata"))
}#注意有設(shè)置跳過(guò)
load(file = paste0(cancer_type,"_dd.Rdata"))

# 兩兩比較
res <- results(dds, contrast = c("condition",rev(levels(Group)))) #results提取結(jié)果,提供誰(shuí)是處理誰(shuí)是對(duì)照,順序?yàn)閠umor在前normal在后,level提取水平,rev更換順序 
resOrdered <- res[order(res$pvalue),] #按照P值排序,res提取代碼,order排序
DEG <- as.data.frame(resOrdered)#變成數(shù)據(jù)框
DEG = na.omit(DEG)#如果篩選標(biāo)準(zhǔn)較低,表達(dá)量低的基因得到的logFC和pValue可能為NA,這步為去掉NA
head(DEG)
#>                      baseMean log2FoldChange     lfcSE      stat       pvalue
#> ENSG00000105607.11  2295.7526      -3.312580 0.1838483 -18.01800 1.407287e-72
#> ENSG00000052802.11  7540.8645      -3.770384 0.2190204 -17.21476 2.057882e-66
#> ENSG00000080709.13   468.7633      -5.608766 0.3347435 -16.75541 5.169907e-63
#> ENSG00000179152.17  1256.3050      -2.234469 0.1334827 -16.73976 6.725972e-63
#> ENSG00000042781.11   550.0380      -6.475292 0.3879044 -16.69301 1.473499e-62
#> ENSG00000182551.12 12197.3635      -3.543548 0.2163788 -16.37660 2.810313e-60
#>                            padj
#> ENSG00000105607.11 4.270836e-68
#> ENSG00000052802.11 3.122630e-62
#> ENSG00000080709.13 5.102995e-59
#> ENSG00000179152.17 5.102995e-59
#> ENSG00000042781.11 8.943551e-59
#> ENSG00000182551.12 1.229040e-56

#添加change列標(biāo)記基因上調(diào)下調(diào)
logFC_cutoff <- with(DEG,mean(abs(log2FoldChange)) + 2*sd(abs(log2FoldChange)) )#95%置信區(qū)間
#logFC_cutoff <- 2
k1 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange < -logFC_cutoff)
k2 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
#> 
#>  DOWN   NOT    UP 
#>   783 28724   841
head(DEG)
#>                      baseMean log2FoldChange     lfcSE      stat       pvalue
#> ENSG00000105607.11  2295.7526      -3.312580 0.1838483 -18.01800 1.407287e-72
#> ENSG00000052802.11  7540.8645      -3.770384 0.2190204 -17.21476 2.057882e-66
#> ENSG00000080709.13   468.7633      -5.608766 0.3347435 -16.75541 5.169907e-63
#> ENSG00000179152.17  1256.3050      -2.234469 0.1334827 -16.73976 6.725972e-63
#> ENSG00000042781.11   550.0380      -6.475292 0.3879044 -16.69301 1.473499e-62
#> ENSG00000182551.12 12197.3635      -3.543548 0.2163788 -16.37660 2.810313e-60
#>                            padj change
#> ENSG00000105607.11 4.270836e-68    NOT
#> ENSG00000052802.11 3.122630e-62    NOT
#> ENSG00000080709.13 5.102995e-59   DOWN
#> ENSG00000179152.17 5.102995e-59    NOT
#> ENSG00000042781.11 8.943551e-59   DOWN
#> ENSG00000182551.12 1.229040e-56    NOT

DESeq2_DEG <- DEG#為了不被后面覆蓋


#edgeR----
library(edgeR)

dge <- DGEList(counts=exp,group=Group)
dge$samples$lib.size <- colSums(dge$counts)
dge <- calcNormFactors(dge) 

design <- model.matrix(~0+Group)
rownames(design)<-colnames(dge)
colnames(design)<-levels(Group)

dge <- estimateGLMCommonDisp(dge, design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)

fit <- glmFit(dge, design)
fit2 <- glmLRT(fit, contrast=c(-1,1)) #這是結(jié)果

DEG=topTags(fit2, n=nrow(exp))
DEG=as.data.frame(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )

#logFC_cutoff <- 2
k1 = (DEG$PValue < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$PValue < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))

head(DEG)
#>                        logFC   logCPM       LR       PValue          FDR change
#> ENSG00000042781.11 -6.287813 2.978966 338.9533 1.078532e-75 3.273130e-71   DOWN
#> ENSG00000080709.13 -5.388444 2.749969 305.3198 2.284579e-68 3.466619e-64   DOWN
#> ENSG00000109929.8  -3.992376 5.734121 296.8176 1.625990e-66 1.644851e-62    NOT
#> ENSG00000213398.6  -4.555608 5.835332 295.3276 3.433673e-66 2.605128e-62   DOWN
#> ENSG00000105607.11 -3.112617 5.091139 292.9179 1.150241e-65 6.981505e-62    NOT
#> ENSG00000120158.10 -3.633224 4.588093 285.4740 4.816671e-64 2.436272e-60    NOT
table(DEG$change)
#> 
#>  DOWN   NOT    UP 
#>   533 28643  1172
edgeR_DEG <- DEG


#####查看數(shù)據(jù)結(jié)果是否算反了,根據(jù)logFC -6.287813來(lái)看
as.numeric(exp["ENSG00000042781.11",])
boxplot(exp["ENSG00000042781.11",] ~ Group)#根據(jù)箱線圖看tumor低為下調(diào)基因logFC小于零


###limma----
library(limma)

design <- model.matrix(~0+Group)
colnames(design)=levels(Group)
rownames(design)=colnames(exp)

dge <- DGEList(counts=exp)
dge <- calcNormFactors(dge)

v <- voom(dge,design, normalize="quantile")
fit <- lmFit(v, design)

constrasts = paste(rev(levels(Group)),collapse = "-")
cont.matrix <- makeContrasts(contrasts=constrasts,levels = design) 
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)

DEG = topTable(fit2, coef=constrasts, n=Inf)
DEG = na.omit(DEG)

logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )

#logFC_cutoff <- 2
k1 = (DEG$P.Value < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$P.Value < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
#> 
#>  DOWN   NOT    UP 
#>  1060 28915   373
head(DEG)
#>                        logFC    AveExpr         t      P.Value    adj.P.Val
#> ENSG00000042781.11 -6.464608 -0.1996274 -23.43702 9.460362e-28 2.871031e-23
#> ENSG00000080709.13 -5.525461  0.2993289 -21.42402 4.691172e-26 7.118385e-22
#> ENSG00000266964.4  -6.882650  0.1094543 -20.82023 1.604506e-25 1.217338e-21
#> ENSG00000253959.1  -5.171740 -5.6877937 -20.93329 1.271725e-25 1.217338e-21
#> ENSG00000251049.2  -5.861260 -5.4549397 -20.50083 3.111286e-25 1.888426e-21
#> ENSG00000204653.8  -7.427044  0.5359110 -19.20496 4.985070e-24 1.827535e-20
#>                           B change
#> ENSG00000042781.11 52.55580   DOWN
#> ENSG00000080709.13 48.85594   DOWN
#> ENSG00000266964.4  47.73131   DOWN
#> ENSG00000253959.1  46.44808   DOWN
#> ENSG00000251049.2  45.82235   DOWN
#> ENSG00000204653.8  44.45722   DOWN

limma_voom_DEG <- DEG
tj = data.frame(deseq2 = as.integer(table(DESeq2_DEG$change)),
           edgeR = as.integer(table(edgeR_DEG$change)),
           limma_voom = as.integer(table(limma_voom_DEG$change)),
           row.names = c("down","not","up")
          );tj
#>      deseq2 edgeR limma_voom
#> down    783   533       1060
#> not   28724 28643      28915
#> up      841  1172        373
save(DESeq2_DEG,edgeR_DEG,limma_voom_DEG,Group,tj,file = paste0(cancer_type,"_DEG.Rdata"))

可視化

rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
if(!require(tinyarray))devtools::install_local("tinyarray-master.zip",upgrade = F)#如果需要更新的話卸載重裝更快
library(ggplot2)
library(tinyarray)
exp[1:4,1:4]
#>                    TCGA-W5-AA36-01A-11R-A41I-07 TCGA-W5-AA2H-01A-31R-A41I-07
#> ENSG00000000003.13                         2504                          226
#> ENSG00000000419.11                         1272                         1146
#> ENSG00000000457.12                          504                          602
#> ENSG00000000460.15                          123                          162
#>                    TCGA-ZU-A8S4-11A-11R-A41I-07 TCGA-WD-A7RX-01A-12R-A41I-07
#> ENSG00000000003.13                         4107                         9646
#> ENSG00000000419.11                          741                         1266
#> ENSG00000000457.12                          312                         1317
#> ENSG00000000460.15                          170                          451
# cpm 去除文庫(kù)大小的影響(標(biāo)準(zhǔn)化)
dat = log2(cpm(exp)+1)
pca.plot = draw_pca(dat,Group);pca.plot
save(pca.plot,file = paste0(cancer_type,"_pcaplot.Rdata"))

cg1 = rownames(DESeq2_DEG)[DESeq2_DEG$change !="NOT"]
cg2 = rownames(edgeR_DEG)[edgeR_DEG$change !="NOT"]
cg3 = rownames(limma_voom_DEG)[limma_voom_DEG$change !="NOT"]

h1 = draw_heatmap(dat[cg1,],Group,n_cutoff = 2)#n_cutoff參數(shù)截止范圍,使對(duì)比度更鮮明
#選取n_cutoff值可使用以下代碼看取值范圍
#m = t(scale(t(dat[cg1,])))
#range(m)
#boxplot(m)

h2 = draw_heatmap(dat[cg2,],Group,n_cutoff = 2)
h3 = draw_heatmap(dat[cg3,],Group,n_cutoff = 2)

m2d = function(x){
  mean(abs(x))+2*sd(abs(x))
}#與上面閾值一樣

v1 = draw_volcano(DESeq2_DEG,pkg = 1,logFC_cutoff = m2d(DESeq2_DEG$log2FoldChange),symmetry = T)#pvalue默認(rèn)參數(shù)0.05,加參數(shù)symmetry = T 可以讓圖居中
v2 = draw_volcano(edgeR_DEG,pkg = 2,logFC_cutoff = m2d(edgeR_DEG$logFC))
v3 = draw_volcano(limma_voom_DEG,pkg = 3,logFC_cutoff = m2d(limma_voom_DEG$logFC))

library(patchwork)#只能拼ggplot2的圖,熱圖可以使用as.ggplot改變格式拼在一起
(h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none")
ggsave(paste0(cancer_type,"_heat_vo.png"),width = 15,height = 10)

三大R包差異基因?qū)Ρ?/h3>
rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
load("TCGA-CHOL_pcaplot.Rdata")
UP=function(df){
  rownames(df)[df$change=="UP"]
}
DOWN=function(df){
  rownames(df)[df$change=="DOWN"]
}

up = intersect(intersect(UP(DESeq2_DEG),UP(edgeR_DEG)),UP(limma_voom_DEG))
down = intersect(intersect(DOWN(DESeq2_DEG),DOWN(edgeR_DEG)),DOWN(limma_voom_DEG))
dat = log2(cpm(exp)+1)
hp = draw_heatmap(dat[c(up,down),],Group,n_cutoff = 2)

#上調(diào)、下調(diào)基因分別畫(huà)維恩圖
up_genes = list(Deseq2 = UP(DESeq2_DEG),
          edgeR = UP(edgeR_DEG),
          limma = UP(limma_voom_DEG))

down_genes = list(Deseq2 = DOWN(DESeq2_DEG),
          edgeR = DOWN(edgeR_DEG),
          limma = DOWN(limma_voom_DEG))

up.plot <- draw_venn(up_genes,"UPgene")
down.plot <- draw_venn(down_genes,"DOWNgene")

#維恩圖拼圖,終于搞定

library(patchwork)
#up.plot + down.plot
# 拼圖
pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
ggsave(paste0(cancer_type,"_heat_ve_pca.png"),width = 15,height = 10)
#如果熱圖按照原順序畫(huà)的沒(méi)有分組,將原矩陣按group順序重新排列再畫(huà)
m = dat[c(up,down),]
m = m[,order(Group)]
Group = sort(Group)
draw_heatmap(m,Group,n_cutoff = 2,cluster_cols=F)

#########生信技能樹(shù)課程筆記

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容