超碰97精品,色AV中文字

1.導(dǎo)入數(shù)據(jù)

利用htseq計數(shù)得到的表達矩陣，下載到Windows中（再拷貝一個 .Rproj到下面文件夾中，直接雙擊R Project進入到當前路徑下的R中進行后續(xù)操作）

做R分析的文件夾

rm(list = ls())
options(stringsAsFactors = T)
library(DESeq2)
#載入數(shù)據(jù)，只有1個對照，2個實驗組（重復(fù)）
control <- read.table("SRR3589956_matrix.count",
                      sep="\t", col.names = c("gene_id","control"))
rep1 <- read.table("SRR3589957_matrix.count",
                   sep="\t", col.names = c("gene_id","rep1"))
rep2 <- read.table("SRR3589958_matrix.count",
                   sep="\t",col.names = c("gene_id","rep2"))
#將3個數(shù)據(jù)根據(jù)gene_id合并到一起
raw_count <- merge(merge(control, rep1, by="gene_id"), rep2, by="gene_id")
raw_count

raw_count

#刪除raw_count的前5行
raw_count_filt <- raw_count[-1:-5,]
#gsub匹配raw_count_filt的gene_id行，生成ENSEMBL（ENSEMBL是一個字符串格式的向量）
ENSEMBL <- gsub("(.*?)\\.\\d*?_\\d", "\\1", raw_count_filt$gene_id)
#將raw_count_filt的行名替換為ENSEMBL（猜想應(yīng)該是想要去掉raw_count_filt行名（即其gene_id）的小數(shù)點后面的數(shù)字）
row.names(raw_count_filt) <- ENSEMBL

#解決control沒有重復(fù)的問題，自己創(chuàng)造一個control2#注意：創(chuàng)造數(shù)據(jù)不是無根據(jù)捏造。要考慮到高通量測序的read是默認符合泊松分布的，于是可以這樣做：（1）計算KD重復(fù)組的均值差，作為泊松分布的均值；（2）使用概率函數(shù)rpois()隨機產(chǎn)生一個數(shù)值，前一步的均值作為lambda；（3）對一些read count 低于均值的直接加上對應(yīng)KD重復(fù)組之間的差值delta_mean <- abs(mean(raw_count_filt$rep1) - mean(raw_count_filt$rep2))sampleNum <- length(raw_count_filt$control)sampleMean <- mean(raw_count_filt$control)control2 <- integer(sampleNum)for (i in 1:sampleNum){  if(raw_count_filt$control[i] < sampleMean){    control2[i] <- raw_count_filt$control[i] + abs(raw_count_filt$rep1[i] - raw_count_filt$rep2[i])  }  else{    control2[i] <- raw_count_filt$control[i] + rpois(1,delta_mean)  }}#將創(chuàng)造好的control2添加到矩陣中去raw_count_filt$control2 <- control2

2.使用DESeq2進行差異基因分析

DESeq2要求的數(shù)據(jù)是raw count，沒必要進行FPKM/TPM/RPFKM/TMM標準化

#構(gòu)建DESeq2所需的DESeqDataSet對象library(DESeq2)countData <- raw_count_filt[,2:5]    #countData為基因表達矩陣condition <- factor(c("control","KD","KD","control"))     #condition為處理條件dds <- DESeqDataSetFromMatrix(countData, DataFrame(condition), design= ~ condition )

#過濾low_count的數(shù)據(jù)
nrow(dds)
dds <- dds[ rowSums(counts(dds)) > 1, ]
nrow(dds)  #看下圖，過濾了將近一半的數(shù)據(jù)

過濾前后基因的數(shù)量

#使用DESeq進行差異表達分析
dds <- DESeq(dds)        #出現(xiàn)如下紅色字符，表示運行成功

出現(xiàn)圖上紅色字段，表示運行成功

#查看結(jié)果
res <- results(dds)
mcols(res, use.names = TRUE)
summary(res)

res每一列表達的含義

基因差異表達的結(jié)果

可以看到上調(diào)和下調(diào)的基因占比，low_count的比率比較高，所以大部分基因表達量不高，主要集中在0附近（log2(1)=0,也就是變化1倍）。

3.畫MA圖

M代表： log fold change，能衡量基因表達量變化，上調(diào)或下調(diào)。
A代表：每個基因的count的均值。

#做一個沒有經(jīng)過 statistical moderation平緩log2 fold changes的MA圖
library(ggplot2)
#畫個MA圖
plotMA(res, ylim = c(-5,5))
#標注出MA圖中p值最小的基因
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})

MA圖，藍色圓圈標注的為p值最小的基因

#如果經(jīng)過lfcShrink 收縮log2 fold change， 結(jié)果會好看很多
res.shrink <- lfcShrink(dds, contrast = c("condition","KD","control"), res=res)
plotMA(res.shrink, ylim = c(-5,5))
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
  points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
  text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})

MA圖，藍色圓圈標注的為p值最小的基因

4.提取差異基因分析的結(jié)果

##提取差異基因分析的結(jié)果
table(res$padj<0.05)     #計算p值小于0.05的基因的個數(shù)
res_deseq <- res[order(res$padj),]   #根據(jù)res的padj值進行排序，并賦值給res_deseq
diff_gene_deseq2 <- subset(res_deseq, padj<0.05 & (log2FoldChange > 1 | log2FoldChange < -1))    #將res_deseq過濾：要求是p值小于0.05且log2FoldChange在1~-1范圍內(nèi)的

diff_gene_deseq2 <- row.names(diff_gene_deseq2)
res_diff_data <- merge(as.data.frame(res),as.data.frame(counts(dds,normalize=TRUE)),by="row.names",sort=FALSE)
write.csv(res_diff_data,file = "11_30_humen_data.csv",row.names = F)
##得到的11_30_humen_data.csv為差異表達分析的結(jié)果。

5.火山圖

#需要先將差異表達分析得到的結(jié)果11_30_humen_data.csv在Linux中進行處理
#第三列l(wèi)og2foldchange大于1為上調(diào)，小于-1下調(diào)，其他的不顯著（需要構(gòu)建significant列：up,down,no）  
perl -F',' -alne '$F[5]=~s/\r//;if(/baseMean/){print "gene\tlog2FoldChange\tpadj\tsignificant"} else{unless(/NA/){if($F[2] >=1 && $F[5]<0.05){print "$F[0]\t$F[2]\t$F[5]\tup"} elsif($F[2]<=-1 && $F[5]<0.05){print "$F[0]\t$F[2]\t$F[5]\tdown"} else{print "$F[0]\t$F[2]\t$F[5]\tno"}}}' 11_30_humen_data.csv >volcano.txt

#火山圖
library(ggplot2)
data <-read.table(file="volcano.txt",header = TRUE, row.names =1,sep = "\t")

volcano <-ggplot(data = data,aes(x=log2FoldChange,y= -1*log10(padj)))+geom_point(aes(color=significant))+scale_color_manual(values = c("red","grey","blue")) + labs(title="Volcano_Plot",x=expression((log[2](FC)), y=expression(-log[10](padj)) ))+geom_hline(yintercept=1.3,linetype=4)+geom_vline(xintercept=c(-1,1),linetype=4)
volcano

火山圖

6.gene聚類熱圖

library(genefilter)
library(pheatmap)

rld <- rlogTransformation(dds,blind = F)
#生成一個mm.DESeq2.pseudo.counts.csv矩陣
write.csv(assay(rld),file="mm.DESeq2.pseudo.counts.csv")   

#head了前20個差異較大的基因
topVarGene <- head(order(rowVars(assay(rld)),decreasing = TRUE),20) 

mat  <- assay(rld)[ topVarGene, ]
# mat <- mat - rowMeans(mat) 減去一個平均值，讓數(shù)值更加集中。得到的是第二個圖。
anno <- as.data.frame(colData(rld)[,c("condition","sizeFactor")])
pheatmap(mat, annotation_col = anno)

聚類圖之一

聚類圖之二

做了2個聚類的圖，都只做了前20個基因的聚類。第二幅圖中的基因更加的集中。

參考：轉(zhuǎn)錄組入門（7）：差異表達分析；轉(zhuǎn)錄組學習七（差異基因分析）

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

2.1轉(zhuǎn)錄組 | 差異表達分析（DESeq）

2.1轉(zhuǎn)錄組 | 差異表達分析（DESeq）

1.導(dǎo)入數(shù)據(jù)

2.使用DESeq2進行差異基因分析

3.畫MA圖

4.提取差異基因分析的結(jié)果

5.火山圖

6.gene聚類熱圖

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

2.1轉(zhuǎn)錄組 | 差異表達分析（DESeq）

1.導(dǎo)入數(shù)據(jù)

2.使用DESeq2進行差異基因分析

3.畫MA圖

4.提取差異基因分析的結(jié)果

5.火山圖

6.gene聚類熱圖

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av