【表觀調(diào)控 實(shí)戰(zhàn)】六、peaks相關(guān)性繪圖與ChIPQC包使用

這里是佳奧!初步注釋了peaks,并在組件進(jìn)行了比較,下一步便是用.bw文件計(jì)算相關(guān)性了。

1 ChIP-Seq分析結(jié)果:peaks相關(guān)性繪圖

第五步,ChIP-Seq-correlation。

ChIP-Seq和RNA-Seq的.bw文件都可以放在IGV進(jìn)行可視化比較,但是要說明ChIP-Seq數(shù)據(jù)的相關(guān)性就需要另外的處理,一般是deeptools。

因?yàn)镃hIP-Seq數(shù)據(jù)無法通過基因?yàn)閱挝贿M(jìn)行定量。

這個(gè)程序是以KB為長(zhǎng)度進(jìn)行定量。

multiBigwigSummary  bins -b  *WT*bw  -o wt_results.npz -p 8

##繪制熱圖
plotCorrelation -in results.npz \
--corMethod spearman -- skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix SpearmanCorr_readCounts.tab

結(jié)果圖,不是很好看。


QQ截圖20220824155531.png

我們把結(jié)果.tab文件導(dǎo)入R進(jìn)行繪圖。

rm(list = ls())
options(stringsAsFactors = F)
a=read.table('SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
library(stringr)
ac=data.frame(group=str_split(rownames(a),'_',simplify = T)[,1])
rownames(ac)=colnames(a)
M=a
pheatmap::pheatmap(M,
                   annotation_col = ac) 
 

a=read.table('merge_SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
QQ截圖20220824155915.png
QQ截圖20220824155926.png
QQ截圖20220824161141.png

2 ChIPQC包的使用

第六步, ChIPQC。

library(ChIPQC)
samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
                             "example_QCexperiment.csv"))
samples

##自帶的example沒有附帶數(shù)據(jù),所以運(yùn)行不了
exampleExp = ChIPQC(samples,annotaiton="hg19")
QCmetrics(exampleExp)  #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)

##表格需要以下內(nèi)容(.bam文件)

# SampleID: 樣本ID
# Tissue, Factor, Condition: 不同的實(shí)驗(yàn)設(shè)計(jì)對(duì)照信息
# 三列信息必須包含在sampleSheet里,如果沒有某一列的信息設(shè)為NA。
# Replicate : 重復(fù)樣本的編號(hào)
# bamReads : 實(shí)驗(yàn)組BAM 文件的路徑(data/bams)
# ControlID : 對(duì)照組樣本ID
# bamControl :對(duì)照組樣本的bam文件路徑
# Peaks :樣本peaks文件的路徑
# PeakCaller :peak類型的字符串,可以是raw,bed,narrow,macs等。

(bams=list.files(path = '~/fly/CHIP-SEQ/align/',
                 pattern = '*.raw.bam$', full.names = T))
bams=bams[grepl('WT',bams)]
peaks=list.files(path = '~/fly/CHIP-SEQ/peaks-single/',pattern = '*_raw_peaks.narrowPeak', full.names = T)
peaks=peaks[grepl('WT',peaks)]


library(stringr)
SampleID=sub('.raw.bam','',basename(bams))
Replicate=str_split(basename(bams),'_',simplify = T)[,3]
Factor=str_split(basename(bams),'_',simplify = T)[,1]


samples=data.frame(SampleID=SampleID,
                   Tissue='WT', 
                   Factor=Factor, 
                   Replicate=1,            
                   bamReads=bams,                           
                   Peaks=peaks) 

exampleExp = ChIPQC(samples,
                    chromosomes='2L',##只看了一條染色體
                    annotaiton="dm6")
QCmetrics(exampleExp)  #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)

運(yùn)行成功會(huì)出現(xiàn)一個(gè)報(bào)表。


QQ截圖20220824201538.png

QQ截圖20220824201554.png

第七步,peaks-anno-dm6,自己走的流程,ChIP-Seq的peaks進(jìn)行注釋。

##anno-for-each-bed

bedPeaksFile        = 'Ez_SppsKO_1_raw_peaks.narrowPeak'; 
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm6.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)##這里使用的參考基因組與文章中不一樣,dm6
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler) 
peak <- readPeakFile( bedPeaksFile ) 
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')##過濾要看的染色體
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))##這一步很關(guān)鍵,TxDb的染色體帶chr開頭,而數(shù)據(jù)原本不帶chr開頭
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000), 
                         TxDb=txdb, annoDb="org.Dm.eg.db") 
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno) 
plotAnnoBar(peakAnno)
QQ截圖20220824201033.png
QQ截圖20220824201041.png
##anno-for-all-peaks

require(ChIPseeker) 
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)  
anno_bed <- function(bedPeaksFile){
  peak <- readPeakFile( bedPeaksFile )  
  seqlevels(peak)
  keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
  seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
  seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
  seqlevels(peak)
  cat(paste0('there are ',length(peak),' peaks for this data' ))
  peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000), 
                           TxDb=txdb, annoDb="org.Dm.eg.db") 
  peakAnno_df <- as.data.frame(peakAnno)
  sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
  return(peakAnno_df)
}

f=list.files(path = './',pattern = 'WT',full.names = T)

> f
[1] "./Pc_WT_peaks.narrowPeak"   "./Ph_WT_peaks.narrowPeak"  
[3] "./Pho_WT_peaks.narrowPeak"  "./Psc_WT_peaks.narrowPeak" 
[5] "./Spps_WT_peaks.narrowPeak"

tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
  #table(x$annotation)
  num1=length(grep('Promoter',x$annotation))
  num2=length(grep("5' UTR",x$annotation))
  num3=length(grep('Exon',x$annotation))
  num4=length(grep('Intron',x$annotation))
  num5=length(grep("3' UTR",x$annotation))
  num6=length(grep('Intergenic',x$annotation))
  return(c(num1,num2,num3,num4,num5,num6 ))
}))

colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){ 
  (strsplit(basename(x),'\\.')[[1]][1])##這里的順序很重要,先對(duì)x路徑名取basename,再strsplit
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
          fill = "dis", color = "dis", palette = "jco"  )
QQ截圖20220824203918.png

主要思路就是注釋。

下一篇我們繪制韋恩圖。

我們下一篇再見!

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容