這里是佳奧!初步注釋了peaks,并在組件進(jìn)行了比較,下一步便是用.bw文件計(jì)算相關(guān)性了。
1 ChIP-Seq分析結(jié)果:peaks相關(guān)性繪圖
第五步,ChIP-Seq-correlation。
ChIP-Seq和RNA-Seq的.bw文件都可以放在IGV進(jìn)行可視化比較,但是要說明ChIP-Seq數(shù)據(jù)的相關(guān)性就需要另外的處理,一般是deeptools。
因?yàn)镃hIP-Seq數(shù)據(jù)無法通過基因?yàn)閱挝贿M(jìn)行定量。
這個(gè)程序是以KB為長(zhǎng)度進(jìn)行定量。
multiBigwigSummary bins -b *WT*bw -o wt_results.npz -p 8
##繪制熱圖
plotCorrelation -in results.npz \
--corMethod spearman -- skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix SpearmanCorr_readCounts.tab
結(jié)果圖,不是很好看。

QQ截圖20220824155531.png
我們把結(jié)果.tab文件導(dǎo)入R進(jìn)行繪圖。
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
library(stringr)
ac=data.frame(group=str_split(rownames(a),'_',simplify = T)[,1])
rownames(ac)=colnames(a)
M=a
pheatmap::pheatmap(M,
annotation_col = ac)
a=read.table('merge_SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)

QQ截圖20220824155915.png

QQ截圖20220824155926.png

QQ截圖20220824161141.png
2 ChIPQC包的使用
第六步, ChIPQC。
library(ChIPQC)
samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
"example_QCexperiment.csv"))
samples
##自帶的example沒有附帶數(shù)據(jù),所以運(yùn)行不了
exampleExp = ChIPQC(samples,annotaiton="hg19")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
##表格需要以下內(nèi)容(.bam文件)
# SampleID: 樣本ID
# Tissue, Factor, Condition: 不同的實(shí)驗(yàn)設(shè)計(jì)對(duì)照信息
# 三列信息必須包含在sampleSheet里,如果沒有某一列的信息設(shè)為NA。
# Replicate : 重復(fù)樣本的編號(hào)
# bamReads : 實(shí)驗(yàn)組BAM 文件的路徑(data/bams)
# ControlID : 對(duì)照組樣本ID
# bamControl :對(duì)照組樣本的bam文件路徑
# Peaks :樣本peaks文件的路徑
# PeakCaller :peak類型的字符串,可以是raw,bed,narrow,macs等。
(bams=list.files(path = '~/fly/CHIP-SEQ/align/',
pattern = '*.raw.bam$', full.names = T))
bams=bams[grepl('WT',bams)]
peaks=list.files(path = '~/fly/CHIP-SEQ/peaks-single/',pattern = '*_raw_peaks.narrowPeak', full.names = T)
peaks=peaks[grepl('WT',peaks)]
library(stringr)
SampleID=sub('.raw.bam','',basename(bams))
Replicate=str_split(basename(bams),'_',simplify = T)[,3]
Factor=str_split(basename(bams),'_',simplify = T)[,1]
samples=data.frame(SampleID=SampleID,
Tissue='WT',
Factor=Factor,
Replicate=1,
bamReads=bams,
Peaks=peaks)
exampleExp = ChIPQC(samples,
chromosomes='2L',##只看了一條染色體
annotaiton="dm6")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
運(yùn)行成功會(huì)出現(xiàn)一個(gè)報(bào)表。

QQ截圖20220824201538.png

QQ截圖20220824201554.png
第七步,peaks-anno-dm6,自己走的流程,ChIP-Seq的peaks進(jìn)行注釋。
##anno-for-each-bed
bedPeaksFile = 'Ez_SppsKO_1_raw_peaks.narrowPeak';
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm6.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)##這里使用的參考基因組與文章中不一樣,dm6
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')##過濾要看的染色體
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))##這一步很關(guān)鍵,TxDb的染色體帶chr開頭,而數(shù)據(jù)原本不帶chr開頭
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno)
plotAnnoBar(peakAnno)

QQ截圖20220824201033.png

QQ截圖20220824201041.png
##anno-for-all-peaks
require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
anno_bed <- function(bedPeaksFile){
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
return(peakAnno_df)
}
f=list.files(path = './',pattern = 'WT',full.names = T)
> f
[1] "./Pc_WT_peaks.narrowPeak" "./Ph_WT_peaks.narrowPeak"
[3] "./Pho_WT_peaks.narrowPeak" "./Psc_WT_peaks.narrowPeak"
[5] "./Spps_WT_peaks.narrowPeak"
tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
#table(x$annotation)
num1=length(grep('Promoter',x$annotation))
num2=length(grep("5' UTR",x$annotation))
num3=length(grep('Exon',x$annotation))
num4=length(grep('Intron',x$annotation))
num5=length(grep("3' UTR",x$annotation))
num6=length(grep('Intergenic',x$annotation))
return(c(num1,num2,num3,num4,num5,num6 ))
}))
colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){
(strsplit(basename(x),'\\.')[[1]][1])##這里的順序很重要,先對(duì)x路徑名取basename,再strsplit
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
fill = "dis", color = "dis", palette = "jco" )

QQ截圖20220824203918.png
主要思路就是注釋。
下一篇我們繪制韋恩圖。
我們下一篇再見!