本次線下班涉及內(nèi)容NGS組學
RNA-seq
學徒第2周,RNA-seq數(shù)據(jù)分析實戰(zhàn)訓練 https://mubu.com/doc/38y7pmgzLg
ChIP-seq
學徒第4周,ChIP-seq數(shù)據(jù)分析實戰(zhàn)訓練 https://mubu.com/doc/11taEb9ZYg
ATAC-seq
學徒第5周-ATAC-seq數(shù)據(jù)處理實戰(zhàn) https://mubu.com/doc/2DG1mC2kdg
本次課程講義:https://mubu.com/doc/3Bd4aieYug
參考在果蠅探索PRC復合物(逆向收費讀文獻2019-18)
準備工作
提前學習以下內(nèi)容:
如果能下載就下載數(shù)據(jù),網(wǎng)絡(luò)原因我們拷貝數(shù)據(jù)練習
在Jimmy大神帶領(lǐng)下學習文章圖片復現(xiàn)
1,查看基因表達情況
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('~/Downloads/10-12 tree/RNA/counts/all.counts.id.txt',header = T)
dim(a)
cg=a[a[,1]=='pho',7:16]
library(ggpubr)
library(stringr)
dat=data.frame(gene=as.numeric(cg),
sample=names(cg),
group=str_split(names(cg),'_',simplify = T)[,1]
)
ggbarplot(dat,x='sample',y='gene',
color = 'group',fill = 'group')
p <- ggbarplot(dat,x='sample',y='gene',
color = 'group',fill = 'group')
pp <- p + theme(axis.title.x=element_text(face="italic"),
axis.text.x = element_text(angle=50,vjust = 0.5))
pp
cg=a[a[,1]=='Spps',7:16]
library(ggpubr)
library(stringr)
dat=data.frame(gene=as.numeric(cg),
sample=names(cg),
group=str_split(names(cg),'_',simplify = T)[,1]
)
ggbarplot(dat,x='sample',y='gene',color = 'group',fill = 'group')
p <- ggbarplot(dat,x='sample',y='gene',
color = 'group',fill = 'group')
pp <- p + theme(axis.title.x=element_text(face="italic"),
axis.text.x = element_text(angle=50,vjust = 0.5))
pp
輸出結(jié)果如下


由于對畫圖的包不熟練,下去還需要繼續(xù)訓練。
2019-10-13日修改,曾老師講應該自行學習調(diào)整圖例盡量讓它好看一點,然后就下來學習了小潔老師的ggplot第八篇 圖片主題和格式修改
2查看相關(guān)性
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('all.counts.id.txt',header = T)
dim(a)
dat=a[,7:16]
pheatmap::pheatmap(cor(dat))
library(stringr)
ac=data.frame(group=str_split(colnames(dat),'_',simplify = T)[,1])
rownames(ac)=colnames(dat)
M=cor(log(dat+1))
pheatmap::pheatmap(M,
annotation_col = ac)
pheatmap::pheatmap(scale(M),
annotation_col = ac)
cg=dat[,colnames(dat)[grepl('_1',colnames(dat))]]
library(ggpubr)
cg=log(cg+1)
pairs(cg)




3,查看差異表達
做到這一步的時候我才知道我前面做的不對,之前都沒有意識到,聽課不是特別跟得上,之前讀取數(shù)據(jù)我是把文件放在當前文件夾的,其實是可以讀取其他目錄的
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('../counts/all.counts.id.txt',header = T)
dim(a)
dat=a[,7:16]
rownames(dat)=a[,1]
dat[1:4,1:4]
library(stringr)
group_list=str_split(colnames(dat),'_',simplify = T)[,1]
table(group_list)
# Firstly for DEseq2
if(T){
exprSet=dat
suppressMessages(library(DESeq2))
(colData <- data.frame(row.names=colnames(exprSet),
group_list=group_list) )
dds <- DESeqDataSetFromMatrix(countData = exprSet,
colData = colData,
design = ~ group_list)
dds <- DESeq(dds)
png("qc_dispersions.png", 1000, 1000, pointsize=20)
plotDispEsts(dds, main="Dispersion plot")
dev.off()
rld <- rlogTransformation(dds)
exprMatrix_rlog=assay(rld)
#write.csv(exprMatrix_rlog,'exprMatrix.rlog.csv' )
normalizedCounts1 <- t( t(counts(dds)) / sizeFactors(dds) )
# normalizedCounts2 <- counts(dds, normalized=T) # it's the same for the tpm value
# we also can try cpm or rpkm from edgeR pacage
exprMatrix_rpm=as.data.frame(normalizedCounts1)
head(exprMatrix_rpm)
#write.csv(exprMatrix_rpm,'exprMatrix.rpm.csv' )
png("DEseq_RAWvsNORM.png",height = 800,width = 800)
par(cex = 0.7)
n.sample=ncol(exprSet)
if(n.sample>40) par(cex = 0.5)
cols <- rainbow(n.sample*1.2)
par(mfrow=c(2,2))
boxplot(exprSet, col = cols,main="expression value",las=2)
boxplot(exprMatrix_rlog, col = cols,main="expression value",las=2)
hist(as.matrix(exprSet))
hist(exprMatrix_rlog)
dev.off()
library(RColorBrewer)
(mycols <- brewer.pal(8, "Dark2")[1:length(unique(group_list))])
cor(as.matrix(exprSet))
# Sample distance heatmap
sampleDists <- as.matrix(dist(t(exprMatrix_rlog)))
#install.packages("gplots",repos = "http://cran.us.r-project.org")
library(gplots)
png("qc-heatmap-samples.png", w=1000, h=1000, pointsize=20)
heatmap.2(as.matrix(sampleDists), key=F, trace="none",
col=colorpanel(100, "black", "white"),
ColSideColors=mycols[group_list], RowSideColors=mycols[group_list],
margin=c(10, 10), main="Sample Distance Matrix")
dev.off()
cor(exprMatrix_rlog)
table(group_list)
res <- results(dds,
contrast=c("group_list","SppsKO","WT"))
resOrdered <- res[order(res$padj),]
head(resOrdered)
DEG_SppsKO=as.data.frame(resOrdered)
DEG_SppsKO=na.omit(DEG_SppsKO)
table(group_list)
res <- results(dds,
contrast=c("group_list","PhoKO","WT"))
resOrdered <- res[order(res$padj),]
head(resOrdered)
DEG_PhoKO=as.data.frame(resOrdered)
DEG_PhoKO=na.omit(DEG_PhoKO)
save(DEG_PhoKO,DEG_SppsKO,file = 'deg_output.Rdata')
}
ac=data.frame(group=group_list)
rownames(ac)=colnames(dat)
這一步輸出結(jié)果較多:



跟前面一步的作用差不多,多方驗證,數(shù)據(jù)是可靠的。
差異分析:
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('~/Downloads/10-12 tree/RNA/counts/all.counts.id.txt',header = T)
dim(a)
dat=a[,7:16]
rownames(dat)=a[,1]
dat[1:4,1:4]
library(stringr)
group_list=str_split(colnames(dat),'_',simplify = T)[,1]
table(group_list)
load(file = 'deg_output.Rdata')
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')
DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')
library(UpSetR)
SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])
allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))
df=data.frame(allG=allG,
SppsKO_up=as.numeric(allG %in% SppsKO_up),
SppsKO_down=as.numeric(allG %in% SppsKO_down),
PhoKO_up=as.numeric(allG %in% PhoKO_up),
PhoKO_down=as.numeric(allG %in% PhoKO_down))
upset(df)
load(file = 'deg_output.Rdata')
library(ggpubr)
DEG_PhoKO=DEG_PhoKO[rownames(DEG_SppsKO),]
po=data.frame(SppsKO=DEG_SppsKO$log2FoldChange,
PhoKO=DEG_PhoKO$log2FoldChange)
ggscatter(po,'SppsKO','PhoKO')
sp <- ggscatter(po,'SppsKO','PhoKO',
add = "reg.line", # Add regressin line
add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
conf.int = TRUE # Add confidence interval
)
# Add correlation coefficient
sp + stat_cor(method = "pearson", label.x = 3, label.y = 30)
輸出結(jié)果如下:





然后是今天的作業(yè):完成查找韋恩圖中重疊peaks基因座信息并轉(zhuǎn)換成
.bed文件根據(jù)曾老師的代碼找到了peaks所在染色體信息坐標,需要轉(zhuǎn)換成peaks所在promoter區(qū)域所對應的基因然后這一步還不會,先把前面的代碼放上來,后面再聯(lián)系一下
colnames(df)
# "SppsKO_up" "SppsKO_down" "PhoKO_up" "PhoKO_down"
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1]
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
library(org.Dm.eg.db)
t1=toTable(org.Dm.egCHRLOC)
t2=toTable(org.Dm.egCHRLOCEND)
t3=toTable(org.Dm.egSYMBOL)
# 作業(yè)
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
gs=as.data.frame(genes(txdb))
library(org.Dm.eg.db)
t100=toTable(org.Dm.egFLYBASE)
t101=toTable(org.Dm.egSYMBOL)
t200=merge(t100,t101,by='gene_id')
colnames(gs)[6]="flybase_id"
t300=merge(t200,gs,by="flybase_id")
SppsKO_up_uniq
SppsKO_up_bed=t300[match(SppsKO_up_uniq[SppsKO_up_uniq %in% t300$symbol ],
t300$symbol),4:6]
write.table(SppsKO_up_bed,sep = '\t',
col.names = F,file = 'SppsKO_up.bed'
,quote = F,row.names = F)
SppsKO_down_uniq
SppsKO_down_bed=t300[match(SppsKO_down_uniq[SppsKO_down_uniq %in% t300$symbol ],
t300$symbol),4:6]
write.table(SppsKO_down_bed,sep = '\t',
col.names = F,file = 'SppsKO_down.bed'
,quote = F,row.names = F)
4,call peaks
第一步先對得到的每個bed文件進行注釋
bedPeaksFile= 'oldBedFiles/Cg_WT.narrowPeak.bed';
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm3.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm3.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
require(clusterProfiler)
peak <- readPeakFile( bedPeaksFile )
keepChr= !grepl('Het',seqlevels(peak))
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno)
plotAnnoBar(peakAnno)
輸出結(jié)果如下


第二步對所有的peaks進行注釋
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm3.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm3.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
require(clusterProfiler)
bedPeaksFile = 'oldBedFiles/Cg_WT.narrowPeak.bed';
bedPeaksFile
anno_bed <- function(bedPeaksFile){
peak <- readPeakFile( bedPeaksFile )
keepChr= !grepl('Het',seqlevels(peak))
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
return(peakAnno_df)
}
f=list.files(path = 'oldBedFiles/',pattern = 'WT',full.names = T)
tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
#table(x$annotation)
num1=length(grep('Promoter',x$annotation))
num2=length(grep("5' UTR",x$annotation))
num3=length(grep('Exon',x$annotation))
num4=length(grep('Intron',x$annotation))
num5=length(grep("3' UTR",x$annotation))
num6=length(grep('Intergenic',x$annotation))
return(c(num1,num2,num3,num4,num5,num6 ))
}))
colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){
basename(strsplit(x,'\\.')[[1]][1])
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
fill = "dis", color = "dis", palette = "jco" )

此步驟的作業(yè)是重新閱讀文獻,找到篩選條件優(yōu)化結(jié)果
5,重新找相關(guān)性
先用deeptools找到相關(guān)性表格的數(shù)據(jù)
# https://deeptools.readthedocs.io/en/develop/content/tools/multiBigwigSummary.html
# https://deeptools.readthedocs.io/en/develop/content/tools/plotCorrelation.html
multiBigwigSummary bins -b *WT*bw -o wt_results.npz -p 8
plotCorrelation -in wt_results.npz \
--corMethod spearman --skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix SpearmanCorr_readCounts.tab
multiBigwigSummary bins -b *WT*bw -o wt_merge_results.npz -p 8
plotCorrelation -in wt_merge_results.npz \
--corMethod spearman --skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o merge_heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix merge_SpearmanCorr_readCounts.tab
這一步我還沒能運行,因為對deeptools的multiBigwigSummary函數(shù)也不是太懂,就先用曾老師跑完的數(shù)據(jù)用R重現(xiàn)相關(guān)性分析
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
library(stringr)
ac=data.frame(group=str_split(rownames(a),'_',simplify = T)[,1])
rownames(ac)=colnames(a)
M=a
pheatmap::pheatmap(M,
annotation_col = ac)
a=read.table('merge_SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
得到結(jié)果如下



6, ChIPQC,由于前面的步驟沒有走完,這一步也沒有完成
library(ChIPQC)
samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
"example_QCexperiment.csv"))
samples
exampleExp = ChIPQC(samples,annotaiton="hg19")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
不太清楚是不是這個包不完整
報錯如下
> library(ChIPQC)
> samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
+ "example_QCexperiment.csv"))
> samples
SampleID Tissue Factor Replicate bamReads Peaks
1 CTCF_1 A549 CTCF 1 reads/SRR568129.bam peaks/SRR568129_chr22_peaks.bed
2 CTCF_2 A549 CTCF 2 reads/SRR568130.bam peaks/SRR568130_chr22_peaks.bed
3 cMYC_1 A549 cMYC 1 reads/SRR568131.bam peaks/SRR568131_chr22_peaks.bed
4 cMYC_2 A549 cMYC 2 reads/SRR568132.bam peaks/SRR568132_chr22_peaks.bed
5 E2F1_1 HeLa-S3 E2F1 1 reads/SRR502355.bam peaks/SRR502355_chr22_peaks.bed
6 E2F1_2 HeLa-S3 E2F1 2 reads/SRR502356.bam peaks/SRR502356_chr22_peaks.bed
> exampleExp = ChIPQC(samples,annotaiton="hg19")
CTCF_1 A549 CTCF 1 bed
Error in if (file.info(peaks)$size > 0) { :
missing value where TRUE/FALSE needed
需要進一步學習這個包
嘗試注釋掉這個例子繼續(xù)向下運行沒有運行成功
library(ChIPQC)
# samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
# "example_QCexperiment.csv"))
# samples
# exampleExp = ChIPQC(samples,annotaiton="hg19")
# QCmetrics(exampleExp) #shows a summary of the main QC metrics
# ChIPQCreport(exampleExp)
# SampleID: 樣本ID
# Tissue, Factor, Condition: 不同的實驗設(shè)計對照信息
# 三列信息必須包含在sampleSheet里,如果沒有某一列的信息設(shè)為NA。
# Replicate : 重復樣本的編號
# bamReads : 實驗組BAM 文件的路徑(data/bams)
# ControlID : 對照組樣本ID
# bamControl :對照組樣本的bam文件路徑
# Peaks :樣本peaks文件的路徑
# PeakCaller :peak類型的字符串,可以是raw,bed,narrow,macs等。
(bams=list.files(path = '~/Downloads/fly/CHIP-SEQ/align/',
pattern = '*.raw.bam$', full.names = T))
bams=bams[grepl('WT',bams)]
peaks=list.files(path = '~/Downloads/fly/CHIP-SEQ/align/peaks-single/',pattern = '*_raw_peaks.narrowPeak', full.names = T)
peaks=peaks[grepl('WT',peaks)]
library(stringr)
SampleID=sub('.raw.bam','',basename(bams))
Replicate=str_split(basename(bams),'_',simplify = T)[,3]
Factor=str_split(basename(bams),'_',simplify = T)[,1]
samples=data.frame(SampleID=SampleID,
Tissue='WT',
Factor=Factor,
Replicate=1,
bamReads=bams,
Peaks=peaks)
exampleExp = ChIPQC(samples,
chromosomes='2L',
annotaiton="dm6")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
由于沒有運行call peaks的上游分析path = '~/Downloads/fly/CHIP-SEQ/align/peaks-single/'這個路徑中缺少上一步生成的peaks-single/文件
繼續(xù)進行下一步,因為曾老師把生成的數(shù)據(jù)傳輸給我們了
7.重新注釋
bedPeaksFile= 'Ez_SppsKO_1_raw_peaks.narrowPeak';
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm6.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno)
plotAnnoBar(peakAnno)
結(jié)果如下


require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
anno_bed <- function(bedPeaksFile){
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
return(peakAnno_df)
}
f=list.files(path = './',pattern = 'WT',full.names = T)
f
tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
#table(x$annotation)
num1=length(grep('Promoter',x$annotation))
num2=length(grep("5' UTR",x$annotation))
num3=length(grep('Exon',x$annotation))
num4=length(grep('Intron',x$annotation))
num5=length(grep("3' UTR",x$annotation))
num6=length(grep('Intergenic',x$annotation))
return(c(num1,num2,num3,num4,num5,num6 ))
}))
colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){
(strsplit(basename(x),'\\.')[[1]][1])
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
fill = "dis", color = "dis", palette = "jco" )

8, 重新畫韋恩圖
rm(list=ls())
require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm3.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
require(clusterProfiler)
(bedFiles=list.files(pattern = '*bed'))
library(ChIPpeakAnno)
fs=lapply(bedFiles,function(x){
peak <- readPeakFile( x )
keepChr= !grepl('Het',seqlevels(peak))
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
peak
})
library(stringr)
ol <- findOverlapsOfPeaks(fs[[1]],fs[[2]],fs[[3]] )
png('3_factors_overlapVenn.png')
makeVennDiagram(ol,
NameOfPeaks=str_split(bedFiles,'_',simplify = T)[,1],
TxDb=txdb)
dev.off()

作業(yè):由于沒有好好讀文獻,之前用的參考基因組不是文章中所用的需要進一步優(yōu)化
進行基因組的坐標轉(zhuǎn)換,這一步真的有點難,一個是轉(zhuǎn)換的方式比較多,有點混淆,另一個是對果蠅的基因組不熟悉。
明天再繼續(xù)吧……