喜歡這個勞斯,來看他的生信繪圖課,每一節(jié)就幾分鐘。
https://www.bilibili.com/video/BV1XJ411m73p
R 繪圖系統(tǒng)
- 基礎繪圖
2.grid - lattice
- ggplot2
【現(xiàn)有python版本python
預習
data()
點圖、餅圖、條形圖、直方圖
Rgallery
一個站點,搜集了利用R語言繪制的圖,自行瀏覽、探索,用處很大
條形圖/柱狀圖
m <- read.csv("Rdata/homo_length.csv",header = T)
class(m)
head(m)
x <- m[1:24,]
x
class(x$length)
barplot(height = x$length) #繪圖時參數(shù)從少到多增加,方便找bug
barplot(height = x$length,names.arg = x$chr)#設置條形圖標簽,畫完發(fā)現(xiàn)沒顯示全
barplot(height = x$length,names.arg = x$chr,las=3) #設置標簽方向為豎
#colours() 包含657種顏色
yanse <- sample(colours(),24,replace = F)
barplot(height = x$length,names.arg = x$chr,col = yanse)
barplot(height = x$length,names.arg = x$chr,col = rainbow(7))
# horiz =T ,條形圖橫過來,beside= T, 并列條形圖, =F,堆積的條形圖
barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F)
barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F,
main = "Human chromosome length distribution",xlab = "Chromosome Name",ylab = "Chromosome Length") # 設置標題、x軸,y軸
分組條形圖
x <- read.csv("Rdata/sv_distrubution.csv",header = T,row.names = 1)
head(x)
# row.names = 1 ,表示第一列作為行名
x
barplot(x)
barplot(as.matrix(x))#按照突變類型畫圖的
barplot(t(as.matrix(x)))#改成按照染色體位置
barplot(t(as.matrix(x)),col = rainbow(4))
barplot(t(as.matrix(x)),col = rainbow(4),beside = T)
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x))#barplot可直接在函數(shù)內(nèi)部添加圖例
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800))
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800),
main = "SV Distribution",xlab="Chromosome Number",ylab="SV Numbers")
直方圖
#條形圖:離散形, 直方圖: 連續(xù)型
x <- read.table("Rdata/H37Rv.gff",sep = "\t",header = F,skip = 7,quote = "")#rawdata,基因組注釋文件,給出基因組每個位置具體功能;前面7行是注釋文件,跳過;quote= "" 表示里面的字符串不用引號括起來。
x <- x[x$V3=="gene",]
x <- abs(x$V5-x$V4+1) #基因的長度;起始位置的不同,取絕對值保險一點
length(x)
range(x)#查看最長最短的基因
hist(x)
hist(x,breaks = 80)# breaks
hist(x,breaks = c(0,500,1000,1500,2000,2500,15000))
hist(x,breaks = 80,freq = F)# 根據(jù)概率密度來畫圖,而不是默認的頻數(shù)
hist(x,breaks = 80,density = T)
hist(rivers,density = T,breaks = 10)
?hist
#添加密度先
h=hist(x,nclass=80,col="pink",xlab="Gene Length (bp)",main="Histogram of Gene Length");
h
rug(x);
xfit<-seq(min(x),max(x),length=100);
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x));
yfit <- yfit*diff(h$mids[1:2])*length(x);
lines(xfit, yfit, col="blue", lwd=2);
散點圖
m <- read.table("Rdata/prok_representative.txt",sep="\t");
genome_size <- m[,2];
gene_size <- m[,4];
plot(genome_size,gene_size,pch=16,
xlab="Genome Size",ylab="Genes");
#pch選項指定點的參數(shù),0-25,共26種選項。 cex 設置點的大小
fit <- lm(gene_size ~ genome_size);
summary(fit)
abline( fit,col="blue",lwd=1.8 );
rr <- round( summary(fit)$adj.r.squared,2);
intercept <- round( summary(fit)$coefficients[1],2);
slope <- round( summary(fit)$coefficients[2],2);
eq <- bquote( atop( "y = " * .(slope) * " x + " * .(intercept), R^2 == .(rr) ) )
eq
text(12,6e3,eq);
餅圖
#統(tǒng)計學家最不喜歡的圖
x <- read.csv("Rdata/homo_length.csv",header = T)
x <- x[1:24,]
barplot(height = x$length,names.arg = x$chr)
pie(x$length/sum(x$length))
#3D餅圖
m <- read.table("Rdata/Species.txt");
m
x <- m[,3]
pie(x);
pie(x,col=rainbow(length(x)))
lbls <- paste(m[,1],m[,2],"\n",m[,3],"%" )
pie(x,col=rainbow(length(x)),labels = lbls)
pie(x,col=rainbow(length(x)),labels = lbls)
pie(x,col=rainbow(length(x)),labels = lbls,radius = 1)
pie(x,col=rainbow(length(x)),labels = lbls,radius = 1,cex=0.8)
install.packages("plotrix")
BiocManager::install('plotrix')
library(plotrix)
pie3D(x,col=rainbow(length(x)),labels = lbls)
pie3D(x,col=rainbow(length(x)),labels = lbls,cex=0.8)
pieplot <- pie3D(x,col=rainbow(length(x)),radius = 1,explode = 0.1)#餅圖炸開
pie3D.labels(pieplot,labels = lbls,labelcex = 0.8,height = 0.1,labelrad = 1.75)
#扇形圖
fan.plot(x,col=rainbow(length(x)),labels = lbls,cex=0.8,radius = 1)
箱線圖
boxplot(mpg ~cyl,data = mtcars)
m <- read.csv("Rdata/gene_expression.csv",header = T,row.names = 1)
head(m)
boxplot(m)
boxplot(m,outline=F) #outline=F 是把離群點去掉
install.packages("reshape2")
library(reshape2)
x <- melt(m)
head(x)
boxplot(value ~ variable,data = x) # y~x
boxplot(value ~ variable,data = x,outline=F)
boxplot(len ~ dose, data = ToothGrowth)
boxplot(len ~ dose:supp, data = ToothGrowth)
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"))
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),notch=T)#添加缺口
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),width=c(1,0.5,1,0.5,1,0.5))#設置每個盒子寬度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),varwidth=T)
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),boxwex=0.5)#盒子寬度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),staplewex=0.5)#最大最小值的線的寬度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),sep = ":",lex.order = T)#
par()函數(shù)
#參數(shù)集,用來設置選項,而不是繪圖
opar <- par(no.readonly = T)
?par
par()
x <- par()
x$pch#點的形狀
par(pch=16)#局部設置優(yōu)先級大于全局設置;重啟R 才回復
opar <- par(no.readonly = T)# 把不是只讀的選項輸出出來
opar
par(opar)#使用默認的設置
plot(women$height,women$weight)
plot(women$height,women$weight,type = "b",pch=16,col="red",lty=2,lwd=2,
main = "Main Title",sub = "Sub Title",xlab = "Heigth",
ylab = "Weight",xlim = c(50,100),ylim = c(100,200),cex=1.5,las=3,
adj=0.3,bty="c",fg="blue",tck=-0.03,col.main="green",cex.lab=2)
#↑注意每個選項的數(shù)值類型;不斷操作
顏色
#對比強烈;不張揚;色盲友好
colour()
rgb(0,1,0) #0~255 之間的數(shù)字
#FF0000,紅色,十六進制
hsv(0,1,0)
hcl(0,1,0)
#顏色轉(zhuǎn)換
rainbow(7)
pie(1:7,col=rainbow(7))
heat.colors(7)
pie(1:7,col=heat.colors(7))
terrain.colors(7)
pie(1:7,col=terrain.colors(7))
topo.colors(7)
pie(1:7,col=topo.colors(7))
cm.colors(7)
pie(1:7,col=cm.colors(7))
gray.colors(7)
pie(1:7,col=gray.colors(7))
# RColorBrewer
display.brewer.all()
library(RColorBrewer)
display.brewer.all()
display.brewer.pal("Set1")
brewer.pal.info
yanse <- brewer.pal(name = "Set1",n=7)
pie(1:7, col = yanse)
熱圖
#1 heatmap()
#2 gplots ∞? heatmap.2()
#3 pheatmap∞? pheatmap
#4 ComplexHeatmap∞?
example("heatmap")
m <- read.csv("Rdata/heatmap.csv",header = T,row.names = 1)
class(m)
x <- as.matrix(m)
heatmap(x)
heatmap(t(x))
# 比較重要的選項 顏色
heatmap(x,col=c("green","red"))
yanse <- colorRampPalette(c("red","black","green"))(nrow(x))# 漸變色
heatmap(x,col=yanse)
heatmap(x,col=yanse,RowSideColors = yanse)
heatmap(x,col=yanse,ColSideColors = colorRampPalette(c("red","black","green"))(ncol(x)))
heatmap(x,col=yanse,Rowv = NA) # 取消按行聚類
heatmap(x,col=yanse,Rowv = NA,Colv = NA)# 取消按行、按列聚類
heatmap(state.x77)
heatmap(state.x77,scale = "col")#標準化
#install.packages("gplots")
library(gplots)
heatmap.2(x)
example(heatmap.2)
heatmap.2(x)
heatmap.2(x,key = F)#不顯示圖例
heatmap.2(x,symkey = F)# 圖例顏色不對稱
heatmap.2(x,symkey = T,density.info = "none")
heatmap.2(x,symkey = T,trace = "none")#不添加豎線
heatmap.2(x,symkey = T,tracecol = "black")#線的顏色為黑
heatmap.2(x,dendrogram = "none")
heatmap.2(x,dendrogram = "row")
heatmap.2(x,dendrogram = "col")
#install.packages("pheatmap")
library(pheatmap) #很美觀,現(xiàn)在用的多
example("pheatmap")
pheatmap(x)
annotation_col <- data.frame(CellType=factor(rep(c("N1","T1","N2","T2"),each=5)))
rownames(annotation_col) <- colnames(x)
pheatmap(x,annotation_col = annotation_col)
pheatmap(x,annotation_col = annotation_col,display_numbers = T)#熱圖上顯示數(shù)字
pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.2f") #保留兩位
pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.1f",number_color = "black")
韋恩圖
#集合的計算
#1 venneuler包中的venneuler()-- 比較強大
#2 gplots venn() -比較簡單
#3 venn包中的venn()
#4 venndiagram包中的venn.diagram函數(shù) --常用
listA <- read.csv("Rdata/genes_list_A.txt",header=FALSE)
A <- listA$V1
listB <- read.csv("Rdata/genes_list_B.txt",header=FALSE)
B <- listB$V1
listC <- read.csv("Rdata/genes_list_C.txt",header=FALSE)
C <- listC$V1
listD <- read.csv("Rdata/genes_list_D.txt",header=FALSE)
D <- listD$V1
listE <- read.csv("Rdata/genes_list_E.txt",header=FALSE)
E <- listE$V1
length(A);length(B);length(C);length(D);length(E)
library(VennDiagram)
#輸入數(shù)據(jù)是列表;不能再繪圖窗口中顯示,必須保存到文件中
venn.diagram(list(C = C, D = D),fill = c("yellow","cyan"), cex = 1.5,filename = "venn2.png")# cex 樣品的標簽放大1.5倍
venn.diagram(list(A = A, C = C, D = D), fill = c("yellow","red","cyan"), cex = 1.5,filename="venn3.png")
venn.diagram(list(A = A, B = B, C = C, D = D), fill = c("yellow","red","cyan","forestgreen"), cex = 1.5,filename="venn4.png")
venn.diagram(list(A = A, B = B, C = C, D = D , E = E ), fill = c("yellow","red","cyan","forestgreen","lightblue"), cex = 1.5,filename="venn5.png")
曼哈頓圖
#二維散點圖展示大量數(shù)據(jù),用于全基因組關聯(lián)分析GWAS中
#通過曼哈頓圖可以比較容易找到差異顯著的 snps點
install.packages("qqman")
library(qqman)
library(RColorBrewer)
str(gwasResults)
head(gwasResults)
manhattan(gwasResults)# 默認點是5 和8
manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = c("blue4", "orange3"), suggestiveline =
F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))
# 太小的點,也就是p只太小,是離群點,沒有實際意義
unique(gwasResults$CHR)
number <- length(unique(gwasResults$CHR))
yanse <- brewer.pal(n = 4,name = "Set1")
manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = yanse, suggestiveline =
F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))
manhattan(subset(gwasResults,CHR==3))
# 高亮顯示部分SNP結果
snpsOfInterest
manhattan(gwasResults, highlight = snpsOfInterest)
# 注釋SNP結果
manhattan(gwasResults, annotatePval = 0.001)
manhattan(gwasResults, annotatePval = 0.001, annotateTop = FALSE)
火山圖
#RNA-seq中 1.表達量的值>2, 2.校正后的p值,也就是q值<0.05 ,0.0.1
m <- read.csv("Rdata/Deseq2.csv",header = T,row.names = 1)
head(m)
m <- na.omit(m)
plot(m$log2FoldChange,m$padj)
plot(m$log2FoldChange,-1*log10(m$padj))
plot(m$log2FoldChange,-1*log10(m$padj),xlim = c(-10,10),ylim=c(0,100))
#添加顏色;沒有col的選項,將數(shù)據(jù)分成三份,加上顏色
#好神奇
m <- transform(m,padj=-1*log10(m$padj))# 對數(shù)據(jù)框的列進行修改
down <- m[m$log2FoldChange<=-1,]
up <- m[m$log2FoldChange>=1,]
no <- m[m$log2FoldChange>-1 & m$log2FoldChange <1,]
plot(no$log2FoldChange,no$padj,xlim = c(-10,10),ylim=c(0,100),col="blue",pch=16,cex=0.8,main = "Gene Expression",xlab = "log2FoldChange",ylab="-log10(Qvalue)")
#低級繪圖命令,就是在已經(jīng)繪制好的圖中加上簡單的元素,線、點、文字等
points(up$log2FoldChange,up$padj,col="red",pch=16,cex=0.8)
points(down$log2FoldChange,down$padj,col="green",pch=16,cex=0.8)
GC-depth圖
#評估基因組拼接效果;不會放在發(fā)表文章中
opar <- par(no.readonly = T)#保留原始設置
par(mfrow=c(2,3)) #按行排列,兩行三列,六個圖;平均分配空間;不平均--layout
plot(pressure)
m <- read.table("Rdata/GC-depth.txt");
head(m)
nf <- layout(matrix(c(0,2,0,0,1,3),2,3,byrow=T),c(0.5,3,1),c(1,3,0.5),TRUE);
par(mar=c(5,5,0.5,0.5));#反復調(diào)整得到
layout.show(3)
par("mar")
par(mar=c(5,5,0.5,0.5));
x <- m[,1];
y <- m[,2];
plot(x,y,xlab='GC Content(%)',ylab='Depth',pch=16,col="#FF000077",xlim=c(0,100),ylim=c(0,max(y)))
hist(x,breaks = 100)
#將hist圖保存到變量里
xhist <- hist(x,breaks=100,plot=FALSE);
yhist <- hist(y,breaks=floor(max(y)-0),plot=FALSE);
par(mar=c(0,5,1,1));
barplot(xhist$counts,space=0,xlim=c(0,100) );
par(mar=c(5,0,1,1));
barplot(yhist$counts,space=0,horiz=TRUE,ylim=c(0,max(y) ) )
#難點
#1. layout()進行布局
#2. par()中 mar 進行設置
#3.將繪圖內(nèi)容保存為變量,再從對象中選出數(shù)據(jù)繪圖
COG功能注釋
#蛋白質(zhì)直系同源數(shù)據(jù)庫,包含藻類、細菌,真核生物中21個完整基因組的編碼蛋白、系統(tǒng)進化關系分類而成
m <- read.table("Rdata/cog.class.annot.txt",head=T,sep="\t");
head(m)
layout(matrix(c(1,2),nr=1),widths=c(20,13));
layout.show(2)
par( mar=c(3,4,4,1)+0.1 );
class <- c("J","A","K","L","B","D","Y","V","T","M","N","Z","W","U","O","C","G","E","F","H","I","P","Q","R","S");
t <- factor( as.character(m$Code),levels=class )
m <- m[order(t),]
barplot(m$Gene.Number,space=F,col=rainbow(25),ylab="Number of genes",names.arg = m$Code)
l <- c(0,5,15,23,25);
id<- c("INFORMATION STORAGE\nAND PROCESSING","CELLULAR PROCESSES\nAND SIGNALING","METABOLISM","POORLY\nCHARACTERIZED")
abline( v = l[c(-1,-5)]);
for( i in 2:length(l) ){
text( (l[i-1]+l[i])/2,max(m[,3])*1.1,id[i-1],cex=0.8,xpd=T)
}
par(mar=c(2,0,2,1)+0.1 );
plot(0,0,type="n",xlim=c(0,1),ylim=c(0,26),bty="n",axes=F,xlab="",ylab="")
for( i in 1:length(class) ){
text(0,26-i+0.5,paste(m$Code[i],m$Functional.Categories[i]),pos=4,cex=1,pty=T)
}
title(main = "COG function classification");
GO功能注釋條形圖
library(ggplot2)
go <- read.csv("Rdata/go.csv",header = T)
head(go)
go_sort <- go[order(go$Ontology,-go$Percentage),]
m <- go_sort[go_sort$Ontology=="Molecular function",][1:10,]
c <- go_sort[go_sort$Ontology=="Cellular component",][1:10,]
b <- go_sort[go_sort$Ontology=="Biological process",][1:10,]
slimgo <- rbind(b,c,m)
#先將Term轉(zhuǎn)為因子
slimgo$Term=factor(slimgo$Term,levels=slimgo$Term)
colnames(slimgo)
pp <- ggplot(data = slimgo, mapping = aes(x=Term,y=Percentage,fill=Ontology))
pp
pp+geom_bar(stat="identity")#分組式,而不是堆疊式
pp+geom_bar(stat="identity")+coord_flip()
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)#去掉圖例
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)+theme_bw()
kegg功能注釋圖
library(ggplot2)
pathway <- read.csv("Rdata/kegg.csv",header=T)
head(pathway)
colnames(pathway)
pp <- ggplot(data=pathway,mapping = aes(x=richFactor,y=Pathway))
pp
pp + geom_point()
pp + geom_point(aes(size=AvsB))
pp + geom_point(aes(size=AvsB,color=Qvalue))
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")+theme_bw()