總結之前經常用到的 ggplot2::geom_bar() 繪制柱狀圖的用法。
- 1、基礎用法;
- 2、position= 參數:調整兩種分組的展示方式;
- 3、stat= 參數:設置頻數統計方式;
- 4、geom_text() 添加頻數注釋;
- 5、雙向柱狀圖;
- 6、組內排序;
- 7、柱狀圖+誤差棒
1、基礎用法
library(ggplot2)
library(patchwork)
library(carData) # provides the example dataset `Salaries`

# Preview the example data: salary records of academic staff.
head(Salaries)
# rank discipline yrs.since.phd yrs.service sex salary
# 1 Prof B 19 18 Male 139750
# 2 Prof B 20 16 Male 173200
# 3 AsstProf B 4 3 Male 79750
# 4 Prof B 45 39 Male 115000
# 5 Prof B 40 41 Male 141500
# 6 AssocProf B 6 6 Male 97000

# Number of records per rank.
table(Salaries$rank)
# AsstProf AssocProf Prof
# 67 64 266

# Number of records per rank and sex.
table(Salaries$rank, Salaries$sex)
# Female Male
# AsstProf 11 56
# AssocProf 10 54
# Prof 18 248

# Plain bar chart of rank; geom_bar() is called with its defaults.
p1 <- ggplot(data = Salaries, mapping = aes(x = rank)) +
  geom_bar()

# Same chart with the y-axis expansion set to c(0, 0).
p2 <- ggplot(data = Salaries, mapping = aes(x = rank)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0))

# Same chart with the fill colour mapped to rank.
p3 <- ggplot(data = Salaries, mapping = aes(x = rank, fill = rank)) +
  geom_bar()

# Combine the three plots into one figure (patchwork).
p1 + p2 + p3

2、position= 參數:調整兩種分組的展示方式
- Default: position="stack"
# One plot per `position` setting; every plot fills the bars by sex.

# "stack": the sex groups are stacked within each rank.
p1 <- ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "stack") +
  labs(title = 'position="stack"')

# "dodge": the sex groups are drawn next to each other.
p2 <- ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge") +
  labs(title = 'position="dodge"')

# "fill": position = "fill" variant of the same chart.
p3 <- ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(title = 'position="fill"')

# Combine the plots and collect their legends.
p1 + p2 + p3 + plot_layout(guides = "collect")

3、stat= 參數:設置頻數統計方式
- Default: stat="count" 表示從給定的數據里,統計每個類別出現的次數;此時 aes() 只需要給定 x 參數即可;
- stat="identity" 表示直接指定每種類別的頻數;此時 aes() 除了需要給定 x 參數交代類別,還需要指定 y 參數表示頻數值。
library(tidyverse)

# Pre-compute the number of rows per rank.
dat <- Salaries %>%
  group_by(rank) %>%
  dplyr::summarise(n = n()) %>%
  as.data.frame()
dat
# rank n
# 1 AsstProf 67
# 2 AssocProf 64
# 3 Prof 266

# With stat = "identity" the bar height is taken from the y aesthetic (n).
p1 <- ggplot(data = dat, mapping = aes(x = rank, y = n, fill = rank)) +
  geom_bar(stat = "identity")

# Pre-compute the number of rows per rank and sex.
dat <- Salaries %>%
  group_by(rank, sex) %>%
  dplyr::summarise(n = n()) %>%
  as.data.frame()
dat
# rank sex n
# 1 AsstProf Female 11
# 2 AsstProf Male 56
# 3 AssocProf Female 10
# 4 AssocProf Male 54
# 5 Prof Female 18
# 6 Prof Male 248

# Same approach, with the bars filled by sex.
p2 <- ggplot(data = dat, mapping = aes(x = rank, y = n, fill = sex)) +
  geom_bar(stat = "identity")

p1 + p2

4、geom_text() 添加頻數注釋
- 相對更適合于 stat = "identity" 的數據類型;如果不是,轉換一下即可
# Counts per rank.
dat <- Salaries %>%
  group_by(rank) %>%
  dplyr::summarise(n = n())

# Print each count next to its bar.
# vjust < 0 moves the label up; vjust > 0 moves it down.
p1 <- ggplot(data = dat, mapping = aes(x = rank, y = n)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = n), vjust = -0.2)

# Counts per rank and sex.
dat <- Salaries %>%
  group_by(rank, sex) %>%
  dplyr::summarise(n = n())

# Dodged bars: the labels get position_dodge(width = 0.9) so that they are
# dodged along with the bars.
p2 <- ggplot(data = dat, mapping = aes(x = rank, y = n, fill = sex)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    mapping = aes(label = n),
    vjust = -0.2,
    position = position_dodge(width = 0.9)
  )

p1 + p2

5、雙向柱狀圖
# Counts per rank and sex.
dat <- Salaries %>%
  group_by(rank, sex) %>%
  dplyr::summarise(n = n())

# Male rows: `lab` (the plotted bar height) equals the count.
dat_m <- dat %>%
  filter(sex == "Male") %>%
  mutate(lab = n) %>%
  as.data.frame()
# rank sex n lab
# 1 AsstProf Male 56 56
# 2 AssocProf Male 54 54
# 3 Prof Male 248 248

# Female rows: `lab` is the negated count, so these bars are drawn below zero.
dat_f <- dat %>%
  filter(sex == "Female") %>%
  mutate(lab = -1 * n) %>%
  as.data.frame()
# rank sex n lab
# 1 AsstProf Female 11 -11
# 2 AssocProf Female 10 -10
# 3 Prof Female 18 -18

# Each sex gets its own bar layer and text layer. The text is positioned at
# `lab` but displays the original (positive) count `n`.
ggplot() +
  geom_bar(
    data = dat_m,
    mapping = aes(x = rank, y = lab, fill = sex),
    stat = "identity",
    position = "dodge"
  ) +
  geom_text(
    data = dat_m,
    mapping = aes(x = rank, y = lab, label = n, vjust = -0.25)
  ) +
  geom_bar(
    data = dat_f,
    mapping = aes(x = rank, y = lab, fill = sex),
    stat = "identity",
    position = "dodge"
  ) +
  geom_text(
    data = dat_f,
    mapping = aes(x = rank, y = lab, label = n, vjust = 1.25)
  ) +
  # The break at -20 is labelled "20", i.e. without the minus sign.
  scale_y_continuous(
    breaks = c(200, 100, 0, -20),
    labels = c("200", "100", "0", "20")
  ) +
  scale_fill_manual(values = c("#0072B5", "#BC3C28"))

image.png
6、組內排序
- 如果只有一種分組方式,調整柱子順序通過設置類別的因子水平即可。
- 但如果更復雜的情況--組內排序。舉個例子:5個學生的三門課程成績,按照每門學科分組,將5個學生按照成績從低到高排序(或者從高到低排序)。
# Example data: scores of five students (A-E) in three subjects.
grade <- data.frame(
  subject = rep(c("Chineses", "Math", "English"), each = 5),
  name = rep(c("A", "B", "C", "D", "E"), 3),
  score = c(79, 65, 70, 94, 82, 76, 87, 80, 81, 89, 88, 79, 82, 95, 90)
)
# subject name score
# 1 Chineses A 79
# 2 Chineses B 65
# 3 Chineses C 70
# 4 Chineses D 94
# 5 Chineses E 82
# 6 Math A 76
# 7 Math B 87
# 8 Math C 80
# 9 Math D 81
# 10 Math E 89
# 11 English A 88
# 12 English B 79
# 13 English C 82
# 14 English D 95
# 15 English E 90

# tidytext supplies reorder_within() and scale_x_reordered(); load it once.
library(tidytext)

# Plot 1: subjects ordered by mean score, highest first; within each subject
# the students are ordered by score from low to high.
# fct_reorder() summarises with median() unless told otherwise, so
# `.fun = mean` is passed explicitly to order by the average as intended.
grade$subject <- fct_reorder(
  grade$subject, grade$score,
  .fun = mean, .desc = TRUE
)
p1 <- ggplot(
  grade,
  aes(x = reorder_within(name, score, subject), y = score, fill = name)
) +
  geom_bar(stat = "identity") +
  # Used together with reorder_within() on the x aesthetic.
  scale_x_reordered() +
  # Each panel needs its own x axis because the student order differs.
  facet_wrap(subject ~ ., scales = "free_x")

# Plot 2: subjects ordered by mean score, lowest first; within each subject
# the students are ordered by score from high to low (note the negated score).
grade$subject <- fct_reorder(
  grade$subject, grade$score,
  .fun = mean, .desc = FALSE
)
p2 <- ggplot(
  grade,
  aes(x = reorder_within(name, -score, subject), y = score, fill = name)
) +
  geom_bar(stat = "identity") +
  scale_x_reordered() +
  facet_wrap(subject ~ ., scales = "free_x")

p1 + p2 + plot_layout(guides = "collect")
- 注意 reorder_within(個體, 值, 分組),還需要設置 scale_x_reordered() 和 facet_wrap(variable~. ,scales = "free_x")
7、柱狀圖+誤差棒
- 分組的離散型變量可以用帶誤差棒的柱狀圖可視化。其中柱子的高度表示均值,誤差棒表示波動水平的sd值
#' Per-group mean and standard deviation of one column
#'
#' Splits `data` by the columns named in `groupnames` and computes the mean
#' and standard deviation of the column `varname` within each group
#' (missing values are removed before computing both statistics).
#'
#' Implemented with base R only. The previous version called `require(plyr)`,
#' which attached plyr to the search path as a side effect; when dplyr is
#' already attached that masks `summarise()`, `mutate()`, `rename()` and
#' others for the rest of the session.
#'
#' @param data A data frame.
#' @param varname Name (length-one character) of the column to summarise.
#' @param groupnames Character vector naming the grouping column(s).
#' @return A data frame with one row per observed group: the grouping columns
#'   (original types kept), a column named after `varname` holding the group
#'   mean, and a column `sd` holding the group standard deviation (`NA` for
#'   groups with a single value). Rows are sorted by the grouping columns,
#'   the first grouping column varying slowest.
data_summary <- function(data, varname, groupnames) {
  stopifnot(
    is.data.frame(data),
    is.character(varname), length(varname) == 1L,
    is.character(groupnames), length(groupnames) >= 1L,
    all(c(varname, groupnames) %in% names(data))
  )
  data <- as.data.frame(data)

  # Sort rows by the grouping columns. order() leaves ties in their original
  # order, so values inside a group keep their original sequence.
  keys <- data[, groupnames, drop = FALSE]
  row_order <- do.call(order, unname(as.list(keys)))
  keys <- keys[row_order, , drop = FALSE]
  values <- data[[varname]][row_order]

  # After sorting, every group is a contiguous run of rows; a row whose key
  # has not been seen before starts a new group.
  starts <- !duplicated(keys)
  pieces <- split(values, cumsum(starts))

  data_sum <- keys[starts, , drop = FALSE]
  data_sum[[varname]] <- vapply(
    pieces, mean, numeric(1),
    na.rm = TRUE, USE.NAMES = FALSE
  )
  data_sum[["sd"]] <- vapply(
    pieces, stats::sd, numeric(1),
    na.rm = TRUE, USE.NAMES = FALSE
  )
  rownames(data_sum) <- NULL
  data_sum
}
# First rows of `grade`.
# NOTE(review): the output pasted in the original post did not match the
# `grade` data defined in this file (row D showed 50 instead of 94, and the
# summary below differed accordingly); the values shown here are recomputed
# from the scores as defined in this file.
head(grade)
# subject name score
# 1 Chineses A 79
# 2 Chineses B 65
# 3 Chineses C 70
# 4 Chineses D 94
# 5 Chineses E 82
# 6 Math A 76

# Mean and sd of score per subject.
df1 <- data_summary(grade, varname = "score", groupnames = "subject")
# subject score sd
# 1 Chineses 78.0 11.247222
# 2 Math 82.6 5.319774
# 3 English 86.8 6.379655

# Two-sided error bars: from mean - sd to mean + sd.
p1 <- ggplot(data = df1, mapping = aes(x = subject, y = score)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(
    mapping = aes(ymin = score - sd, ymax = score + sd),
    width = .2
  )

# One-sided error bars: from the mean up to mean + sd.
p2 <- ggplot(data = df1, mapping = aes(x = subject, y = score)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(
    mapping = aes(ymin = score, ymax = score + sd),
    width = .2
  )

p1 + p2

# Mean and sd of salary for every rank/sex combination.
df2 <- data_summary(
  Salaries,
  varname = "salary",
  groupnames = c("rank", "sex")
)
# rank sex salary sd
# 1 AsstProf Female 78049.91 9371.996
# 2 AsstProf Male 81311.46 7901.343
# 3 AssocProf Female 88512.80 17965.286
# 4 AssocProf Male 94869.70 12890.817
# 5 Prof Female 121967.61 19619.583
# 6 Prof Male 127120.82 28213.808

# Dodged bars per sex within each rank, with mean +/- sd error bars.
# The error bars are dodged with position_dodge(0.9).
ggplot(data = df2, mapping = aes(x = rank, y = salary, fill = sex)) +
  geom_bar(
    stat = "identity",
    color = "black",
    position = position_dodge()
  ) +
  geom_errorbar(
    mapping = aes(ymin = salary - sd, ymax = salary + sd),
    width = .2,
    position = position_dodge(0.9)
  ) +
  theme_classic() +
  scale_fill_manual(values = c("#999999", "#E69F00"))

