前言
ggplot2 包含很多繪制線條的函數,大致可分為如下幾類:
- 連接線:折線(geom_line)、路徑線(geom_path)、階梯線(geom_step)
- 參考線:水平線(geom_hline)、豎直線(geom_vline)、斜線(geom_abline)
- 線段和曲線:geom_segment、geom_spoke、geom_curve
- 函數曲線:geom_function、stat_function
示例
1. 連接線
主要有三種連接線:
- geom_path:按照它們在數據中出現的順序連接起來
- geom_line:按 x 軸上變量的順序連接起來
- geom_step:創建一個階梯圖,突出顯示數據的變化
常用參數:
- linetype:線條類型
- size:線條大小
- lineend:線端點樣式:round,butt,square
- linejoin:線連接點樣式:round,mitre,bevel
- arrow:使用 grid::arrow() 函數設置箭頭樣式
繪制一條簡單的時間序列折線
# Time-series line chart: unemploy plotted against date.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

繪制多條折線
# Keep two of the series in economics_long and draw one coloured line for
# each of them.
economics_long %>%
  subset(subset = variable %in% c("uempmed", "unemploy")) %>%
  ggplot(mapping = aes(x = date, y = value01, colour = variable)) +
  geom_line()

翻轉線條
# Same series with the axes swapped; orientation = "y" tells geom_line() to
# treat y as the axis along which the observations are ordered.
ggplot(data = economics, mapping = aes(x = unemploy, y = date)) +
  geom_line(orientation = "y")

如果我們更加關注 y 值的變化情況,可以使用 geom_step 繪制階梯圖
# Restrict the data to observations after 2013-01-01, then show the same
# series as a plain line (p1) and as a step chart (p2) side by side.
recent <- economics[economics[["date"]] > as.Date("2013-01-01"), ]
p1 <- ggplot(data = recent, mapping = aes(x = date, y = unemploy)) +
  geom_line()
p2 <- ggplot(data = recent, mapping = aes(x = date, y = unemploy)) +
  geom_step()
plot_grid(p1, p2)

而 geom_path 可以讓你探索兩個變量是如何隨著時間的推移而發生變化的
例如,失業率和個人儲蓄率隨時間的關係
# Trace how the unemployment rate (unemploy / pop) and the personal savings
# rate (psavert) move together over time, using 10 randomly sampled months.
esamp <- sample_n(economics, 10)
# sample_n() returns the rows in random order, and geom_path() joins points
# in row order, so sort by date to make the path follow time.
esamp <- esamp[order(esamp$date), ]
m <- ggplot(esamp, aes(unemploy/pop, psavert))
# p1: plain path; p2: path coloured by the (numeric) date of each point.
p1 <- m + geom_path()
p2 <- m + geom_path(aes(colour = as.numeric(date)))
plot_grid(p1, p2)

設置箭頭
# Arrowheads on a line: c1 uses the default arrow(), c2 a narrow (15 degree)
# closed arrowhead on both ends.
c <- ggplot(data = economics, mapping = aes(x = date, y = pop))
c1 <- c +
  geom_line(arrow = arrow())
c2 <- c +
  geom_line(arrow = arrow(angle = 15, ends = "both", type = "closed"))
plot_grid(c1, c2)

更改連接線及端點樣式
# Three-point path drawn very thick so that the line-end and line-join
# styles are easy to compare across the four panels.
# NOTE(review): recent ggplot2 releases deprecate `size` for line width in
# favour of `linewidth`; confirm the installed version before switching.
base <- ggplot(
  data = tibble(x = 1:3, y = c(4, 1, 9)),
  mapping = aes(x = x, y = y)
)
b1 <- base +
  geom_path(size = 8)
b2 <- base +
  geom_path(size = 8, lineend = "round")
b3 <- base +
  geom_path(size = 8, lineend = "round", colour = "red")
b4 <- base +
  geom_path(size = 8, linejoin = "mitre", lineend = "butt")
plot_grid(b1, b2, b3, b4)

當線條的中間有 NA 值時,則會有一個斷點
# The third y value is missing, which leaves a gap in the drawn line.
df <- data.frame(x = 1:5, y = c(1, 2, NA, 4, 5))
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_line()

設置線條類型
# Two series, each with its own colour and a manually chosen line type
# (linetype codes 5 and 3).
economics_long %>%
  subset(subset = variable %in% c("uempmed", "unemploy")) %>%
  ggplot(mapping = aes(x = date, y = value01, colour = variable)) +
  geom_line(mapping = aes(linetype = factor(variable))) +
  scale_linetype_manual("variable", values = c(5, 3))

注意:無法同時設置漸變色與線條類型,下面的代碼將會報錯
# Intentionally failing example: colour is mapped to value01, so it varies
# along each line, while linetype = 2 asks for a non-solid line. The
# accompanying text states that this combination raises an error.
economics_long %>%
subset(variable %in% c("uempmed", "unemploy")) %>%
ggplot(aes(date, value01, group = variable)) +
geom_line(aes(colour = value01), linetype = 2)
2. 參考線
為圖形添加參考線對圖形的注釋非常有用,主要有水平、豎直和對角線三種參考線,對應於三個函數:
- geom_hline:yintercept(y 軸截距)
- geom_vline:xintercept(x 軸截距)
- geom_abline:slope(斜率)和 intercept(截距)
# Scatter plot of mpg against wt, reused as the base of each panel.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# Vertical line at a single fixed x position.
p1 <- p +
  geom_vline(xintercept = 5)
# One vertical line per element of a vector.
p2 <- p +
  geom_vline(xintercept = 1:5)
# Horizontal line.
p3 <- p +
  geom_hline(yintercept = 20)
# Sloped line defined by slope and intercept.
p4 <- p +
  geom_abline(slope = -5, intercept = 31)
plot_grid(p1, p2, p3, p4)

計算擬合曲線的截距和斜率,然後繪制直線
# Fit mpg ~ wt and inspect the coefficients; the console output is kept as
# comments so that the snippet can be pasted and run as-is.
coef(lm(mpg ~ wt, data = mtcars))
#> (Intercept)          wt
#>   37.285126   -5.344472
# Draw a line using the rounded intercept and slope from the fit.
p + geom_abline(intercept = 37, slope = -5)

更簡單的方式是使用 geom_smooth 繪制擬合直線
# Let geom_smooth() fit and draw the linear model itself; se = FALSE turns
# off the confidence band.
p +
  geom_smooth(method = "lm", se = FALSE)

在繪制分面圖形的時候,可以為不同的分面繪制不同的直線
# Scatter plot of wt against mpg, one facet per cyl value.
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  facet_wrap(vars(cyl))
# One wt value per cyl; because this data frame carries the faceting
# variable, each facet receives its own horizontal line.
mean_wt <- data.frame(cyl = c(4, 6, 8), wt = c(2.28, 3.11, 4))
p +
  geom_hline(mapping = aes(yintercept = wt), data = mean_wt)

也可以添加其他屬性
# Same faceted plot, with both the points and the per-facet reference lines
# coloured by wt. Uses the mean_wt data frame defined in the previous
# snippet.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt, colour = wt)) +
  geom_point() +
  geom_hline(
    mapping = aes(yintercept = wt, colour = wt),
    data = mean_wt
  ) +
  facet_wrap(vars(cyl))

3. 線段和曲線
geom_segment 用於繪制兩個點之間的直線,geom_curve 用於繪制兩點的曲線。
兩個點通過四個參數 (x, y) 和 (xend, yend) 指定坐標。
例如,在散點圖中標註兩點之間的連接線
# Base scatter plot of mpg against wt.
b <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# Start point (x1, y1) and end point (x2, y2) of the connection to draw.
df <- data.frame(x1 = 2.320, x2 = 3.520, y1 = 22.8, y2 = 15.5)
# Join the two points with a curve and with a straight segment; the constant
# colour labels produce a legend that tells the two apart.
b +
  geom_curve(
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2, colour = "curve"),
    data = df
  ) +
  geom_segment(
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2, colour = "segment"),
    data = df
  )

設置不同的曲率
# Same two points joined with two different curvature settings (-0.2 and
# 0.9). Relies on `b` and `df` from the previous snippet.
b1 <- b +
  geom_curve(
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    data = df,
    curvature = -0.2
  )
b2 <- b +
  geom_curve(
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    data = df,
    curvature = 0.9
  )
plot_grid(b1, b2)

添加箭頭
# Curve between the two points with an arrowhead whose length is 0.05 npc
# units. Relies on `b` and `df` defined earlier.
b +
  geom_curve(
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    arrow = arrow(length = unit(0.05, "npc")),
    data = df
  )

使用 geom_segment 通過設置線段大小來繪制直方圖
# Tabulate 100 Poisson(5) draws; table() yields a factor column x and a
# Freq column.
counts <- as.data.frame(table(x = rpois(n = 100, lambda = 5)))
# Convert the factor labels back to the numeric values they stand for
# (going through character avoids getting the factor's integer codes).
counts[["x"]] <- as.numeric(as.character(counts[["x"]]))
# Thick vertical segments from (x, Freq) down to (x, 0) act as bars.
ggplot(data = counts, mapping = aes(x = x, y = Freq)) +
  geom_segment(
    mapping = aes(xend = x, yend = 0),
    size = 10,
    lineend = "butt"
  )

而 geom_spoke 是由坐標點 (x, y) 以及角度 (angle) 和半徑 (radius) 指定的線段
# 10 x 10 grid of points, each given a random angle in [0, 2*pi) and a
# random speed whose upper bound grows with x.
df <- expand.grid(x = 1:10, y = 1:10)
df[["angle"]] <- runif(n = 100, min = 0, max = 2 * pi)
df[["speed"]] <- runif(n = 100, min = 0, max = sqrt(0.1 * df[["x"]]))
# Draw a spoke of fixed radius 0.5 from every grid point.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_spoke(mapping = aes(angle = angle), radius = 0.5)

看起來像是散落的大頭針一樣
設置可變的半徑
# Same grid, but each spoke's length now comes from the speed column.
# Relies on the `df` built in the previous snippet.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_spoke(mapping = aes(angle = angle, radius = speed))

4. 函數曲線
使用 geom_function 或 stat_function 可以繪制指定函數的曲線,例如
# Fix the RNG seed so the sampled data are reproducible.
set.seed(2021)
# Empirical density of 100 standard-normal draws, with the theoretical
# normal density (dnorm) overlaid in red.
ggplot(data = data.frame(x = rnorm(n = 100)), mapping = aes(x = x)) +
  geom_density() +
  geom_function(fun = dnorm, colour = "red")

繪制了函數在數據範圍內的曲線
也可以只指定範圍,來繪制無數據的函數曲線
# Empty plot with only an x range of [-5, 5]; the function is evaluated
# over that range.
base <- ggplot() +
  xlim(-5, 5)
base +
  geom_function(fun = dnorm)

設置函數的參數值
# Extra arguments for the function are supplied through `args`.
base +
  geom_function(fun = dnorm, args = list(mean = 2, sd = 0.5))

其底層原理是在一些離散點上執行函數,然後用線將各函數值連接起來
# Show the evaluation points themselves: b1 uses the default number of
# points, b2 only 20.
b1 <- base +
  stat_function(fun = dnorm, geom = "point")
b2 <- base +
  stat_function(fun = dnorm, n = 20, geom = "point")
plot_grid(b1, b2)

下面兩行代碼效果是一樣的
# geom_function() and stat_function(geom = "line") with the same n, shown
# side by side for comparison.
b1 <- base +
  geom_function(fun = dnorm, n = 20)
b2 <- base +
  stat_function(fun = dnorm, n = 20, geom = "line")
plot_grid(b1, b2)

自定義函數
# Several functions in one plot, told apart by a constant colour label.
p1 <- base +
  geom_function(mapping = aes(colour = "normal"), fun = dnorm) +
  geom_function(
    mapping = aes(colour = "t, df = 1"),
    fun = dt,
    args = list(df = 1)
  )
# An anonymous function.
p2 <- base +
  geom_function(fun = function(x) 0.5 * exp(-abs(x)))
# The same function written as a formula-style lambda.
p3 <- base +
  geom_function(fun = ~ 0.5 * exp(-abs(.x)))
# The same function again, defined by name first.
f <- function(x) {
  0.5 * exp(-abs(x))
}
p4 <- base +
  geom_function(fun = f)
plot_grid(p1, p2, p3, p4)

樣式圖
1. 路線圖
# Route-style chart: take 10 random cars and draw an arrow from each sampled
# point to the next one in row order, labelling every point with its disp.
sample_n(mtcars, 10) %>%
  ggplot(aes(mpg, disp)) +
  geom_point(colour = "#69b3a2", na.rm = TRUE) +
  # The end of each segment is the following row's point. The last row has
  # no successor, so its xend/yend are NA; na.rm = TRUE belongs here, where
  # the missing values arise, so that row is dropped without a warning.
  geom_segment(
    aes(
      xend = c(tail(mpg, n = -1), NA),
      yend = c(tail(disp, n = -1), NA)
    ),
    arrow = arrow(length = unit(0.3, "cm")),
    colour = "#69b3a2",
    na.rm = TRUE
  ) +
  geom_text(aes(label = disp), hjust = 1.2) +
  theme_bw()

2. 坡度圖
library(ggrepel)
# Slope chart: total displ per manufacturer in 1999 versus 2008, one segment
# per manufacturer, coloured by whether the total went down or up.
mpg %>%
  group_by(year, manufacturer) %>%
  summarise(value = sum(displ)) %>%
  # summarise() drops only the last grouping level; remove the remaining
  # grouping explicitly before reshaping.
  ungroup() %>%
  pivot_wider(names_from = year, values_from = value) %>%
  # 1999 > 2008 means the total decreased. Store the direction itself and
  # map it to a colour by name below, so labels and colours cannot be
  # swapped and a missing direction does not shift the colours.
  mutate(class = if_else((`1999` - `2008`) > 0, "Down", "Up")) %>%
  ggplot() +
  geom_segment(
    aes(x = 1, xend = 2, y = `1999`, yend = `2008`, colour = class),
    size = .75, show.legend = FALSE
  ) +
  # Vertical axes for the two years.
  geom_vline(xintercept = 1, linetype = "solid", size = 1, colour = "#ff7f00") +
  geom_vline(xintercept = 2, linetype = "solid", size = 1, colour = "#1f78b4") +
  geom_point(aes(x = 1, y = `1999`), size = 3, shape = 21, fill = "green") +
  geom_point(aes(x = 2, y = `2008`), size = 3, shape = 21, fill = "red") +
  scale_colour_manual(values = c(Down = "#8dd3c7", Up = "#bebada")) +
  xlim(.5, 2.5) +
  # Value labels, repelled so that overlapping points stay readable.
  geom_text_repel(
    aes(x = 1, y = `1999`, label = `1999`),
    hjust = "left", size = 3.5
  ) +
  geom_text_repel(
    aes(x = 2, y = `2008`, label = `2008`),
    hjust = "right", size = 3.5
  ) +
  # Year captions placed slightly above the largest value.
  geom_text(
    aes(y = 1.03 * max(max(`1999`), max(`2008`))),
    label = "1999", x = 1, size = 5, hjust = 1.2
  ) +
  geom_text(
    aes(y = 1.03 * max(max(`1999`), max(`2008`))),
    label = "2008", x = 2, size = 5, hjust = -.2
  ) +
  theme_void()

在這個例子中,由於點有重疊的現象,導致標籤也會重疊在一起。
所以我們使用了 ggplot2 的擴展包 ggrepel 的 geom_text_repel 來繪制不重疊標籤。