Citation
@online{okola2025,
author = {Okola, Basil},
title = {Publication Ready Visualization in {R}},
date = {2025-10-29},
url = {https://bokola.github.io/posts/2025-10-29-pub-ready-viz/},
langid = {en}
}
October 29, 2025
@online{okola2025,
author = {Okola, Basil},
title = {Publication Ready Visualization in {R}},
date = {2025-10-29},
url = {https://bokola.github.io/posts/2025-10-29-pub-ready-viz/},
langid = {en}
}
We use the following packages for this example.
# Install any packages in `pkg` that are missing, then attach all of them.
#
# Args:
#   pkg: character vector of package names.
# Returns:
#   A named logical vector with one element per entry of `pkg`; each element
#   is the value returned by require() for that package.
ipk <- function(pkg) {
  # Work from the `pkg` argument. The previous version read the global
  # `list.pkg` here, so the function ignored its own input and failed
  # outright when that global did not exist.
  new_pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  # if ("practicalgg" %in% pkg) {
  #   remotes::install_github("wilkelab/practicalgg")
  # }
  if (length(new_pkg) > 0) {
    install.packages(new_pkg, dependencies = TRUE)
  }
  # require() is used on purpose: its TRUE/FALSE result is the return value.
  # vapply() keeps the result a logical vector even for empty input.
  vapply(pkg, require, logical(1), character.only = TRUE)
}
# Packages used in this post. Among them:
#   ggforce for geom_arc_bar()
#   cowplot for theme_map()
list.pkg <- c(
  "tidyverse", "ggforce", "cowplot",
  "remotes", "tinter", "ggrepel"
)
ipk(list.pkg)
We plot a simple pie chart
It is generally preferable to plot in Cartesian coordinates, but this
requires the use of
geom_arc_bar() from ggforce. This
requires a little more data preparation up front but gives much more
predictable results on the back end.
# Per-slice geometry for the pie chart, with parties sorted by seat count
# (ascending).
bund_pie <- bundestag %>%
  arrange(seats) %>%
  # angular extent of each slice, in radians: a slice ends at the cumulative
  # share of seats and starts where the previous slice ended (0 for the first)
  mutate(
    end_angle = 2 * pi * cumsum(seats) / sum(seats),
    start_angle = lag(end_angle, default = 0)
  ) %>%
  # label placement: the middle of each slice, plus horizontal and vertical
  # justifications that depend on whether that middle lies to the left/right
  # or top/bottom of the pie
  mutate(
    mid_angle = 0.5 * (start_angle + end_angle),
    hjust = ifelse(mid_angle > pi, 1, 0),
    vjust = ifelse(mid_angle >= pi / 2 & mid_angle <= 3 * pi / 2, 1, 0)
  )
bund_pie
## # A tibble: 3 × 8
## party seats colors end_angle start_angle mid_angle hjust vjust
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FDP 39 #E7D739 0.494 0 0.247 0 0
## 2 SPD 214 #B6494A 3.20 0.494 1.85 0 1
## 3 CDU/CSU 243 #4E4E4E 6.28 3.20 4.74 1 0
# Radius of the pie, and the radii at which the inside and outside
# labels are placed (both expressed relative to the pie radius).
rpie <- 1
rlabel_in <- 0.6 * rpie
rlabel_out <- 1.05 * rpie
# Basic pie: one arc per party, from the centre (r0 = 0) out to rpie.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0,
      r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = party
    )
  ) +
  # fixed aspect ratio between the x and y axes
  coord_fixed()
Next we add labels representing the number of seats for each party
# Pie with the number of seats printed inside each slice.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0, r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = party
    )
  ) +
  # seat counts sit at radius rlabel_in along each slice's mid angle
  geom_text(
    mapping = aes(
      x = rlabel_in * sin(mid_angle),
      y = rlabel_in * cos(mid_angle),
      label = seats
    ),
    size = 14 / .pt # 14 pt font size
  ) +
  coord_fixed()
Then provide labels for the parties outside of the pie
# Pie with seat counts inside the slices and party names outside.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0, r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = party
    )
  ) +
  # inside labels: seat counts at radius rlabel_in
  geom_text(
    mapping = aes(
      x = rlabel_in * sin(mid_angle),
      y = rlabel_in * cos(mid_angle),
      label = seats
    ),
    size = 14 / .pt
  ) +
  # outside labels: party names at radius rlabel_out, justified per slice
  # using the hjust/vjust columns of bund_pie
  geom_text(
    mapping = aes(
      x = rlabel_out * sin(mid_angle),
      y = rlabel_out * cos(mid_angle),
      label = party,
      hjust = hjust,
      vjust = vjust
    ),
    size = 14 / .pt
  ) +
  coord_fixed()
This plot shows how using a Cartesian coordinate system is helpful. We can
see exactly where elements lie and how we need to extend the limits to
fully show all the labels. The CDU/CSU label remains partially obscured
at this point, but this will be fixed later as we remove the legend and
axis labels, resulting in slightly more space for the pie chart itself
as well as the labels. Next we change the pie colors. The dataset
provides appropriate party colors, and we use those directly with
scale_fill_identity(). Note that this scale eliminates the
legend. We don’t need a legend anyway, because we have directly labeled
the pie slices.
# Pie filled with the party colours stored in the data, with explicit axis
# limits so that the outside labels fit.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0, r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = colors # colour values taken from the `colors` column
    )
  ) +
  # inside labels: seat counts
  geom_text(
    mapping = aes(
      x = rlabel_in * sin(mid_angle),
      y = rlabel_in * cos(mid_angle),
      label = seats
    ),
    size = 14 / .pt
  ) +
  # outside labels: party names
  geom_text(
    mapping = aes(
      x = rlabel_out * sin(mid_angle),
      y = rlabel_out * cos(mid_angle),
      label = party,
      hjust = hjust,
      vjust = vjust
    ),
    size = 14 / .pt
  ) +
  # no axis titles, hand-picked limits, no extra padding around them
  scale_x_continuous(name = NULL, limits = c(-1.5, 1.4), expand = c(0, 0)) +
  scale_y_continuous(name = NULL, limits = c(-1.05, 1.15), expand = c(0, 0)) +
  # use the fill values as given instead of mapping them to a palette
  scale_fill_identity() +
  coord_fixed()
The black colour for the text labels doesn’t work well on top of the
dark fill colours, and the black outline also looks overbearing, so
we’ll change those colours to white.
# Same pie, now with white slice outlines and per-slice colours for the
# seat-count labels.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0, r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = colors
    ),
    colour = "white" # outline colour of the slices
  ) +
  # inside labels: one text colour per row of bund_pie, in row order
  geom_text(
    mapping = aes(
      x = rlabel_in * sin(mid_angle),
      y = rlabel_in * cos(mid_angle),
      label = seats
    ),
    size = 14 / .pt,
    colour = c("black", "white", "white")
  ) +
  # outside labels: party names
  geom_text(
    mapping = aes(
      x = rlabel_out * sin(mid_angle),
      y = rlabel_out * cos(mid_angle),
      label = party,
      hjust = hjust,
      vjust = vjust
    ),
    size = 14 / .pt
  ) +
  scale_x_continuous(name = NULL, limits = c(-1.5, 1.4), expand = c(0, 0)) +
  scale_y_continuous(name = NULL, limits = c(-1.05, 1.15), expand = c(0, 0)) +
  scale_fill_identity() +
  coord_fixed()
Finally, we apply a theme that removes the background grid and axes
# Final pie chart: the previous plot with theme_map() applied on top.
ggplot(data = bund_pie) +
  geom_arc_bar(
    mapping = aes(
      x0 = 0, y0 = 0, r0 = 0, r = rpie,
      start = start_angle, end = end_angle,
      fill = colors
    ),
    colour = "white"
  ) +
  # inside labels: seat counts, one text colour per row of bund_pie
  geom_text(
    mapping = aes(
      x = rlabel_in * sin(mid_angle),
      y = rlabel_in * cos(mid_angle),
      label = seats
    ),
    size = 14 / .pt,
    colour = c("black", "white", "white")
  ) +
  # outside labels: party names
  geom_text(
    mapping = aes(
      x = rlabel_out * sin(mid_angle),
      y = rlabel_out * cos(mid_angle),
      label = party,
      hjust = hjust,
      vjust = vjust
    ),
    size = 14 / .pt
  ) +
  scale_x_continuous(name = NULL, limits = c(-1.5, 1.4), expand = c(0, 0)) +
  scale_y_continuous(name = NULL, limits = c(-1.05, 1.15), expand = c(0, 0)) +
  scale_fill_identity() +
  coord_fixed() +
  theme_map()
# 2. Scatter plot with smoothing line
The data is provided as practicalgg::corruption. Let’s
look at it in table form and as a basic scatter plot.
## Rows: 163
## Columns: 6
## $ country <chr> "Denmark", "New Zealand", "Finland", "Sweden", "Switzerland", …
## $ region <chr> "Europe and Central Asia", "Asia Pacific", "Europe and Central…
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 20…
## $ cpi <dbl> 91, 91, 90, 89, 86, 88, 85, 84, 83, 81, 85, 81, 79, 79, 77, 75…
## $ iso3c <chr> "DNK", "NZL", "FIN", "SWE", "CHE", "NOR", "SGP", "NLD", "CAN",…
## $ hdi <dbl> 0.925, 0.915, 0.895, 0.913, 0.939, 0.949, 0.925, 0.924, 0.920,…
Basic styling: point colors and theme
# Fill colours for the legend entries; the point outlines use a darkened
# version of the same colours.
region_cols <- c(
  "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#999999"
)
ggplot(corruption, aes(x = cpi, y = hdi)) +
  # shape 21 has both an outline (colour) and an interior (fill)
  geom_point(
    mapping = aes(colour = region, fill = region),
    size = 2.5,
    alpha = 0.5,
    shape = 21
  ) +
  scale_colour_manual(values = darken(region_cols, 0.3)) +
  scale_fill_manual(values = region_cols) +
  # font size 12 pt throughout
  theme_minimal_hgrid(font_size = 12, rel_small = 1)
Add smoothing line
# Scatter plot with a fitted line hdi ~ log(cpi) drawn underneath the points.
ggplot(corruption, aes(x = cpi, y = hdi)) +
  # The constant "y ~ log(x)" is mapped to colour and fill so that the fit
  # gets its own entry in those scales.
  geom_smooth(
    mapping = aes(colour = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    fullrange = TRUE
  ) +
  geom_point(
    mapping = aes(colour = region, fill = region),
    size = 2.5,
    alpha = 0.5,
    shape = 21
  ) +
  scale_colour_manual(values = darken(region_cols, 0.3)) +
  scale_fill_manual(values = region_cols) +
  theme_minimal_hgrid(font_size = 12, rel_small = 1)
Set the scale name for color and fill scale, to force merging of
guides
# Same plot, with the colour and fill scales given the same (empty) name.
ggplot(corruption, aes(x = cpi, y = hdi)) +
  geom_smooth(
    mapping = aes(colour = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    fullrange = TRUE
  ) +
  geom_point(
    mapping = aes(colour = region, fill = region),
    size = 2.5,
    alpha = 0.5,
    shape = 21
  ) +
  # name = NULL on both scales
  scale_colour_manual(name = NULL, values = darken(region_cols, 0.3)) +
  scale_fill_manual(name = NULL, values = region_cols) +
  theme_minimal_hgrid(font_size = 12, rel_small = 1)
Override legend aesthetics
# Same plot, with the legend keys overridden entry by entry.
ggplot(corruption, aes(x = cpi, y = hdi)) +
  geom_smooth(
    mapping = aes(colour = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    fullrange = TRUE
  ) +
  geom_point(
    mapping = aes(colour = region, fill = region),
    size = 2.5,
    alpha = 0.5,
    shape = 21
  ) +
  scale_colour_manual(name = NULL, values = darken(region_cols, 0.3)) +
  scale_fill_manual(name = NULL, values = region_cols) +
  # first five legend keys: shape 21 with linetype 0;
  # sixth key: linetype 1 with no shape (NA)
  guides(
    colour = guide_legend(
      override.aes = list(
        linetype = rep(c(0, 1), times = c(5, 1)),
        shape = rep(c(21, NA), times = c(5, 1))
      )
    )
  ) +
  theme_minimal_hgrid(font_size = 12, rel_small = 1)
Set x and y scales, move legend on top
# Same plot, with explicit x/y scales and the legend moved above the panel.
ggplot(corruption, aes(x = cpi, y = hdi)) +
  geom_smooth(
    mapping = aes(colour = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    fullrange = TRUE
  ) +
  geom_point(
    mapping = aes(colour = region, fill = region),
    size = 2.5,
    alpha = 0.5,
    shape = 21
  ) +
  scale_colour_manual(name = NULL, values = darken(region_cols, 0.3)) +
  scale_fill_manual(name = NULL, values = region_cols) +
  # axis titles, limits and breaks; expand = c(0, 0) removes the padding
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2015 (100 = least corrupt)",
    limits = c(10, 95), breaks = c(20, 40, 60, 80, 100),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2015\n(1.0 = most developed)",
    limits = c(0.3, 1.05), breaks = c(0.2, 0.4, 0.6, 0.8, 1.0),
    expand = c(0, 0)
  ) +
  # first five legend keys: shape 21 with linetype 0;
  # sixth key: linetype 1 with no shape (NA)
  guides(
    colour = guide_legend(
      override.aes = list(
        linetype = rep(c(0, 1), times = c(5, 1)),
        shape = rep(c(21, NA), times = c(5, 1))
      )
    )
  ) +
  theme_minimal_hgrid(font_size = 12, rel_small = 1) +
  # legend on top, right-justified, 9 pt text, no gap to the panel
  theme(
    legend.position = "top",
    legend.justification = "right",
    legend.text = element_text(size = 9),
    legend.box.spacing = unit(0, "pt")
  )
Reformat legend into a single row
# Break the long region names over two lines so that the legend fits into a
# single row. Regions not listed here keep their original name.
corruption <- corruption %>%
  mutate(region = case_when(
    region == "Middle East and North Africa" ~ "Middle East\nand North Africa",
    region == "Europe and Central Asia" ~ "Europe and\nCentral Asia",
    # no leading space in the new label: a stray one misaligns the legend
    # text and can change where this level sorts among the others
    region == "Sub Saharan Africa" ~ "Sub-Saharan\nAfrica",
    TRUE ~ region)
  )
ggplot(corruption, aes(cpi, hdi)) +
  geom_smooth(
    aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm", formula = y ~ log(x), se = FALSE, fullrange = TRUE
  ) +
  geom_point(
    aes(color = region, fill = region),
    size = 2.5, alpha = 0.5, shape = 21
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(region_cols, 0.3)
  ) +
  scale_fill_manual(
    name = NULL,
    values = region_cols
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2015 (100 = least corrupt)",
    limits = c(10, 95),
    breaks = c(20, 40, 60, 80, 100),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2015\n(1.0 = most developed)",
    limits = c(0.3, 1.05),
    breaks = c(0.2, 0.4, 0.6, 0.8, 1.0),
    expand = c(0, 0)
  ) +
  guides(
    color = guide_legend(
      nrow = 1, # lay all legend keys out in one row
      # first five keys: shape 21 with linetype 0;
      # sixth key: linetype 1 with no shape (NA)
      override.aes = list(
        linetype = c(rep(0, 5), 1),
        shape = c(rep(21, 5), NA)
      )
    )
  ) +
  theme_minimal_hgrid(12, rel_small = 1) +
  theme(
    legend.position = "top",
    legend.justification = "right",
    legend.text = element_text(size = 9),
    legend.box.spacing = unit(0, "pt")
  )
Highlight select countries
# Countries to call out with a text label.
country_highlight <- c(
  "Germany", "Norway", "United States", "Greece", "Singapore", "Rwanda",
  "Russia", "Venezuela", "Sudan", "Iraq", "Ghana", "Niger", "Chad", "Kuwait",
  "Qatar", "Myanmar", "Nepal", "Chile", "Argentina", "Japan", "China"
)
# Highlighted countries get their name as label; all others an empty string.
corruption <- corruption %>%
  mutate(
    label = ifelse(country %in% country_highlight, country, "")
  )
ggplot(corruption, aes(cpi, hdi)) +
  geom_smooth(
    aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
    method = "lm", formula = y ~ log(x), se = FALSE, fullrange = TRUE
  ) +
  geom_point(
    aes(color = region, fill = region),
    size = 2.5, alpha = 0.5, shape = 21
  ) +
  geom_text_repel(
    aes(label = label),
    color = "black",
    size = 9 / .pt, # font size 9 pt
    point.padding = 0.1,
    box.padding = .6,
    min.segment.length = 0,
    # never drop a label because of overlaps; with the default limit,
    # ggrepel left 5 of the highlighted countries unlabeled
    max.overlaps = Inf,
    seed = 7654 # fixed seed for reproducible label placement
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(region_cols, 0.3)
  ) +
  scale_fill_manual(
    name = NULL,
    values = region_cols
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2015 (100 = least corrupt)",
    limits = c(10, 95),
    breaks = c(20, 40, 60, 80, 100),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2015\n(1.0 = most developed)",
    limits = c(0.3, 1.05),
    breaks = c(0.2, 0.4, 0.6, 0.8, 1.0),
    expand = c(0, 0)
  ) +
  guides(
    color = guide_legend(
      nrow = 1,
      # first five keys: shape 21 with linetype 0;
      # sixth key: linetype 1 with no shape (NA)
      override.aes = list(
        linetype = c(rep(0, 5), 1),
        shape = c(rep(21, 5), NA)
      )
    )
  ) +
  theme_minimal_hgrid(12, rel_small = 1) +
  theme(
    legend.position = "top",
    legend.justification = "right",
    legend.text = element_text(size = 9),
    legend.box.spacing = unit(0, "pt")
  )
# 3. Health status by age
For this example we use the following packages
The dataset is provided as practicalgg::happy. Let’s
look at its table form and basic density plot.
# Keep only age and health, drop rows with missing values, and reverse the
# order of the health factor levels.
data_health <- dplyr::select(happy, age, health) %>%
  na.omit() %>%
  mutate(
    health = fct_rev(health)
  )
data_health
## # A tibble: 38,361 × 2
## age health
## <dbl> <fct>
## 1 23 good
## 2 70 fair
## 3 48 excellent
## 4 27 good
## 5 61 good
## 6 26 good
## 7 28 excellent
## 8 27 good
## 9 21 excellent
## 10 30 fair
## # ℹ 38,351 more rows
# Age density per health category, one facet per category in a single row.
# after_stat(count) puts the computed count on the y axis; it replaces
# stat(count), which is deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  geom_density(fill = "lightblue") +
  facet_wrap(~health, nrow = 1)
Add the overall distribution as a background.
# Age density per health category, drawn on top of the overall distribution.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # Background layer: the same data without the faceting variable, so the
  # full data set is drawn in every facet. dplyr::select() is namespaced so
  # that a select() from another attached package cannot be picked up.
  geom_density(
    data = dplyr::select(data_health, -health),
    aes(fill = "all people surveyed")
  ) +
  # Foreground layer: only the rows belonging to the facet's health category.
  geom_density(aes(fill = "highlighted group")) +
  facet_wrap(~health, nrow = 1)
Define the scales
# Densities with explicit x, y and fill scales.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # background: full data set (faceting variable removed) in every facet;
  # dplyr::select() is namespaced to guard against a masking select()
  geom_density(
    data = dplyr::select(data_health, -health),
    aes(fill = "all people surveyed")
  ) +
  # foreground: the facet's own health category
  geom_density(aes(fill = "highlighted group")) +
  scale_x_continuous(
    name = "age (years)",
    limits = c(15, 98),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "count",
    expand = c(0, 0)
  ) +
  # two fill colours given as hex codes with an alpha channel; no legend
  # title; legend keys laid out horizontally
  scale_fill_manual(
    values = c("#b3b3b3a0", "#2b8cbed0"),
    name = NULL,
    guide = guide_legend(direction = "horizontal")
  ) +
  facet_wrap(~health, nrow = 1)
Basic theme; move legend to bottom; remove outline around densities.
# Densities without outlines, with a basic theme and the legend at the bottom.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # background: full data set (faceting variable removed) in every facet;
  # dplyr::select() is namespaced to guard against a masking select()
  geom_density(
    data = dplyr::select(data_health, -health),
    aes(fill = "all people surveyed"),
    color = NA # no outline
  ) +
  # foreground: the facet's own health category
  # (legend label spelled "highlighted"; it previously read "highligthed")
  geom_density(aes(fill = "highlighted group"), color = NA) +
  scale_x_continuous(
    name = "age (years)",
    limits = c(15, 98),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "count",
    expand = c(0, 0)
  ) +
  scale_fill_manual(
    values = c("#b3b3b3a0", "#2b8cbed0"),
    name = NULL,
    guide = guide_legend(direction = "horizontal")
  ) +
  facet_wrap(~health, nrow = 1) +
  theme_minimal_hgrid(12) +
  theme(
    legend.position = "bottom",
    legend.justification = "right"
  )
Theme tweaks: larger strip labels, move legend closer to plot, adjust
horizontal legend spacing.
# Densities with tweaked strip labels and legend margins/spacing.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # background: full data set (faceting variable removed) in every facet;
  # dplyr::select() is namespaced to guard against a masking select()
  geom_density(
    data = dplyr::select(data_health, -health),
    aes(fill = "all people surveyed"),
    color = NA
  ) +
  # foreground: the facet's own health category
  geom_density(aes(fill = "highlighted group"), color = NA) +
  scale_x_continuous(
    name = "age (years)",
    limits = c(15, 98),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "count",
    expand = c(0, 0)
  ) +
  scale_fill_manual(
    values = c("#b3b3b3a0", "#2b8cbed0"),
    name = NULL,
    guide = guide_legend(direction = "horizontal")
  ) +
  facet_wrap(~health, nrow = 1) +
  theme_minimal_hgrid(12) +
  theme(
    # 12 pt strip labels with a 6 pt bottom margin
    strip.text = element_text(size = 12, margin = margin(0, 0, 6, 0, "pt")),
    legend.position = "bottom",
    legend.justification = "right",
    # legend margins and spacings, all in points
    legend.margin = margin(6, 0, 1.5, 0, "pt"),
    legend.spacing.x = grid::unit(3, "pt"),
    legend.spacing.y = grid::unit(0, "pt"),
    legend.box.spacing = grid::unit(0, "pt")
  )
Remove axis line, add spacing between legend items.
# Densities without axis lines and with extra space between legend items.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # background: full data set (faceting variable removed) in every facet;
  # dplyr::select() is namespaced to guard against a masking select()
  geom_density(
    data = dplyr::select(data_health, -health),
    # a simple workaround to a limitation in ggplot2:
    # pad the end of the legend text with whitespace
    # to space out the legend items
    aes(fill = "all people surveyed "),
    color = NA
  ) +
  # foreground: the facet's own health category
  geom_density(aes(fill = "highlighted group"), color = NA) +
  scale_x_continuous(
    name = "age (years)",
    limits = c(15, 98),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "count",
    expand = c(0, 0)
  ) +
  scale_fill_manual(
    values = c("#b3b3b3a0", "#2b8cbed0"),
    name = NULL,
    guide = guide_legend(direction = "horizontal")
  ) +
  facet_wrap(~health, nrow = 1) +
  theme_minimal_hgrid(12) +
  theme(
    axis.line = element_blank(), # remove the axis lines
    strip.text = element_text(size = 12, margin = margin(0, 0, 6, 0, "pt")),
    legend.position = "bottom",
    legend.justification = "right",
    legend.margin = margin(6, 0, 1.5, 0, "pt"),
    legend.spacing.x = grid::unit(3, "pt"),
    legend.spacing.y = grid::unit(0, "pt"),
    legend.box.spacing = grid::unit(0, "pt")
  )
Turn off clipping
# Final density plot: as before, with clipping to the panel turned off.
# after_stat(count) replaces stat(count), deprecated since ggplot2 3.4.0.
ggplot(data_health, aes(x = age, y = after_stat(count))) +
  # background: full data set (faceting variable removed) in every facet;
  # dplyr::select() is namespaced to guard against a masking select()
  geom_density(
    data = dplyr::select(data_health, -health),
    # trailing whitespace in the label spaces out the legend items
    aes(fill = "all people surveyed "),
    color = NA
  ) +
  # foreground: the facet's own health category
  geom_density(aes(fill = "highlighted group"), color = NA) +
  scale_x_continuous(
    name = "age (years)",
    limits = c(15, 98),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "count",
    expand = c(0, 0)
  ) +
  scale_fill_manual(
    values = c("#b3b3b3a0", "#2b8cbed0"),
    name = NULL,
    guide = guide_legend(direction = "horizontal")
  ) +
  facet_wrap(~health, nrow = 1) +
  coord_cartesian(clip = "off") + # do not clip drawing to the panel area
  theme_minimal_hgrid(12) +
  theme(
    axis.line = element_blank(),
    strip.text = element_text(size = 12, margin = margin(0, 0, 6, 0, "pt")),
    legend.position = "bottom",
    legend.justification = "right",
    legend.margin = margin(6, 0, 1.5, 0, "pt"),
    legend.spacing.x = grid::unit(3, "pt"),
    legend.spacing.y = grid::unit(0, "pt"),
    legend.box.spacing = grid::unit(0, "pt")
  )