Visible Data - From the archive: 30DayChartChallenge 2022 Pictogram

This post has been rescued from a side-project blog that I’ve since disposed of; I’m slowly reposting its content here. In this post I created one of my favourite charts:

This chart was built for the “Pictogram” prompt in the 2022 #30DayChartChallenge. It’s a tired trope to see a normal distribution drawn with a ghost and called something like the “para-normal distribution”. I then thought about emoji and 5-sigma, and this idea came about.

To begin I created a dotplot and was lucky enough for my seed to generate a single outlier on the right-side:

Code

library(tidyverse)

# Fix the RNG seed so the same 500 draws (and therefore the same chart) are
# produced on every run.
set.seed(1)
data_hist <- tibble(x = rnorm(n = 500, mean = 100, sd = 8))

# Sample statistics of the simulated values; used later to classify the dots
# and to place the standard-deviation bands.
mean_of_data <- mean(data_hist[["x"]])
sd_of_data <- sd(data_hist[["x"]])

# Dot plot of the simulated values, binned with a bin width of 1.
gg_hist_dot_plot <- ggplot(data = data_hist, mapping = aes(x = x)) +
  geom_dotplot(
    method = "dotdensity",
    binwidth = 1,
    binpositions = "bygroup",
    dotsize = 1,
    position = "dodge"
  )

# Print the plot.
gg_hist_dot_plot

In case you’ve not seen it, you can extract all of the coordinates of the geoms in a chart via ggplot_build(). I’m going to use that to put emoji in place of the dots in the chart:

I then extracted the coordinates of the dots via ggplot_build():

# Build the dot plot so the data computed for its layers can be read back out.
build_gg <- ggplot_build(plot = gg_hist_dot_plot)

Now I use {emo} to create a tibble containing data for my emojis:

library("emo")

# Emoji for each band, keyed by how many standard deviations from the mean a
# dot sits ("3+" means more than three standard deviations away).
vec_emojis <- c(
  "3+" = emo::ji("scream"),
  "3" = emo::ji("fearful"),
  "2" = emo::ji("confused"),
  "1" = emo::ji("grin")
)

# `build_gg$data` is a list holding one data frame per plot layer. Pull out the
# first (and only) layer explicitly rather than coercing the whole list with
# as.data.frame(), which only works while the plot has exactly one layer.
data_emoji_positions <- build_gg$data[[1]] %>%
  as_tibble() %>%
  select(x, xmin, xmax, y, stackpos) %>%
  mutate(
    # Classify each dot by its distance from the sample mean, in units of the
    # sample standard deviation. Conditions are checked top to bottom, so each
    # branch only sees values not claimed by an earlier one.
    sds_from_mean = case_when(
      x < (mean_of_data - 3 * sd_of_data) ~ "3+",
      x <= (mean_of_data - 2 * sd_of_data) ~ "3",
      x <= (mean_of_data - 1 * sd_of_data) ~ "2",
      x <= (mean_of_data + 1 * sd_of_data) ~ "1",
      x <= (mean_of_data + 2 * sd_of_data) ~ "2",
      x <= (mean_of_data + 3 * sd_of_data) ~ "3",
      x > (mean_of_data + 3 * sd_of_data) ~ "3+"
    ),
    # Look up the emoji for each band; unname() keeps the column a plain
    # character vector instead of carrying the lookup keys along as names.
    emoji_symbol = unname(vec_emojis[sds_from_mean])
  )

To visualise the standard deviation components of the Normal distribution I created two utility functions:

#' Scaled normal density used to draw the bell curve
#'
#' @param x Numeric vector of positions at which to evaluate the curve.
#' @param mean,sd Mean and standard deviation of the normal distribution.
#'   The defaults (100 and 8) are the values used to simulate the data.
#' @param scale Multiplier applied to the density. Defaults to 700.
#' @return Numeric vector of scaled density values, one per element of `x`.
hist_function <- function(x, mean = 100, sd = 8, scale = 700) {
  dnorm(x, mean = mean, sd = sd) * scale
}

#' Filled slice of the scaled normal curve
#'
#' @param xlim Numeric vector giving the left and right limits of the slice.
#' @param fill_color Fill colour of the area.
#' @param alpha Opacity of the fill; defaults to 1.
#' @return A ggplot2 layer that draws `hist_function` as an area over `xlim`.
geom_dnorm_fill <- function(xlim, fill_color, alpha = 1) {
  geom_area(
    stat = "function",
    fun = hist_function,
    xlim = xlim,
    fill = fill_color,
    alpha = alpha
  )
}

# Preview of the shaded bands. The widest band (4 sd either side of the mean)
# is added first and each narrower band is drawn over it.
ggplot(data_emoji_positions, aes(x = x, y = stackpos)) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-4, 4) * sd_of_data,
    fill_color = viridis::viridis(5)[4]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-3, 3) * sd_of_data,
    fill_color = viridis::viridis(5)[3]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-2, 2) * sd_of_data,
    fill_color = viridis::viridis(5)[2]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-1, 1) * sd_of_data,
    fill_color = viridis::viridis(5)[1]
  )

I then combined everything together into the original version of the chart in 2022:

Code

library(hrbrthemes)
library(ggtext)

# Original (2022) version of the chart: shaded bands, a legend box, a speech
# caption with an arrow, and one emoji per dot position.
gg_emoji_histogram <- ggplot(data_emoji_positions, aes(x = x, y = stackpos)) +
  # Shaded bands, widest first so the narrower bands are drawn on top.
  geom_dnorm_fill(
    xlim = mean_of_data + c(-4, 4) * sd_of_data,
    fill_color = viridis::viridis(5)[4]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-3, 3) * sd_of_data,
    fill_color = viridis::viridis(5)[3]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-2, 2) * sd_of_data,
    fill_color = viridis::viridis(5)[2]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-1, 1) * sd_of_data,
    fill_color = viridis::viridis(5)[1]
  ) +
  # Legend box: one line per emoji, coloured with the same palette entry as
  # the matching band.
  geom_richtext(
    data = tibble(
      label = str_glue(
        "{emo::ji('grin')} represent <span style='color:{viridis::viridis(5)[1]};font-weight:bold'>68% of the data</span>",
        "<br>",
        "{emo::ji('confused')} represent <span style='color:{viridis::viridis(5)[2]};font-weight:bold'>95% of the data</span>",
        "<br>",
        "{emo::ji('fearful')} represent <span style='color:{viridis::viridis(5)[3]};font-weight:bold'>99.7% of the data</span>",
        "<br>",
        "{emo::ji('scream')} represent <span style='color:{viridis::viridis(5)[4]};font-weight:bold'>the rest of the data</span>"
      )
    ),
    mapping = aes(label = label),
    x = 110,
    y = 30,
    hjust = 0,
    size = 7,
    family = "DM Sans",
    label.padding = unit(rep(0.5, 4), "lines"),
    label.margin = unit(rep(0, 4), "lines")
  ) +
  # Arrow from the speech caption down to the bottom row, ending one unit to
  # the left of the largest dot position.
  geom_curve(
    data = tibble(
      x = 120,
      y = 15,
      xend = max(data_emoji_positions$x) - 1,
      yend = 1
    ),
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    curvature = 0.2,
    angle = 90,
    arrow = arrow(length = unit(0.03, "npc"))
  ) +
  # Speech caption.
  geom_richtext(
    data = tibble(
      x = 115,
      y = 15,
      label = "Yup, that's me. You're probably<br>wondering how I ended up in<br> this situation..."
    ),
    mapping = aes(x = x, y = y, label = label),
    hjust = 0,
    size = 6,
    family = "Comic Sans MS",
    label.colour = "transparent",
    label.padding = unit(c(0, 0.25, 0.25, 0.25), "lines"),
    label.margin = unit(rep(0, 4), "lines")
  ) +
  # One emoji per dot position; no background, outline or padding.
  geom_richtext(
    mapping = aes(label = emoji_symbol),
    size = 5,
    fill = NA,
    label.color = NA,
    label.padding = grid::unit(rep(0, 4), "pt")
  ) +
  scale_y_continuous(expand = expansion(add = c(0, 5))) +
  labs(
    title = "#30DayChartChallenge 2022-04-02 Pictogram: Emojis and Standard Deviations",
    subtitle = "With apologies to everyone I present the <i>Emoji Standard Deviation Chart</i><br>Author: @charliejhadley",
    x = "",
    y = ""
  ) +
  theme_ipsum_rc(grid = "") +
  theme(
    plot.title = element_text(family = "Arvo"),
    plot.subtitle = element_markdown(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )

# Write the original chart to a PNG with a white background.
ggsave(
  filename = quarto_here("gg_emoji_histogram.png"),
  plot = gg_emoji_histogram,
  width = 18,
  height = 10,
  bg = "white"
)

When I recovered this post in 2024 I remembered that {geomtextpath} can add labels along curves, so I moved the title to run along the outline of the distribution:

Code

library("geomtextpath")

# 2024 version of the chart: same layers as the original, but the title is
# drawn as text along the bell curve instead of as a plot title.
gg_emoji_histogram_fancy <- ggplot(
  data_emoji_positions,
  aes(x = x, y = stackpos)
) +
  # Shaded bands, widest first so the narrower bands are drawn on top.
  geom_dnorm_fill(
    xlim = mean_of_data + c(-4, 4) * sd_of_data,
    fill_color = viridis::viridis(5)[4]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-3, 3) * sd_of_data,
    fill_color = viridis::viridis(5)[3]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-2, 2) * sd_of_data,
    fill_color = viridis::viridis(5)[2]
  ) +
  geom_dnorm_fill(
    xlim = mean_of_data + c(-1, 1) * sd_of_data,
    fill_color = viridis::viridis(5)[1]
  ) +
  # Legend box: one line per emoji, coloured with the same palette entry as
  # the matching band.
  geom_richtext(
    data = tibble(
      label = str_glue(
        "{emo::ji('grin')} represent <span style='color:{viridis::viridis(5)[1]};font-weight:bold'>68% of the data</span>",
        "<br>",
        "{emo::ji('confused')} represent <span style='color:{viridis::viridis(5)[2]};font-weight:bold'>95% of the data</span>",
        "<br>",
        "{emo::ji('fearful')} represent <span style='color:{viridis::viridis(5)[3]};font-weight:bold'>99.7% of the data</span>",
        "<br>",
        "{emo::ji('scream')} represent <span style='color:{viridis::viridis(5)[4]};font-weight:bold'>the rest of the data</span>"
      )
    ),
    mapping = aes(label = label),
    x = 110,
    y = 30,
    hjust = 0,
    size = 7,
    family = "DM Sans",
    label.padding = unit(rep(0.5, 4), "lines"),
    label.margin = unit(rep(0, 4), "lines")
  ) +
  # Arrow from the speech caption down to the bottom row, ending one unit to
  # the left of the largest dot position.
  geom_curve(
    data = tibble(
      x = 120,
      y = 15,
      xend = max(data_emoji_positions$x) - 1,
      yend = 1
    ),
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    curvature = 0.2,
    angle = 90,
    arrow = arrow(length = unit(0.03, "npc"))
  ) +
  # Speech caption.
  geom_richtext(
    data = tibble(
      x = 115,
      y = 15,
      label = "Yup, that's me. You're probably<br>wondering how I ended up in<br> this situation..."
    ),
    mapping = aes(x = x, y = y, label = label),
    hjust = 0,
    size = 6,
    family = "Comic Sans MS",
    label.colour = "transparent",
    label.padding = unit(c(0, 0.25, 0.25, 0.25), "lines"),
    label.margin = unit(rep(0, 4), "lines")
  ) +
  # One emoji per dot position; no background, outline or padding.
  geom_richtext(
    mapping = aes(label = emoji_symbol),
    data = data_emoji_positions,
    size = 5,
    fill = NA,
    label.color = NA,
    label.padding = grid::unit(rep(0, 4), "pt")
  ) +
  # First half of the title, placed along the curve for x between 86 and 100.
  # The function returns NA outside that range; linewidth = 0 is set for the
  # path itself.
  geom_textpath(
    stat = "function",
    fun = function(x) {
      ifelse(between(x, 86, 100), dnorm(x, mean = 100, sd = 8) * 700, NA)
    },
    label = "#30DayChartChallenge 2022",
    rich = TRUE,
    size = 8,
    vjust = -0.5,
    linewidth = 0
  ) +
  # Second half of the title, placed along the curve for x between 100 and 117.
  geom_textpath(
    stat = "function",
    fun = function(x) {
      ifelse(between(x, 100, 117), dnorm(x, mean = 100, sd = 8) * 700, NA)
    },
    label = "Pictogram: Emojis and Standard Deviations",
    family = "Comic Sans MS",
    rich = TRUE,
    size = 8,
    vjust = -0.2,
    linewidth = 0
  ) +
  scale_y_continuous(expand = expansion(add = c(0, 5))) +
  labs(
    x = "",
    y = "",
    caption = "Author: @charliejhadley"
  ) +
  theme_ipsum_rc(grid = "") +
  theme(
    plot.title = element_text(family = "Arvo"),
    plot.subtitle = element_markdown(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    plot.caption.position = "plot"
  )

# Write the 2024 version of the chart to a PNG with a white background.
ggsave(
  filename = quarto_here("gg_emoji_histogram_fancy.png"),
  plot = gg_emoji_histogram_fancy,
  width = 18,
  height = 10,
  bg = "white"
)

Reuse

CC BY 4.0

Citation

BibTeX citation:

@online{hadley2024,
  author = {Hadley, Charlie},
  title = {From the Archive: {30DayChartChallenge} 2022 {Pictogram}},
  date = {2024-10-21},
  url = {https://visibledata.co.uk/posts/2024-10-21_30daychartchallenge-2022-emoji-standard-deviation-chart},
  langid = {en}
}

For attribution, please cite this work as:

Hadley, Charlie. 2024. “From the Archive: 30DayChartChallenge 2022 Pictogram.” October 21, 2024. https://visibledata.co.uk/posts/2024-10-21_30daychartchallenge-2022-emoji-standard-deviation-chart.