Scatterplot with R

Cedric Vidonne

Lei Chen

Scatterplot with R

A scatterplot is a type of visualization that uses Cartesian coordinates to display the values of two variables for a set of data. The data are displayed as a collection of dots. The position of each dot on the horizontal and vertical axes indicates the values for an individual data point.

More about: Scatterplot


Scatterplot

# Loading required packages
library(unhcrthemes)
library(tidyverse)
library(scales)
library(ggrepel)

# Loading data
# The plot below uses the refugee_number, idp_number and region columns.
df <- read_csv("https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/scatterplot.csv")

# Plot
ggplot(
  df,
  aes(
    x = refugee_number,
    y = idp_number
  )
) +
  # All points share one colour: the first colour of the "pal_blue" palette
  geom_point(
    color = unhcr_pal(n = 1, "pal_blue"),
    size = 2.5
  ) +
  # Repelled labels so region names do not overlap;
  # dividing by .pt converts an 8 pt font size to ggplot2's mm units
  geom_text_repel(aes(label = region),
    size = 8 / .pt
  ) +
  labs(
    title = "Comparison of refugee and IDP population by region | 2021",
    y = "Number of IDPs",
    x = "Number of refugees",
    caption = "Source: UNHCR Refugee Data Finder\n© UNHCR, The UN Refugee Agency"
  ) +
  # label_number_si() is deprecated since scales 1.2.0;
  # label_number() with short-scale cuts (K, M, B, T) is its replacement
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  scale_y_continuous(
    labels = label_number(scale_cut = cut_short_scale()),
    breaks = pretty_breaks(n = 6)
  ) +
  theme_unhcr(
    grid = "XY",
    axis = FALSE,
    axis_title = "xy"
  )

A scatterplot showing a comparison of refugee and IDP populations by region | 2021


Scatterplot with colours

# Loading required packages
library(unhcrthemes)
library(tidyverse)
library(scales)
library(ggrepel)

# Loading data
# The plot below uses the refugee_number, idp_number and region columns.
df <- read_csv("https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/scatterplot.csv")

# Order regions for visualization
# The factor level order determines how the discrete colour scale
# assigns palette colours to regions.
df$region <- factor(df$region,
  levels = c(
    "East and Horn of Africa and Great Lakes",
    "Southern Africa",
    "West and Central Africa",
    "Americas",
    "Asia and the Pacific",
    "Europe",
    "Middle East and North Africa"
  )
)

# Plot
ggplot(
  df,
  aes(
    x = refugee_number,
    y = idp_number
  )
) +
  # Point colour is mapped to region
  geom_point(aes(color = region),
    size = 2.5
  ) +
  # Repelled labels so region names do not overlap;
  # dividing by .pt converts an 8 pt font size to ggplot2's mm units
  geom_text_repel(aes(label = region),
    size = 8 / .pt
  ) +
  labs(
    title = "Comparison of refugee and IDP population by region | 2021",
    y = "Number of IDPs",
    x = "Number of refugees",
    caption = "Source: UNHCR Refugee Data Finder\n© UNHCR, The UN Refugee Agency"
  ) +
  # label_number_si() is deprecated since scales 1.2.0;
  # label_number() with short-scale cuts (K, M, B, T) is its replacement
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  scale_y_continuous(
    labels = label_number(scale_cut = cut_short_scale()),
    breaks = pretty_breaks(n = 6)
  ) +
  scale_color_unhcr_d(palette = "pal_unhcr_region") +
  # Legend is switched off: every point is already labelled with its region
  theme_unhcr(
    grid = "XY",
    axis = FALSE,
    axis_title = "xy",
    legend = FALSE
  )

A scatterplot showing a comparison of refugee and IDP populations by region | 2021


Related chart with R