Scatterplot with Python

Cedric Vidonne

Lei Chen

Scatterplot with Python

A scatterplot is a type of visualization that uses Cartesian coordinates to display the values of two variables for a set of data. The data are displayed as a collection of dots. The position of each dot on the horizontal and vertical axes indicates the values for an individual data point.

More about: Scatterplot


Scatterplot

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['unhcrpyplotstyle','scatterplot'])

# fetch the CSV that backs this chart
df = pd.read_csv('https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/scatterplot.csv')

# pick the columns used for the x values, the y values and the point labels
x, y, label = df['refugee_number'], df['idp_number'], df['region']

# draw one dot per row
fig, ax = plt.subplots()
ax.scatter(x, y, s=30)

# write each row's region name just above its dot
for region, refugees, idps in zip(label, x, y):
    ax.annotate(region, (refugees, idps), textcoords="offset points", xytext=(0,5), ha='left')

# chart title
ax.set_title('Comparison of refugee and IDP population by region | 2021')

# axis captions
ax.set_xlabel('Number of refugees (millions)')
ax.set_ylabel('Number of IDPs (millions)')

#format axis tick labels
def number_formatter(x, pos):
    """Format a tick value as a compact label with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position; unused, but kept because the function is
            installed as an axis tick formatter, which is called with
            both the value and the position.

    Returns:
        The value scaled to millions ('M') when its magnitude is at least
        1e6, to thousands ('K') when it is at least 1e3, and left unscaled
        otherwise, printed with up to two decimals and no trailing zeros.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        scaled, suffix = x / 1e6, 'M'
    elif magnitude >= 1e3:
        scaled, suffix = x / 1e3, 'K'
    else:
        # values under one thousand must not be pushed into 'K',
        # otherwise 500 would be printed as '0K'
        scaled, suffix = x, ''
    # keep up to two decimals so fractional ticks such as 1.5M and 2.5M
    # do not both collapse onto the same rounded label
    text = '{:.2f}'.format(scaled).rstrip('0').rstrip('.')
    return text + suffix
# use the compact number labels on both axes
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(number_formatter)

# source and copyright notes, anchored to the lower-left corner of the axes
# and shifted downwards by a fixed number of points
footer_style = dict(xycoords='axes fraction', textcoords='offset points', va='top', color='#666666', fontsize=9)
footer_notes = (
    ('Source: UNHCR Refugee Data Finder', -40),
    ('©UNHCR, The UN Refugee Agency', -50),
)
for note, y_offset in footer_notes:
    ax.annotate(note, (0,0), (0, y_offset), **footer_style)

# tidy the margins, then display the figure
fig.tight_layout()
plt.show()

A scatterplot showing a comparison of refugee and IDP populations by region in 2021


Scatterplot with colours

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['unhcrpyplotstyle','scatterplot'])

# fetch the CSV that backs this chart
df = pd.read_csv('https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/scatterplot.csv')

# pick the columns used for the x values, the y values and the point labels
x, y, label = df['refugee_number'], df['idp_number'], df['region']

# colours handed to the scatter call, listed in the order they are applied
colour = ['#00B398','#E1CC0D','#589BE5','#18375F','#0072BC','#EF4A60','#8EBEFF']

# draw one coloured dot per row
fig, ax = plt.subplots()
ax.scatter(x, y, s=30, c=colour)

# write each row's region name just above its dot
for region, refugees, idps in zip(label, x, y):
    ax.annotate(region, (refugees, idps), textcoords="offset points", xytext=(0,5), ha='left')

# chart title
ax.set_title('Comparison of refugee and IDP population by region | 2021')

# axis captions
ax.set_xlabel('Number of refugees (millions)')
ax.set_ylabel('Number of IDPs (millions)')

#format axis tick labels
def number_formatter(x, pos):
    """Format a tick value as a compact label with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position; unused, but kept because the function is
            installed as an axis tick formatter, which is called with
            both the value and the position.

    Returns:
        The value scaled to millions ('M') when its magnitude is at least
        1e6, to thousands ('K') when it is at least 1e3, and left unscaled
        otherwise, printed with up to two decimals and no trailing zeros.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        scaled, suffix = x / 1e6, 'M'
    elif magnitude >= 1e3:
        scaled, suffix = x / 1e3, 'K'
    else:
        # values under one thousand must not be pushed into 'K',
        # otherwise 500 would be printed as '0K'
        scaled, suffix = x, ''
    # keep up to two decimals so fractional ticks such as 1.5M and 2.5M
    # do not both collapse onto the same rounded label
    text = '{:.2f}'.format(scaled).rstrip('0').rstrip('.')
    return text + suffix
# use the compact number labels on both axes
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(number_formatter)

# source and copyright notes, anchored to the lower-left corner of the axes
# and shifted downwards by a fixed number of points
footer_style = dict(xycoords='axes fraction', textcoords='offset points', va='top', color='#666666', fontsize=9)
footer_notes = (
    ('Source: UNHCR Refugee Data Finder', -40),
    ('©UNHCR, The UN Refugee Agency', -50),
)
for note, y_offset in footer_notes:
    ax.annotate(note, (0,0), (0, y_offset), **footer_style)

# tidy the margins, then display the figure
fig.tight_layout()
plt.show()

A scatterplot showing a comparison of refugee and IDP populations by region in 2021


Related chart with Python