Bubble chart with Python
A bubble chart displays multi-dimensional data in a two-dimensional plot. It can be considered a variation of the scatterplot in which the dots are replaced with bubbles. However, unlike a scatterplot, which has only two variables defined by the X and Y axes, a bubble chart can assign each data point (bubble) a third variable (shown by the size of the bubble) and a fourth variable (shown by the colour of the bubble).
More about: Bubble chart
Bubble chart
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
# apply the chart styles; both 'unhcrpyplotstyle' and 'bubble' must be
# available to matplotlib as named styles for this call to succeed
plt.style.use(['unhcrpyplotstyle', 'bubble'])

# load data set
df = pd.read_csv('https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/bubble.csv')

# compute data array for plotting
x = df['refugee_number']    # horizontal position of each bubble
y = df['idp_number']        # vertical position of each bubble
size = df['total_number']   # drives the bubble size
label = df['region']        # text drawn next to each bubble

# plot the chart
fig, ax = plt.subplots()
# total_number is divided by 70000 so the marker sizes stay readable
ax.scatter(x, y, s=size/70000)

# annotate all points; zip iterates positionally, so it does not depend on
# the DataFrame having a default 0..n-1 index
for region, x_pos, y_pos in zip(label, x, y):
    ax.annotate(region, (x_pos, y_pos), textcoords="offset points", xytext=(0,10), ha='left')

# set chart title
ax.set_title('Comparison of refugee and IDP population by region | 2021')

# set axis label
ax.set_xlabel('Number of refugees (millions)')
ax.set_ylabel('Number of IDPs (millions)')
#format axis tick labels
def number_formatter(x, pos):
    """Return a compact tick label for the axis value *x*.

    The (value, position) signature is the one matplotlib expects from a
    tick-formatter callable.

    Args:
        x: Tick value in data units.
        pos: Tick position supplied by matplotlib; not used here.

    Returns:
        The value rounded to a whole number, suffixed with 'M' for
        millions or 'K' for thousands; values below one thousand
        (including zero and negatives) are returned without a suffix.
    """
    if x >= 1e6:
        return f'{x * 1e-6:1.0f}M'
    # Only use the K suffix from one thousand upward, so that small
    # positive values are not rendered as "0K" or "1K".
    if x >= 1e3:
        return f'{x * 1e-3:1.0f}K'
    return f'{x:1.0f}'
# apply the compact number formatter to the tick labels of both axes
ax.xaxis.set_major_formatter(number_formatter)
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright
# both notes are anchored at the lower-left corner of the axes (0, 0 in axes
# fraction) and shifted down by 40 and 50 points respectively
ax.annotate('Source: UNHCR Refugee Data Finder', (0,0), (0, -40), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)
ax.annotate('©UNHCR, The UN Refugee Agency', (0,0), (0, -50), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Bubble chart with colours
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
# apply the chart styles; both 'unhcrpyplotstyle' and 'bubble' must be
# available to matplotlib as named styles for this call to succeed
plt.style.use(['unhcrpyplotstyle', 'bubble'])

# load data set
df = pd.read_csv('https://raw.githubusercontent.com/GDS-ODSSS/unhcr-dataviz-platform/master/data/correlation/bubble.csv')

# compute data array for plotting
x = df['refugee_number']    # horizontal position of each bubble
y = df['idp_number']        # vertical position of each bubble
size = df['total_number']   # drives the bubble size
label = df['region']        # text drawn next to each bubble

# set colour palette
# one colour per bubble: the list has seven entries, so this assumes the
# data set has seven rows -- TODO confirm against the CSV
colour = ['#00B398','#E1CC0D','#589BE5','#18375F','#0072BC','#EF4A60','#8EBEFF']

# plot the chart
fig, ax = plt.subplots()
# total_number is divided by 70000 so the marker sizes stay readable
ax.scatter(x, y, s=size/70000, c=colour)

# annotate all points; zip iterates positionally, so it does not depend on
# the DataFrame having a default 0..n-1 index
for region, x_pos, y_pos in zip(label, x, y):
    ax.annotate(region, (x_pos, y_pos), textcoords="offset points", xytext=(0,10), ha='left')

# set chart title
ax.set_title('Comparison of refugee and IDP population by region | 2021')

# set axis label
ax.set_xlabel('Number of refugees (millions)')
ax.set_ylabel('Number of IDPs (millions)')
#format axis tick labels
def number_formatter(x, pos):
    """Return a compact tick label for the axis value *x*.

    The (value, position) signature is the one matplotlib expects from a
    tick-formatter callable.

    Args:
        x: Tick value in data units.
        pos: Tick position supplied by matplotlib; not used here.

    Returns:
        The value rounded to a whole number, suffixed with 'M' for
        millions or 'K' for thousands; values below one thousand
        (including zero and negatives) are returned without a suffix.
    """
    if x >= 1e6:
        return f'{x * 1e-6:1.0f}M'
    # Only use the K suffix from one thousand upward, so that small
    # positive values are not rendered as "0K" or "1K".
    if x >= 1e3:
        return f'{x * 1e-3:1.0f}K'
    return f'{x:1.0f}'
# apply the compact number formatter to the tick labels of both axes
ax.xaxis.set_major_formatter(number_formatter)
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright
# both notes are anchored at the lower-left corner of the axes (0, 0 in axes
# fraction) and shifted down by 40 and 50 points respectively
ax.annotate('Source: UNHCR Refugee Data Finder', (0,0), (0, -40), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)
ax.annotate('©UNHCR, The UN Refugee Agency', (0,0), (0, -50), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()