import wbdata
import pandas as pd
# Fetch country information from the World Bank API and load it into a DataFrame
geo = pd.DataFrame(wbdata.get_countries())

# These columns hold dicts; keep only their 'value' entry and fall back to
# None for anything that is not a dict.
for nested_column in ('region', 'adminregion', 'incomeLevel', 'lendingType'):
    geo[nested_column] = geo[nested_column].apply(
        lambda item: item['value'] if isinstance(item, dict) else None
    )

# Convert coordinates to numeric; values that cannot be parsed become NaN
for coordinate in ('longitude', 'latitude'):
    geo[coordinate] = pd.to_numeric(geo[coordinate], errors='coerce')

# Set index to country ID
geo.set_index('id', inplace=True)
A Matplotlib theme for IOM
How-to
Examples in Practice
The World Bank Data API offers a series of indicators related to migration:
Core Migration Metrics
SM.POP.NETM: Net migration — The net total of migrants during a period (immigrants minus emigrants)
SM.POP.MIGR: Net migration rate — The net number of migrants per 1,000 population during a period
SM.POP.TOTL: International migrant stock, total — The total number of people living in a country other than where they were born
SM.POP.TOTL.ZS: International migrant stock (% of population) — Migrant population as percentage of total population
Remittances
BX.TRF.PWKR.CD.DT: Personal remittances received (current US$) — Sum of personal transfers and compensation, Migrant worker transfers and border worker salaries
BX.TRF.PWKR.DT.GD.ZS: Personal remittances (% of GDP) — Remittances as percentage of GDP
SI.RMT.COST.OB.ZS: Average transaction cost of sending remittances from a specific country (%)
SI.RMT.COST.IB.ZS: Average transaction cost of sending remittances to a specific country (%)
Pulling the geographic reference
We will use the wbdata module
import sys

# Add custom path so the local iompyplotstyle package can be imported
sys.path.append("iompyplotstyle")
import iompyplotstyle
Column chart
In a column chart, each category is represented by a vertical rectangle, with the height of the rectangle being proportional to the values being plotted.
The chart with iompyplotstyle + column styles:
For the first two examples, we will look at net migration for developed countries.
import pandas as pd
import wbdata
# Fetch the net-migration indicator for every country/aggregate, 2013-2023
migrant_stock = wbdata.get_dataframe(
    {"SM.POP.NETM": "Net Migration"},
    country='all',
    date=("2013", "2023"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)
# "2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021"

# Reset the index of the main DataFrame to make 'country' a column
migrant_stock = migrant_stock.reset_index()
migrant_stock.rename(columns={'country': 'country_name', 'date': 'year'}, inplace=True)

# Merge the main DataFrame with the country info (region, income level, ...).
# A left join keeps rows whose name has no match in geo.
migrant_stock = pd.merge(migrant_stock, geo, left_on='country_name', right_on='name', how='left')
#print(migrant_stock.columns)
#print(migrant_stock['year'].unique())
Let’s now calculate Net Migration Series in OECD Countries
# Clean up - drop rows with missing values and convert to numeric
migrant_stock = migrant_stock.dropna(subset=['Net Migration'])
migrant_stock['Net Migration'] = pd.to_numeric(migrant_stock['Net Migration'])
migrant_stock['year'] = pd.to_numeric(migrant_stock['year'])

# Net-migration series of the 'OECD members' aggregate, most recent year first.
# NOTE(review): 2013-2023 spans 11 years, so head(10) drops the oldest one —
# confirm this is intended.
all_migrant_stock_oecd_2013_2023 = (
    migrant_stock[['country_name', 'Net Migration', 'region', 'incomeLevel', 'year']]
    .query("country_name == 'OECD members'")
    .sort_values('year', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# Save the OECD series to CSV for the column-chart examples
all_migrant_stock_oecd_2013_2023.to_csv('data-raw/column.csv', index=False)
Basic column chart
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
import iompyplotstyle
plt.style.use(['iompyplotstyle', 'column'])

# load data set
df = pd.read_csv('data-raw/column.csv')

# compute data array for plotting
x = df['year']
y = df['Net Migration']

# plot the chart
fig, ax = plt.subplots()
bar_plot = ax.bar(x, y)

# set chart title
ax.set_title('Net Migration in OECD Countries')

# set y-axis title
ax.set_ylabel('Individuals (millions)')

# set y-axis labels
ax.tick_params(labelleft=True)

# set y-axis limit: leave 10% headroom above the tallest column
# (Series.max() skips NaN, unlike the builtin max())
ax.set_ylim(0, y.max() + 0.1 * y.max())

# set tick based on x value
ax.set_xticks(x)

# set grid
ax.grid(axis='y')
#format y-axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'4K'`` or ``'12'``.
    """
    # Pick the unit from the magnitude so negative values (net emigration)
    # are abbreviated the same way as positive ones.
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -25), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -35), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Column chart with data label
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle', 'column'])

# load data set
df = pd.read_csv('data-raw/column.csv')

# compute data array for plotting
x = df['year']
y = df['Net Migration']

# plot the chart
fig, ax = plt.subplots()
bar_plot = ax.bar(x, y)
#format y-axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'4K'`` or ``'12'``.
    """
    # Pick the unit from the magnitude so negative values are abbreviated too.
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.yaxis.set_major_formatter(number_formatter)

# set chart title
ax.set_title('Net Migration in OECD Countries')

# set subtitle
#plt.suptitle('Number of people in millions', x=0.025, y=0.88, ha='left')
Text(0.0, 1.0, 'Net Migration in OECD Countries')
Bar chart
A bar chart is a chart in which each category is represented by a horizontal rectangle, with the length of the rectangle proportional to the values being plotted. The horizontal axis shows data value, and the vertical axis displays the categories being compared.
The chart with iompyplotstyle + bar styles:
Let’s compute net migration for High Income Countries in 2023:
# Clean up - drop rows with missing values and convert to numeric
migrant_stock = migrant_stock.dropna(subset=['Net Migration'])
migrant_stock['Net Migration'] = pd.to_numeric(migrant_stock['Net Migration'])
# Normalise 'year' to a number so the filter below works whether or not an
# earlier cell already converted it; comparing a numeric column to the string
# '2023' would silently select nothing.
migrant_stock['year'] = pd.to_numeric(migrant_stock['year'])

# Ten high-income countries with the largest net migration in 2023
top10_migrant_stock_high_income_2023 = (
    migrant_stock[['country_name', 'Net Migration', 'region', 'incomeLevel', 'year']]
    .query("incomeLevel == 'High income' and year == 2023")
    .sort_values('Net Migration', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# Save the top-10 DataFrame to CSV for the bar-chart examples
top10_migrant_stock_high_income_2023.to_csv('data-raw/bar.csv', index=False)
Basic bar chart
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
from textwrap import wrap
plt.style.use(['iompyplotstyle', 'bar'])

# load data set
df = pd.read_csv('data-raw/bar.csv')

# sort values in ascending order: barh draws the first row at the bottom,
# so the largest value ends up at the top of the chart
df.sort_values('Net Migration', inplace=True)

# prepare data array for plotting
x = df['country_name']
y = df['Net Migration']

# wrap long labels
x = ['\n'.join(wrap(name, 20)) for name in x]

# plot the chart
fig, ax = plt.subplots()
bar_plot = ax.barh(x, y)

# set chart title
ax.set_title('Top 10 Net Migration in High Income Countries | 2023')

# set x-axis title
ax.set_xlabel('Individuals')

# set x-axis label
ax.tick_params(labelbottom=True)

# set x-axis limit
#ylimit = plt.xlim(0, max(y)+0.1*(max(y)))

# show grid below the bars
ax.grid(axis='x')
#format x-axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'4K'`` or ``'12'``.
    """
    # Pick the unit from the magnitude so negative values are abbreviated too.
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.xaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Bar chart with data label
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
from textwrap import wrap
plt.style.use(['iompyplotstyle', 'bar'])

# load data set
df = pd.read_csv('data-raw/bar.csv')

# sort values in ascending order: barh draws the first row at the bottom,
# so the largest value ends up at the top of the chart
df.sort_values('Net Migration', inplace=True)

# prepare data array for plotting
x = df['country_name']
y = df['Net Migration']

# wrap long labels
x = ['\n'.join(wrap(name, 20)) for name in x]

# plot the chart
fig, ax = plt.subplots()
bar_plot = ax.barh(x, y)

# set chart title — must come after plt.subplots(), otherwise the title is
# applied to whatever axes a previous cell left behind
ax.set_title('Top 10 Net Migration in High Income Countries | 2023')
Line chart
A line chart is a type of chart that displays the evolution of one or several numeric variables over a continuous interval or time period. Typically, the x-axis is used for a timescale or a sequence of intervals, while the y-axis reports values across that progression.
The chart with iompyplotstyle + line styles:
import wbdata
# Fetch personal remittances received (current US$), 2013-2023
remittance = wbdata.get_dataframe(
    indicators={'BX.TRF.PWKR.CD.DT': 'remittance'},
    country='all',
    date=("2013", "2023"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)

# Turn the index into columns and attach the country reference data
remittance = remittance.reset_index()
remittance.rename(columns={'country': 'country_name', 'date': 'year'}, inplace=True)
remittance = pd.merge(remittance, geo, left_on='country_name', right_on='name', how='left')

# Keep only the 'World' aggregate, most recent year first
remittance_world = (
    remittance[['country_name', 'remittance', 'year']]
    .query("country_name == 'World'")
    .sort_values('year', ascending=False)
)
remittance_world.to_csv('data-raw/line.csv', index=False)
Single line chart
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import iompyplotstyle
#plt.style.use(['iompyplotstyle','line'])
# load data set
df = pd.read_csv('data-raw/line.csv')

# compute data for plotting
x = df['year']
y = df['remittance']

# plot the chart
fig, ax = plt.subplots()
line_plot = ax.plot(x, y)

# set chart title
plt.title('Evolution of Remittance ')
#plt.suptitle('Personal transfers/compensation received')

# set y-axis label
ax.set_ylabel('(current US$)')

# set y-axis limit: leave 10% headroom above the highest point
# (Series.max() skips NaN, unlike the builtin max())
ax.set_ylim(0, y.max() + 0.1 * y.max())
#format y-axis tick labels
def number_formatter(x, pos=None):
    """Format axis tick labels with appropriate units (K, M, B).

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value with one decimal and a unit suffix, e.g. ``'2.5B'``; a
        trailing ``.0`` is dropped so whole numbers read ``'3B'``. Values
        below one thousand are shown as whole numbers without a suffix.
    """
    magnitude = abs(x)
    if magnitude >= 1e9:  # Billions
        scaled, suffix = x * 1e-9, 'B'
    elif magnitude >= 1e6:  # Millions
        scaled, suffix = x * 1e-6, 'M'
    elif magnitude >= 1e3:  # Thousands
        scaled, suffix = x * 1e-3, 'K'
    else:  # Units
        return '{:1.0f}'.format(x)

    text = '{:1.1f}'.format(scaled)
    # Remove .0 from whole numbers for cleaner display; only the trailing
    # decimal is touched, never digits elsewhere in the number.
    if text.endswith('.0'):
        text = text[:-2]
    return text + suffix
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -25), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -35), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Multiple line chart
#print(remittance['country_name'].unique())
# World Bank aggregates grouping countries by demographic-dividend stage
demographic_categories = [
    'Early-demographic dividend',
    'Late-demographic dividend',
    'Post-demographic dividend',
    'Pre-demographic dividend',
]

remittance_demo = (
    remittance[['country_name', 'remittance', 'year']]
    .query("country_name in @demographic_categories")
    .sort_values('year', ascending=False)
)

# Reshape to one row per year and one column per category
remittance_demo = remittance_demo.pivot(index='year', columns='country_name', values='remittance')
remittance_demo = remittance_demo.reset_index()
remittance_demo.to_csv('data-raw/line2.csv', index=False)
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#plt.style.use(['iompyplotstyle','line'])
# load data set
df = pd.read_csv('data-raw/line2.csv')

# compute data for plotting
x = df['year']

# (CSV column, short label drawn at the end of the line, label alignment)
line_series = [
    ('Early-demographic dividend', 'Early', 'center'),
    ('Late-demographic dividend', 'Late', 'left'),
    ('Post-demographic dividend', 'Post', 'left'),
    ('Pre-demographic dividend', 'Pre', 'left'),
]

# plot the chart
fig, ax = plt.subplots()
for column, _, _ in line_series:
    ax.plot(x, df[column])

# set chart title
ax.set_title('Remittance Vs Demographic Dividend')

# set y-axis label
ax.set_ylabel('(current US$)')

# set y-axis limit
#ylimit = plt.ylim(0, 60 * 1e6)

# set direct labeling for lines: put each short label just above the last
# point of its series
for column, short_label, alignment in line_series:
    plt.annotate(short_label, (x.iloc[-1], df[column].iloc[-1]),
                 textcoords="offset points",
                 xytext=(0,10),
                 ha=alignment)
#format y-axis tick labels
def number_formatter(x, pos=None):
    """Format axis tick labels with appropriate units (K, M, B).

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value with one decimal and a unit suffix, e.g. ``'2.5B'``; a
        trailing ``.0`` is dropped so whole numbers read ``'3B'``. Values
        below one thousand are shown as whole numbers without a suffix.
    """
    magnitude = abs(x)
    if magnitude >= 1e9:  # Billions
        scaled, suffix = x * 1e-9, 'B'
    elif magnitude >= 1e6:  # Millions
        scaled, suffix = x * 1e-6, 'M'
    elif magnitude >= 1e3:  # Thousands
        scaled, suffix = x * 1e-3, 'K'
    else:  # Units
        return '{:1.0f}'.format(x)

    text = '{:1.1f}'.format(scaled)
    # Remove .0 from whole numbers for cleaner display; only the trailing
    # decimal is touched, never digits elsewhere in the number.
    if text.endswith('.0'):
        text = text[:-2]
    return text + suffix
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -25), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -35), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Scatterplot
A scatterplot is a type of visualization using Cartesian Coordinates to display two variables for a set of data. The data are displayed as a collection of dots. The position of each dot on the horizontal and vertical axis indicates the values for an individual data point.
The chart with iompyplotstyle + scatterplot styles:
import wbdata
# Fetch population, migrant share and GDP per capita, 1990-2015
pop_and_perc = wbdata.get_dataframe(
    indicators={'SP.POP.TOTL': 'total_population',
                'SM.POP.TOTL.ZS': 'percent_migrant',
                'NY.GDP.PCAP.CD': 'gdp_capita'},
    country='all',
    date=("1990", "2015"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)

# Turn the index into columns and attach the country reference data
pop_and_perc = pop_and_perc.reset_index()
pop_and_perc.rename(columns={'country': 'country_name', 'date': 'year'}, inplace=True)
pop_and_perc = pd.merge(pop_and_perc, geo, left_on='country_name', right_on='name', how='left')

# Clean up - drop rows with missing population or migrant share
pop_and_perc = pop_and_perc.dropna(subset=['total_population', 'percent_migrant'])

# Ten most populous high-income countries in 2015.
# 'year' is compared as a string because it is not converted to numeric here.
pop_and_perc_high = (
    pop_and_perc[['country_name', 'percent_migrant', 'total_population', 'incomeLevel', 'region', 'gdp_capita', 'year']]
    .query("incomeLevel == 'High income' and year == '2015'")
    .sort_values('total_population', ascending=False)
    .head(10)
)
pop_and_perc_high.to_csv('data-raw/scatterplot.csv', index=False)
Scatterplot
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle', 'scatterplot'])

# load data set
df = pd.read_csv('data-raw/scatterplot.csv')

# compute data array for plotting
x = df['total_population']
y = df['percent_migrant']
label = df['country_name']

# plot the chart
fig, ax = plt.subplots()
ax.scatter(x, y, s=30)

# annotate every point with its country name
for name, x_value, y_value in zip(label, x, y):
    plt.annotate(name, (x_value, y_value), textcoords="offset points", xytext=(0,5), ha='left')

# set chart title
ax.set_title('Migrants and Population for top 10 largest High Income Countries | 2015')

# set axis label
ax.set_xlabel('Total Population (millions)')
ax.set_ylabel('Share of Migrants (%)')
#format axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'4K'`` or ``'500'``.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    # Only abbreviate from one thousand upwards; smaller values would
    # otherwise collapse to '0K' or '1K'.
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.xaxis.set_major_formatter(number_formatter)
#ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Scatterplot with colours
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle', 'scatterplot'])

# load data set
df = pd.read_csv('data-raw/scatterplot.csv')

# compute data array for plotting
x = df['total_population']
y = df['percent_migrant']
label = df['country_name']
regions = df['region']

# Create a color mapping for regions. The palette is cycled so that data with
# more regions than colours reuses colours instead of raising IndexError.
unique_regions = regions.unique()
color_palette = ['#00B398', '#E1CC0D', '#589BE5']  # Your chosen colors
region_colors = {
    region: color_palette[i % len(color_palette)]
    for i, region in enumerate(unique_regions)
}

# Map each point to its region color
point_colors = [region_colors[region] for region in regions]

# plot the chart
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x, y, s=100, c=point_colors, alpha=0.7)  # Increased point size and added transparency

# annotate every point with its country name
for name, x_value, y_value in zip(label, x, y):
    plt.annotate(name, (x_value, y_value), textcoords="offset points", xytext=(0,5), ha='left')

# set chart title
ax.set_title('Migrants and Population for top 10 largest High Income Countries | 2015')

# set axis label
ax.set_xlabel('Total Population (millions)')
ax.set_ylabel('Share of Migrants (%)')
#format axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'4K'`` or ``'500'``.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    # Only abbreviate from one thousand upwards; smaller values would
    # otherwise collapse to '0K' or '1K'.
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.xaxis.set_major_formatter(number_formatter)
#ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Bubble chart
A bubble chart displays multi-dimensional data in a two-dimensional plot. It can be considered as a variation of the scatterplot, in which the dots are replaced with bubbles. However, unlike a scatterplot which has only two variables defined by the X and Y axis, on a bubble chart each data point (bubble) can be assigned with a third variable (by size of bubble) and a fourth variable (by colour of bubble).
Bubble chart
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle', 'bubble'])

# load data set
df = pd.read_csv('data-raw/scatterplot.csv')

# compute data array for plotting
size = df['total_population']
y = df['percent_migrant']
x = df['gdp_capita']
label = df['country_name']

# plot the chart; population is divided by 100,000 to get a usable marker area
fig, ax = plt.subplots()
ax.scatter(x, y, s=size/100000)

# annotate every bubble with its country name
for name, x_value, y_value in zip(label, x, y):
    plt.annotate(name, (x_value, y_value), textcoords="offset points", xytext=(0,10), ha='left')

# set chart title
ax.set_title('Migrants, Population & Wealth for top 10 largest High Income Countries | 2015')

# set axis label
ax.set_xlabel('GDP per capita (USD)')
ax.set_ylabel('Share of Migrants (%)')

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Bubble chart with colours
# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle', 'bubble'])

# load data set
df = pd.read_csv('data-raw/scatterplot.csv')

# compute data array for plotting
size = df['total_population']
y = df['percent_migrant']
x = df['gdp_capita']
label = df['country_name']
regions = df['region']

# Create a color mapping for regions. The palette is cycled so that data with
# more regions than colours reuses colours instead of raising IndexError.
unique_regions = regions.unique()
color_palette = ['#00B398', '#E1CC0D', '#589BE5']  # Your chosen colors
region_colors = {
    region: color_palette[i % len(color_palette)]
    for i, region in enumerate(unique_regions)
}

# Map each point to its region color
point_colors = [region_colors[region] for region in regions]

# plot the chart; population is divided by 100,000 to get a usable marker area
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x, y, s=size/100000, c=point_colors, alpha=0.7)  # added transparency

# annotate every bubble with its country name
for name, x_value, y_value in zip(label, x, y):
    plt.annotate(name, (x_value, y_value), textcoords="offset points", xytext=(0,10), ha='left')

# set chart title
ax.set_title('Migrants, Population & Wealth for top 10 largest High Income Countries | 2015')

# set axis label
ax.set_xlabel('GDP per capita (USD)')
ax.set_ylabel('Share of Migrants (%)')
#format axis tick labels
def number_formatter(x, pos=None):
    """Format a tick value as a whole number with a K or M suffix.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value as a string, e.g. ``'3M'``, ``'40K'`` or ``'25'``.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:1.0f}M'.format(x * 1e-6)
    # Only abbreviate from one thousand upwards: this formatter is also used
    # on the percentage axis, where 0-100 would otherwise all read '0K'.
    if magnitude >= 1e3:
        return '{:1.0f}K'.format(x * 1e-3)
    return '{:1.0f}'.format(x)
ax.xaxis.set_major_formatter(number_formatter)
ax.yaxis.set_major_formatter(number_formatter)

# set chart source and copyright (second annotation is a blank placeholder line)
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# adjust chart margin and layout
fig.tight_layout()

# show chart
plt.show()
Connected scatterplot
A connected scatterplot is a type of visualization that displays the evolution of a series of data points that are connected by straight line segments. In some cases, it is not the most intuitive to read; but it is impressive for storytelling.
# Mean migrant share per country in 2005 and 2015, side by side.
# 'year' holds strings here, hence the quoted values.
change_df = (
    pop_and_perc[pop_and_perc['year'].isin(['2005', '2015'])]
    .pivot_table(index='country_name', columns='year',
                 values='percent_migrant', aggfunc='mean')
    # reindex guarantees both columns exist even if one year has no data
    .reindex(columns=['2005', '2015'])
    .rename(columns={'2005': 'pct_2005', '2015': 'pct_2015'})
    .rename_axis(columns=None)
    .dropna()
)

# Signed change in percentage points between 2005 and 2015
change_df['abs_change'] = change_df['pct_2015'] - change_df['pct_2005']

# 40 countries with the largest change in either direction
top_changers = change_df.sort_values('abs_change', key=abs, ascending=False).head(40)
print(top_changers)
pct_2005 pct_2015 abs_change
country_name
Northern Mariana Islands 58.320388 39.309969 -19.010419
United Arab Emirates 73.205122 88.404048 15.198926
Lebanon 18.981932 34.145680 15.163748
Kuwait 58.902838 73.639551 14.736713
Oman 26.573154 41.085874 14.512720
Maldives 14.770402 25.872182 11.101780
Luxembourg 32.897016 43.964134 11.067118
Armenia 15.559931 6.335893 -9.224038
Liechtenstein 54.223574 62.596254 8.372680
Singapore 38.050989 45.391792 7.340803
Monaco 63.038334 55.768466 -7.269868
Norway 7.809552 14.235611 6.426059
Saudi Arabia 26.275040 32.294942 6.019903
Cyprus 11.346755 16.834034 5.487280
Switzerland 24.369450 29.386686 5.017236
Cayman Islands 44.536536 39.565094 -4.971442
Bahrain 46.598786 51.126785 4.527999
Channel Islands 45.976617 50.281626 4.305010
Sweden 12.466995 16.767559 4.300563
Australia 24.060186 28.218410 4.158223
Belgium 8.245678 12.283533 4.037855
Palau 30.356156 26.602790 -3.753366
Malta 6.184934 9.898488 3.713555
Austria 13.798295 17.465726 3.667431
Israel 28.612892 24.946900 -3.665992
Turks and Caicos Islands 37.599244 34.037101 -3.562143
Latvia 16.912010 13.353240 -3.558770
United Kingdom 9.842476 13.200978 3.358502
Spain 9.365519 12.690237 3.324718
South Africa 2.504368 5.767090 3.262722
Seychelles 10.137807 13.258907 3.121099
Gambia, The 12.627539 9.670886 -2.956652
Canada 18.845865 21.801664 2.955799
Italy 6.742189 9.680768 2.938578
Brunei Darussalam 27.201987 24.275972 -2.926015
Iceland 8.590541 11.390150 2.799609
Bermuda 28.063387 30.846397 2.783010
Kyrgyz Republic 6.116681 3.440796 -2.675885
Gibraltar 31.674691 34.345221 2.670531
New Zealand 20.314707 22.959700 2.644993
selected_countries = [
    'United Arab Emirates',
    'Sweden',
    'Australia',
    'South Africa',
]

# Full time series for the selected countries, ordered chronologically within
# each country so a connected scatterplot can join the points over time
pop_and_perc_high_year = (
    pop_and_perc[['country_name', 'percent_migrant', 'total_population', 'incomeLevel', 'region', 'gdp_capita', 'year']]
    .query("country_name in @selected_countries")
    .sort_values(['country_name', 'year'])
    #.head(5)
)
pop_and_perc_high_year.to_csv('data-raw/scatterplot_connected.csv', index=False)
# Import libraries
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator
plt.style.use(['iompyplotstyle', 'connected_scatterplot'])

# Load dataset
df = pd.read_csv('data-raw/scatterplot_connected.csv')

# Define one colour per country so that the colours agree with the country
# legend; the palette is cycled if there are more countries than colours.
countries = df['country_name'].unique()
color_palette = ['#00B398', '#E1CC0D', '#589BE5', '#E84A3A']  # customize colors
country_colors = {
    country: color_palette[i % len(color_palette)]
    for i, country in enumerate(countries)
}

# Create plot
fig, ax = plt.subplots(figsize=(10, 6))

# Plot each country separately
for country in countries:
    # Drop points without coordinates and order them by year so the
    # connecting line follows time
    country_data = (
        df[df['country_name'] == country]
        .dropna(subset=['gdp_capita', 'percent_migrant'])
        .sort_values('year')
    )
    country_color = country_colors[country]

    # Scatter for population size
    ax.scatter(country_data['gdp_capita'], country_data['percent_migrant'],
               s=country_data['total_population'] / 1e6,  # Bubble size scaled
               color=country_color,
               alpha=0.7)

    # Line connecting points over time
    ax.plot(country_data['gdp_capita'], country_data['percent_migrant'],
            color=country_color,
            label=country)  # Legend will use country name here

    # Annotate year on each point
    for year, gdp, share in zip(country_data['year'],
                                country_data['gdp_capita'],
                                country_data['percent_migrant']):
        ax.annotate(str(year), (gdp, share), textcoords="offset points",
                    xytext=(3, 3), ha='left', fontsize=8)

# Axis Titles & Plot Title
ax.set_title('Evolution of Migrants, Population & Wealth for Selected Countries | 1990-2015', fontsize=14)
ax.set_xlabel('GDP per capita (USD)', fontsize=12)
ax.set_ylabel('Share of Migrants (%)', fontsize=12)

# Axis Ticks Limit
ax.xaxis.set_major_locator(MaxNLocator(4))
ax.yaxis.set_major_locator(MaxNLocator(4))

# Legend with Country Names
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Chart Source
plt.annotate('Source: World Bank Data API', (0, 0), (0, -40), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

# Layout & Save
fig.tight_layout()
#plt.savefig('outputs/connected_scatterplot.png', dpi=300, bbox_inches='tight')

# Show Plot
plt.show()
Heatmap
A heatmap is a type of visualization in which values are depicted through variations in colour within a two-dimensional matrix of cells. It allows us to visualize complex data and understand it at a glance.
# World Bank regional aggregates (example member countries in the comments)
selected_region = [
    'Europe & Central Asia',        # France, Kazakhstan, Poland
    'Middle East & North Africa',   # Egypt, Saudi Arabia, Morocco
    'Latin America & Caribbean',    # Brazil, Mexico, Jamaica
    'South Asia',                   # India, Pakistan, Bangladesh
    'Sub-Saharan Africa',           # Nigeria, Kenya, South Africa
    'North America',                # USA, Canada
    'East Asia & Pacific',          # China, Indonesia, Philippines
]

# Net migration of each regional aggregate, most recent year first
all_migrant_region_2013_2023 = (
    migrant_stock[['country_name', 'Net Migration', 'region', 'incomeLevel', 'year']]
    .query("country_name in @selected_region")
    .sort_values('year', ascending=False)
)

# Save the regional series to CSV for the heatmap example
all_migrant_region_2013_2023.to_csv('data-raw/heatmap.csv', index=False)
# import libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from textwrap import wrap
plt.style.use(['iompyplotstyle', 'heatmap'])

# load and reshape data set
df = pd.read_csv('data-raw/heatmap.csv')

# Create pivot table: Region x Year -> Net Migration
heatmap_df = df.groupby(['country_name', 'year'])['Net Migration'].sum().reset_index()
heatmap_df = heatmap_df.pivot(index='country_name', columns='year', values='Net Migration')

# Optional: Wrap region names for better display
heatmap_df.index = ['\n'.join(wrap(name, 20)) for name in heatmap_df.index]
# Define number formatter for annotations
def number_formatter(x):
    """Format a cell value for annotation with a K or M suffix.

    Args:
        x: Numeric value to format.

    Returns:
        ``'2.5M'`` style for magnitudes of a million or more, ``'12K'`` style
        for thousands, otherwise the value rounded to a whole number.
    """
    # Pick the unit from the magnitude so negative net migration is
    # abbreviated the same way as positive values.
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:.1f}M'.format(x * 1e-6)
    if magnitude >= 1e3:
        return '{:.0f}K'.format(x * 1e-3)
    return '{:.0f}'.format(x)
# Create formatted annotation labels; missing cells get an empty label
annot_df = heatmap_df.apply(
    lambda col: col.map(lambda value: number_formatter(value) if pd.notnull(value) else '')
)

# Plot heatmap
fig, ax = plt.subplots(figsize=(12, 6))

sns.heatmap(
    heatmap_df,
    annot=annot_df,   # use the pre-formatted K/M labels instead of raw numbers
    fmt='',           # labels are already strings, so no numeric format applies
    linewidths=.5,
    cmap='Blues',     # or try "coolwarm", "YlGnBu"
    square=True,
    cbar=True,
    ax=ax,
)

# Chart title
ax.set_title('Net Migration by Region (2013-2023)', fontsize=14, weight='bold')

# Remove axis labels (optional)
ax.set_xlabel('')
ax.set_ylabel('')

# Source annotation
plt.annotate('Source: World Bank Data API', (0,0), (0, -30), xycoords='axes fraction',
             textcoords='offset points', va='top', color='#666666', fontsize=9)

fig.tight_layout()

# show chart
plt.show()