A Matplotlib theme for IOM

How-to

Author

Edouard Legoupil, Chief Data Officer, IOM

Published

10 April 2025

Examples in Practice

The World Bank Data API offers a series of indicators related to migration:

Core Migration Metrics

SM.POP.NETM: Net migration — The net total of migrants during a period (immigrants minus emigrants)
SM.POP.MIGR: Net migration rate — The net number of migrants per 1,000 population during a period
SM.POP.TOTL: International migrant stock, total — The total number of people living in a country other than where they were born
SM.POP.TOTL.ZS: International migrant stock (% of population) — Migrant population as percentage of total population

Remittances

BX.TRF.PWKR.CD.DT: Personal remittances received (current US$) — Sum of personal transfers and compensation, Migrant worker transfers and border worker salaries
BX.TRF.PWKR.DT.GD.ZS: Personal remittances (% of GDP) — Remittances as percentage of GDP
SI.RMT.COST.OB.ZS: Average transaction cost of sending remittances from a specific country (%)
SI.RMT.COST.IB.ZS: Average transaction cost of sending remittances to a specific country (%)

Pulling the geographic reference

We will use the wbdata module

import wbdata
import pandas as pd
# Download the World Bank country catalogue and load it straight into a DataFrame
geo = pd.DataFrame(wbdata.get_countries())

# These columns hold nested dicts: keep only their 'value' entry
# (any cell that is not a dict becomes None)
for nested_column in ['region', 'adminregion', 'incomeLevel', 'lendingType']:
    geo[nested_column] = geo[nested_column].apply(
        lambda cell: cell['value'] if isinstance(cell, dict) else None
    )

# Coordinates become numbers; values that cannot be parsed turn into NaN
for coordinate in ['longitude', 'latitude']:
    geo[coordinate] = pd.to_numeric(geo[coordinate], errors='coerce')

# Use the country ID as the row index
geo = geo.set_index('id')

import sys
# Make the "iompyplotstyle" directory importable. The path is relative, so it
# is resolved against the current working directory.
sys.path.append("iompyplotstyle")  # Add custom path
# NOTE(review): presumably this import is what makes the 'iompyplotstyle'
# style sheets available to plt.style.use() in the cells below - confirm.
import iompyplotstyle

Column chart

In a column chart, each category is represented by a vertical rectangle, with the height of the rectangle being proportional to the values being plotted. The chart with iompyplotstyle + column styles:

For the first two examples, we will look at net migration for developed (OECD) countries.

import pandas as pd
import wbdata

# Download the net migration indicator (SM.POP.NETM) for all countries, 2013-2023
migrant_stock = wbdata.get_dataframe(
    {"SM.POP.NETM": "Net Migration"},
    country='all',
    date=("2013","2023"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)

# Turn the index into regular columns and give them the names used downstream
migrant_stock = (
    migrant_stock
    .reset_index()
    .rename(columns={'country': 'country_name', 'date': 'year'})
)

# Attach the country attributes held in geo, matching on the country name
migrant_stock = migrant_stock.merge(
    geo, how='left', left_on='country_name', right_on='name'
)

Let’s now calculate Net Migration Series in OECD Countries

# Keep only rows that have a Net Migration value, then make value and year numeric
migrant_stock = migrant_stock.dropna(subset=['Net Migration'])
for numeric_column in ['Net Migration', 'year']:
    migrant_stock[numeric_column] = pd.to_numeric(migrant_stock[numeric_column])

# 'OECD members' aggregate only: the 10 most recent years, newest first
oecd_columns = ['country_name', 'Net Migration', 'region', 'incomeLevel', 'year']
is_oecd = migrant_stock['country_name'] == 'OECD members'
all_migrant_stock_oecd_2013_2023 = (
    migrant_stock.loc[is_oecd, oecd_columns]
    .sort_values('year', ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)

# Write the OECD series to CSV; the column charts read it back from there
all_migrant_stock_oecd_2013_2023.to_csv('data-raw/column.csv', index=False)

Basic column chart

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
import iompyplotstyle 
# Activate the IOM base style together with the column-chart style
plt.style.use(['iompyplotstyle','column'])

# Read the OECD series: years on the x-axis, net migration as bar heights
df = pd.read_csv('data-raw/column.csv')
x, y = df['year'], df['Net Migration']

# Draw one vertical bar per year
fig, ax = plt.subplots()
bar_plot = ax.bar(x, y)

# Title and y-axis title
ax.set_title('Net Migration in OECD Countries')
ax.set_ylabel('Individuals (millions)')

# Show the tick labels on the left axis
ax.tick_params(labelleft=True)

# y-axis runs from 0 to the tallest bar plus 10% headroom
peak = max(y)
ylimit = ax.set_ylim(0, peak + 0.1*peak)

# One tick per year
ax.set_xticks(x)

# Grid lines for the y-axis only
ax.grid(axis='y')
#format y-axis tick labels
def number_formatter(x, pos):
    """Format a tick value with a K, M or B unit suffix.

    x is the tick value; pos is the tick position, which is unused but is
    part of the (x, pos) signature Matplotlib calls formatters with.

    The unit is chosen from the absolute value, so negative numbers are
    scaled like positive ones. One decimal is kept when it is needed
    (1.5M) and dropped when it is not (2M), so neighbouring ticks such as
    1.5M and 2M no longer collapse into the same label.
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            # Drop a trailing ".0" so whole numbers read "2M", not "2.0M"
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    # Below one thousand: plain number without decimals
    return '{:.0f}'.format(x)
# Use number_formatter for the y-axis tick labels
ax.yaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -25), (' ', -35)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Column chart with data label

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle','column'])

#load data set
df = pd.read_csv('data-raw/column.csv')

#compute data array for plotting
x = df['year']
y = df['Net Migration']

#plot the chart
fig, ax = plt.subplots()
bar_plot = ax.bar(x, y)

#format numbers with a K/M/B suffix (used for the ticks and the data labels)
def number_formatter(x, pos):
    """Format a value with a K, M or B unit suffix.

    x is the value; pos is the tick position (unused, but part of the
    signature Matplotlib calls formatters with). The unit is chosen from the
    absolute value and one decimal is kept only when needed (1.5M, 2M).
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    return '{:.0f}'.format(x)
ax.yaxis.set_major_formatter(number_formatter)

#set chart title
ax.set_title('Net Migration in OECD Countries')

#set subtitle
#plt.suptitle('Number of people in millions', x=0.025, y=0.88, ha='left')

#add the data label on each bar - this is what the section title promises
ax.bar_label(bar_plot, labels=[number_formatter(value, None) for value in y], padding=3)

#one tick per year, as in the basic column chart
ax.set_xticks(x)

#adjust chart margin and layout
fig.tight_layout()

#show chart
plt.show()

Text(0.0, 1.0, 'Net Migration in OECD Countries')

Bar chart

A bar chart is a chart in which each category is represented by a horizontal rectangle, with the length of the rectangle proportional to the values being plotted. The horizontal axis shows data value, and the vertical axis displays the categories being compared.

The chart with iompyplotstyle + bar styles:

Let’s compute net migration for High Income Countries in 2023:

# Clean up - drop rows with missing values and convert to numeric
migrant_stock = migrant_stock.dropna(subset=['Net Migration'])
migrant_stock['Net Migration'] = pd.to_numeric(migrant_stock['Net Migration'])
# The year column is numeric once the OECD cell has run, so comparing it with
# the string '2023' matches nothing. Make it numeric here as well (so this cell
# also works on its own) and compare against the number 2023.
migrant_stock['year'] = pd.to_numeric(migrant_stock['year'])

# Ten high-income countries with the largest net migration in 2023
top10_migrant_stock_high_income_2023 = (
    migrant_stock[['country_name', 'Net Migration', 'region', 'incomeLevel','year' ]]
    .query("incomeLevel == 'High income' and year == 2023")
    .sort_values('Net Migration', ascending=False)
    .head(10)
    .reset_index(drop=True)
)
# Save the top 10 DataFrame to CSV for the bar charts
top10_migrant_stock_high_income_2023.to_csv('data-raw/bar.csv', index=False)

Basic bar chart

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
from textwrap import wrap
# Activate the IOM base style together with the bar-chart style
plt.style.use(['iompyplotstyle','bar'])

# Read the top-10 table and order it by ascending net migration
df = pd.read_csv('data-raw/bar.csv').sort_values('Net Migration')

# Bar lengths, and category labels wrapped at 20 characters
y = df['Net Migration']
x = ['\n'.join(wrap(country, 20)) for country in df['country_name']]

# Draw one horizontal bar per country
fig, ax = plt.subplots()
bar_plot = ax.barh(x, y)

# Title and x-axis title
ax.set_title('Top 10 Net Migration in High Income Countries | 2023')
ax.set_xlabel('Individuals')

# Show the tick labels on the bottom axis
ax.tick_params(labelbottom=True)

# x-axis limit is left to Matplotlib
#ylimit = plt.xlim(0, max(y)+0.1*(max(y)))

# Grid lines for the x-axis only
ax.grid(axis='x')

#format x-axis tick labels
def number_formatter(x, pos):
    """Format a tick value with a K, M or B unit suffix.

    x is the tick value; pos is the tick position, which is unused but is
    part of the (x, pos) signature Matplotlib calls formatters with.

    The unit is chosen from the absolute value, so negative net-migration
    values are scaled like positive ones. One decimal is kept only when it
    is needed (1.5M vs 2M), so distinct ticks do not share a label.
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            # Drop a trailing ".0" so whole numbers read "2M", not "2.0M"
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    # Below one thousand: plain number without decimals
    return '{:.0f}'.format(x)
# Use number_formatter for the x-axis tick labels
ax.xaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -40), (' ', -50)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Bar chart with data label

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
from textwrap import wrap
plt.style.use(['iompyplotstyle','bar'])

#load data set
df = pd.read_csv('data-raw/bar.csv')

#sort values in ascending order
df.sort_values('Net Migration', inplace=True)

#prepare data array for plotting
x = df['country_name']
y = df['Net Migration']

#wrap long labels
x = [ '\n'.join(wrap(l, 20)) for l in x ]

#plot the chart
fig, ax = plt.subplots()
bar_plot = ax.barh(x, y)

#set chart title - this must come after plt.subplots(), otherwise the title
#is written on the axes of the previous figure instead of this chart
ax.set_title('Top 10 Net Migration in High Income Countries | 2023')

#add the data label at the end of each bar - this is what the section title promises
ax.bar_label(bar_plot, labels=['{:,.0f}'.format(value) for value in y], padding=3)

#adjust chart margin and layout
fig.tight_layout()

#show chart
plt.show()

Line chart

A line chart is a type of chart that displays the evolution of one or several numeric variables over a continuous interval or time period. Typically, the x-axis is used for a timescale or a sequence of intervals, while the y-axis reports values across that progression.

The chart with iompyplotstyle + line styles:

import wbdata
# Download personal remittances received (BX.TRF.PWKR.CD.DT), 2013-2023, all countries
remittance_indicator = {'BX.TRF.PWKR.CD.DT': 'remittance'}
remittance = wbdata.get_dataframe(
    indicators=remittance_indicator,
    country='all',
    date=("2013","2023"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)

# Index to columns, clearer column names, then attach the country attributes
remittance = (
    remittance
    .reset_index()
    .rename(columns={'country': 'country_name', 'date': 'year'})
    .merge(geo, how='left', left_on='country_name', right_on='name')
)

# 'World' aggregate only, newest year first
is_world = remittance['country_name'] == 'World'
remittance_world = (
    remittance.loc[is_world, ['country_name', 'remittance', 'year']]
    .sort_values('year', ascending=False)
)

# Write the series to CSV for the single line chart
remittance_world.to_csv('data-raw/line.csv', index=False)

Single line chart

# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import iompyplotstyle
# The line style sheet is left switched off here
#plt.style.use(['iompyplotstyle','line'])

# Read the world remittance series: years on x, amounts on y
df = pd.read_csv('data-raw/line.csv')
x, y = df['year'], df['remittance']

# Draw the line
fig, ax = plt.subplots()
bar_plot = ax.plot(x, y)

# Chart title (the subtitle is kept disabled)
ax.set_title('Evolution of Remittance ')
#plt.suptitle('Personal transfers/compensation received')

# y-axis title
ax.set_ylabel('(current US$)')

# y-axis runs from 0 to the highest value plus 10% headroom
peak = max(y)
ylimit = ax.set_ylim(0, peak + 0.1*peak)

#format y-axis tick labels
def number_formatter(x, pos):
    """Return the tick value as short text with a B, M or K unit suffix.

    x is the tick value; pos is the tick position (not used). The unit is
    picked from the absolute value, the number is shown with one decimal,
    and a ".0" is removed so whole numbers stay short.
    """
    size = abs(x)
    # (lower bound, scale factor, suffix), largest unit first
    units = ((1e9, 1e-9, 'B'), (1e6, 1e-6, 'M'), (1e3, 1e-3, 'K'))
    for lower_bound, factor, suffix in units:
        if size >= lower_bound:
            text = '{:1.1f}'.format(x*factor) + suffix
            break
    else:
        # Smaller than one thousand: no unit, no decimals
        text = '{:1.0f}'.format(x)
    return text.replace('.0', '')
# Use number_formatter for the y-axis tick labels
ax.yaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -25), (' ', -35)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Multiple line chart

# The four demographic-dividend aggregates to compare
demographic_categories = [
    'Early-demographic dividend',
    'Late-demographic dividend',
    'Post-demographic dividend',
    'Pre-demographic dividend',
]

# Rows for those aggregates only, newest year first
in_categories = remittance['country_name'].isin(demographic_categories)
remittance_demo = (
    remittance.loc[in_categories, ['country_name', 'remittance', 'year']]
    .sort_values('year', ascending=False)
)

# Reshape to one row per year and one column per aggregate, year as a column
remittance_demo = (
    remittance_demo
    .pivot(index='year', columns='country_name', values='remittance')
    .reset_index()
)

# Write the wide table to CSV for the multiple line chart
remittance_demo.to_csv('data-raw/line2.csv', index=False)

# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# The line style sheet is left switched off here
#plt.style.use(['iompyplotstyle','line'])

# Read the wide table: one column per demographic-dividend aggregate
df = pd.read_csv('data-raw/line2.csv')

# One entry per line: CSV column, short direct label, label alignment
series_specs = [
    ('Early-demographic dividend', 'Early', 'center'),
    ('Late-demographic dividend', 'Late', 'left'),
    ('Post-demographic dividend', 'Post', 'left'),
    ('Pre-demographic dividend', 'Pre', 'left'),
]

# Data for plotting
x = df['year']
y1, y2, y3, y4 = (df[column] for column, _, _ in series_specs)

# Draw the four lines
fig, ax = plt.subplots()
for series in (y1, y2, y3, y4):
    bar_plot = ax.plot(x, series)

# Chart title and y-axis title
ax.set_title('Remittance Vs Demographic Dividend')
ax.set_ylabel('(current US$)')

# y-axis limit is left to Matplotlib
#ylimit = plt.ylim(0, 60 * 1e6)

# Direct labels: each short label sits 10 points above the last point of its line
last_x = x.iloc[-1]
for (_, short_label, alignment), series in zip(series_specs, (y1, y2, y3, y4)):
    plt.annotate(short_label, (last_x, series.iloc[-1]),
                 textcoords="offset points",
                 xytext=(0,10),
                 ha=alignment)


#format y-axis tick labels
def number_formatter(x, pos):
    """Shorten a tick value to text with a unit suffix (K, M, B).

    x is the tick value and pos the tick position (ignored). The absolute
    value decides the unit; ".0" is stripped so whole numbers look cleaner.
    """
    def tidy(text):
        # Remove .0 from whole numbers for cleaner display
        return text.replace('.0', '') if '.0' in text else text

    size = abs(x)
    if size >= 1e9:
        return tidy('{:1.1f}B'.format(x*1e-9))   # billions
    if size >= 1e6:
        return tidy('{:1.1f}M'.format(x*1e-6))   # millions
    if size >= 1e3:
        return tidy('{:1.1f}K'.format(x*1e-3))   # thousands
    return tidy('{:1.0f}'.format(x))             # units
# Use number_formatter for the y-axis tick labels
ax.yaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -25), (' ', -35)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Scatterplot

A scatterplot is a type of visualization using Cartesian Coordinates to display two variables for a set of data. The data are displayed as a collection of dots. The position of each dot on the horizontal and vertical axis indicates the values for an individual data point.

The chart with iompyplotstyle + scatterplot styles:

import wbdata
# Download population, migrant share and GDP per capita, 1990-2015, all countries
scatter_indicators = {
    'SP.POP.TOTL': 'total_population',
    'SM.POP.TOTL.ZS': 'percent_migrant',
    'NY.GDP.PCAP.CD': 'gdp_capita',
}
pop_and_perc = wbdata.get_dataframe(
    indicators=scatter_indicators,
    country='all',
    date=("1990","2015"),
    freq='Y',
    source=None,
    parse_dates=False,
    keep_levels=False,
    skip_cache=False,
)

# Index to columns, clearer column names, then attach the country attributes
pop_and_perc = (
    pop_and_perc
    .reset_index()
    .rename(columns={'country': 'country_name', 'date': 'year'})
    .merge(geo, how='left', left_on='country_name', right_on='name')
)

# Keep only rows where both population and migrant share are present
pop_and_perc = pop_and_perc.dropna(subset=['total_population', 'percent_migrant'])

# High-income countries in 2015 (year is still text here): the 10 most populous
scatter_columns = ['country_name', 'percent_migrant', 'total_population',
                   'incomeLevel', 'region', 'gdp_capita', 'year']
is_high_income_2015 = (
    (pop_and_perc['incomeLevel'] == 'High income') & (pop_and_perc['year'] == '2015')
)
pop_and_perc_high = (
    pop_and_perc.loc[is_high_income_2015, scatter_columns]
    .sort_values('total_population', ascending=False)
    .head(10)
)

# Write the table to CSV for the scatterplot and bubble charts
pop_and_perc_high.to_csv('data-raw/scatterplot.csv', index=False)

Scatterplot

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
# Activate the IOM base style together with the scatterplot style
plt.style.use(['iompyplotstyle','scatterplot'])

# Read the data: population on x, migrant share on y, country names as labels
df = pd.read_csv('data-raw/scatterplot.csv')
x, y, label = df['total_population'], df['percent_migrant'], df['country_name']

# Draw the dots
fig, ax = plt.subplots()
ax.scatter(x, y, s=30)

# Write each country name 5 points above its dot
for country, x_value, y_value in zip(label, x, y):
    plt.annotate(country, (x_value, y_value), textcoords="offset points", xytext=(0,5), ha='left')

# Chart title
ax.set_title('Migrants and Population for top 10 largest High Income Countries | 2015')

# Axis titles
ax.set_xlabel('Total Population (millions)')
ax.set_ylabel('Share of Migrants (%)')

#format axis tick labels
def number_formatter(x, pos):
    """Format a tick value with a K, M or B unit suffix.

    x is the tick value; pos is the tick position, which is unused but is
    part of the (x, pos) signature Matplotlib calls formatters with.

    Values below one thousand are printed as plain numbers; they used to be
    pushed through the K branch and came out as "0K" or "1K". The unit is
    chosen from the absolute value, and one decimal is kept only when it is
    needed (1.5M vs 2M).
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            # Drop a trailing ".0" so whole numbers read "2M", not "2.0M"
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    # Below one thousand: plain number without decimals
    return '{:.0f}'.format(x)
# Use number_formatter for the x-axis tick labels only; the y-axis keeps plain numbers
ax.xaxis.set_major_formatter(number_formatter)
#ax.yaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -40), (' ', -50)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Scatterplot with colours

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle','scatterplot'])

#load data set
df = pd.read_csv('data-raw/scatterplot.csv')

#compute data array for plotting
x = df['total_population']
y = df['percent_migrant']
label = df['country_name']
regions = df['region']

# Create a color mapping for regions.
# The palette index wraps around (i % len) so the cell does not fail with an
# IndexError when the data contains more regions than there are colours.
unique_regions = regions.unique()
color_palette = ['#00B398', '#E1CC0D', '#589BE5']  # Your chosen colors
region_colors = {
    region: color_palette[i % len(color_palette)]
    for i, region in enumerate(unique_regions)
}

# Map each point to its region color
point_colors = [region_colors[region] for region in regions]

#plot the chart
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x, y, s=100, c=point_colors, alpha=0.7)  # Increased point size and added transparency

# Annotate every point with its country name
for country, x_value, y_value in zip(label, x, y):
    plt.annotate(country, (x_value, y_value), textcoords="offset points", xytext=(0,5), ha='left')

#set chart title
ax.set_title('Migrants and Population for top 10 largest High Income Countries | 2015')

#set axis label
ax.set_xlabel('Total Population (millions)')
ax.set_ylabel('Share of Migrants (%)')

#format axis tick labels
def number_formatter(x, pos):
    """Format a tick value with a K, M or B unit suffix.

    x is the tick value; pos is the tick position, which is unused but is
    part of the (x, pos) signature Matplotlib calls formatters with.

    Values below one thousand are printed as plain numbers; they used to be
    pushed through the K branch and came out as "0K" or "1K". The unit is
    chosen from the absolute value, and one decimal is kept only when it is
    needed (1.5M vs 2M).
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            # Drop a trailing ".0" so whole numbers read "2M", not "2.0M"
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    # Below one thousand: plain number without decimals
    return '{:.0f}'.format(x)
# Use number_formatter for the x-axis tick labels only; the y-axis keeps plain numbers
ax.xaxis.set_major_formatter(number_formatter)
#ax.yaxis.set_major_formatter(number_formatter)

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -40), (' ', -50)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Bubble chart

A bubble chart displays multi-dimensional data in a two-dimensional plot. It can be considered as a variation of the scatterplot, in which the dots are replaced with bubbles. However, unlike a scatterplot which has only two variables defined by the X and Y axis, on a bubble chart each data point (bubble) can be assigned with a third variable (by size of bubble) and a fourth variable (by colour of bubble).

Bubble chart

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
# Activate the IOM base style together with the bubble-chart style
plt.style.use(['iompyplotstyle','bubble'])

# Read the data: GDP per capita on x, migrant share on y, population as bubble size
df = pd.read_csv('data-raw/scatterplot.csv')
x, y = df['gdp_capita'], df['percent_migrant']
size = df['total_population']
label = df['country_name']

# Draw the bubbles; the population is divided by 100000 to get the marker size
fig, ax = plt.subplots()
ax.scatter(x, y, s=size/100000)

# Write each country name 10 points above its bubble centre
for country, x_value, y_value in zip(label, x, y):
    plt.annotate(country, (x_value, y_value), textcoords="offset points", xytext=(0,10), ha='left')

# Chart title
ax.set_title('Migrants, Population & Wealth for top 10 largest High Income Countries | 2015')

# Axis titles
ax.set_xlabel('GDP per capita (USD)')
ax.set_ylabel('Share of Migrants (%)')

# Footer below the axes: the source line, then the (blank) copyright line
footer_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
for footer_text, offset in [('Source: World Bank Data API', -40), (' ', -50)]:
    plt.annotate(footer_text, (0,0), (0, offset), **footer_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()

Bubble chart with colours

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use(['iompyplotstyle','bubble'])

#load data set
df = pd.read_csv('data-raw/scatterplot.csv')

#compute data array for plotting
size = df['total_population']
y = df['percent_migrant']
x = df['gdp_capita']
label = df['country_name']
regions = df['region']

# Create a color mapping for regions.
# The palette index wraps around (i % len) so the cell does not fail with an
# IndexError when the data contains more regions than there are colours.
unique_regions = regions.unique()
color_palette = ['#00B398', '#E1CC0D', '#589BE5']  # Your chosen colors
region_colors = {
    region: color_palette[i % len(color_palette)]
    for i, region in enumerate(unique_regions)
}

# Map each point to its region color
point_colors = [region_colors[region] for region in regions]

#plot the chart: bubble size from population, colour from region
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x, y, s=size/100000, c=point_colors, alpha=0.7)  # Added transparency

# Annotate every bubble with its country name
for country, x_value, y_value in zip(label, x, y):
    plt.annotate(country, (x_value, y_value), textcoords="offset points", xytext=(0,10), ha='left')

#set chart title
ax.set_title('Migrants, Population & Wealth for top 10 largest High Income Countries | 2015')

#set axis label
ax.set_xlabel('GDP per capita (USD)')
ax.set_ylabel('Share of Migrants (%)')

#format axis tick labels
def number_formatter(x, pos):
    """Format a tick value with a K, M or B unit suffix.

    x is the tick value; pos is the tick position, which is unused but is
    part of the (x, pos) signature Matplotlib calls formatters with.

    Values below one thousand are printed as plain numbers; they used to be
    pushed through the K branch and came out as "0K" or "1K". The unit is
    chosen from the absolute value, and one decimal is kept only when it is
    needed (1.5M vs 2M).
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            text = '{:.1f}'.format(x / threshold)
            # Drop a trailing ".0" so whole numbers read "2M", not "2.0M"
            if text.endswith('.0'):
                text = text[:-2]
            return text + suffix
    # Below one thousand: plain number without decimals
    return '{:.0f}'.format(x)
# Apply the K/M formatter to the x-axis (GDP per capita) only.
# The y-axis is a percentage: sending it through a thousands/millions
# formatter mislabels the ticks, so it keeps Matplotlib's default labels,
# as in the other scatter and bubble charts.
ax.xaxis.set_major_formatter(number_formatter)

#set chart source and copyright
plt.annotate('Source: World Bank Data API', (0,0), (0, -40), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)
plt.annotate(' ', (0,0), (0, -50), xycoords='axes fraction', textcoords='offset points', va='top', color = '#666666', fontsize=9)

#adjust chart margin and layout
fig.tight_layout()

#show chart
plt.show()

Connected scatterplot

A connected scatterplot is a type of visualization that displays the evolution of a series of data points that are connected by straight line segments. In some cases, it is not the most intuitive to read; but it is impressive for storytelling.

# Mean migrant share per country for one given year (year is text in pop_and_perc)
def _mean_share_in(year):
    rows_in_year = pop_and_perc.loc[pop_and_perc['year'] == year]
    return rows_in_year.groupby('country_name')['percent_migrant'].mean()

# Side-by-side 2005 and 2015 shares; keep only countries that have both
change_df = pd.concat(
    {'pct_2005': _mean_share_in('2005'), 'pct_2015': _mean_share_in('2015')},
    axis=1,
).dropna()

# Change in percentage points between the two years
change_df['abs_change'] = change_df['pct_2015'].sub(change_df['pct_2005'])

# The 40 countries with the largest change in either direction
top_changers = change_df.sort_values('abs_change', key=abs, ascending=False).head(40)
print(top_changers)

                           pct_2005   pct_2015  abs_change
country_name                                              
Northern Mariana Islands  58.320388  39.309969  -19.010419
United Arab Emirates      73.205122  88.404048   15.198926
Lebanon                   18.981932  34.145680   15.163748
Kuwait                    58.902838  73.639551   14.736713
Oman                      26.573154  41.085874   14.512720
Maldives                  14.770402  25.872182   11.101780
Luxembourg                32.897016  43.964134   11.067118
Armenia                   15.559931   6.335893   -9.224038
Liechtenstein             54.223574  62.596254    8.372680
Singapore                 38.050989  45.391792    7.340803
Monaco                    63.038334  55.768466   -7.269868
Norway                     7.809552  14.235611    6.426059
Saudi Arabia              26.275040  32.294942    6.019903
Cyprus                    11.346755  16.834034    5.487280
Switzerland               24.369450  29.386686    5.017236
Cayman Islands            44.536536  39.565094   -4.971442
Bahrain                   46.598786  51.126785    4.527999
Channel Islands           45.976617  50.281626    4.305010
Sweden                    12.466995  16.767559    4.300563
Australia                 24.060186  28.218410    4.158223
Belgium                    8.245678  12.283533    4.037855
Palau                     30.356156  26.602790   -3.753366
Malta                      6.184934   9.898488    3.713555
Austria                   13.798295  17.465726    3.667431
Israel                    28.612892  24.946900   -3.665992
Turks and Caicos Islands  37.599244  34.037101   -3.562143
Latvia                    16.912010  13.353240   -3.558770
United Kingdom             9.842476  13.200978    3.358502
Spain                      9.365519  12.690237    3.324718
South Africa               2.504368   5.767090    3.262722
Seychelles                10.137807  13.258907    3.121099
Gambia, The               12.627539   9.670886   -2.956652
Canada                    18.845865  21.801664    2.955799
Italy                      6.742189   9.680768    2.938578
Brunei Darussalam         27.201987  24.275972   -2.926015
Iceland                    8.590541  11.390150    2.799609
Bermuda                   28.063387  30.846397    2.783010
Kyrgyz Republic            6.116681   3.440796   -2.675885
Gibraltar                 31.674691  34.345221    2.670531
New Zealand               20.314707  22.959700    2.644993

# Countries shown in the connected scatterplot
selected_countries = ['United Arab Emirates', 'Sweden', 'Australia', 'South Africa']

# All available years for those countries, largest population first
connected_columns = ['country_name', 'percent_migrant', 'total_population',
                     'incomeLevel', 'region', 'gdp_capita', 'year']
is_selected = pop_and_perc['country_name'].isin(selected_countries)
pop_and_perc_high_year = (
    pop_and_perc.loc[is_selected, connected_columns]
    .sort_values('total_population', ascending=False)
)

# Write the table to CSV for the connected scatterplot
pop_and_perc_high_year.to_csv('data-raw/scatterplot_connected.csv', index=False)

# Import libraries
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator

# Activate the IOM base style together with the connected-scatterplot style
plt.style.use(['iompyplotstyle', 'connected_scatterplot'])

# Read the data written by the previous cell
df = pd.read_csv('data-raw/scatterplot_connected.csv')

# Columns used for plotting
x, y = df['gdp_capita'], df['percent_migrant']
size = df['total_population']
labels, regions, years = df['country_name'], df['region'], df['year']

# One colour per region; the palette is reused from the start if it runs out
color_palette = ['#00B398', '#E1CC0D', '#589BE5', '#E84A3A']  # customize colors
unique_regions = regions.unique()
region_colors = {}
for i, region in enumerate(unique_regions):
    region_colors[region] = color_palette[i % len(color_palette)]

# Figure and axes
fig, ax = plt.subplots(figsize=(10, 6))

# Plot each country separately
for country in df['country_name'].unique():
    # Put this country's observations in chronological order. The CSV is
    # sorted by population, so without this the connecting line follows
    # population order rather than time whenever the two differ.
    country_data = df[df['country_name'] == country].sort_values('year')
    country_color = region_colors[country_data['region'].iloc[0]]

    # Scatter for population size
    ax.scatter(country_data['gdp_capita'], country_data['percent_migrant'],
               s=country_data['total_population'] / 1e6,  # Bubble size scaled
               color=country_color,
               alpha=0.7)

    # Line connecting points over time
    ax.plot(country_data['gdp_capita'], country_data['percent_migrant'],
            color=country_color,
            label=country)  # Legend will use country name here

    # Annotate year on each point
    for year, gdp, share in zip(country_data['year'],
                                country_data['gdp_capita'],
                                country_data['percent_migrant']):
        ax.annotate(str(year), (gdp, share), textcoords="offset points", xytext=(3, 3), ha='left', fontsize=8)

# Plot title and axis titles
ax.set_title('Evolution of Migrants, Population & Wealth for Selected Countries | 1990-2015', fontsize=14)
ax.set_xlabel('GDP per capita (USD)', fontsize=12)
ax.set_ylabel('Share of Migrants (%)', fontsize=12)

# Same tick locator setting (MaxNLocator(4)) on both axes
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(MaxNLocator(4))

# Legend listing the countries, anchored just outside the axes on the right
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Source line below the axes
source_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
plt.annotate('Source: World Bank Data API', (0, 0), (0, -40), **source_style)

# Fit everything inside the figure (saving to file is kept disabled)
fig.tight_layout()
#plt.savefig('outputs/connected_scatterplot.png', dpi=300, bbox_inches='tight')

# Display the figure
plt.show()

Heatmap

A heatmap is a type of visualization in which values are depicted through variations in colour within a two-dimensional matrix of cells. It allows us to visualize complex data and understand it at a glance.

# Regional aggregates shown in the heatmap (example countries in the comments)
selected_region = [
    'Europe & Central Asia',       # France, Kazakhstan, Poland
    'Middle East & North Africa',  # Egypt, Saudi Arabia, Morocco
    'Latin America & Caribbean',   # Brazil, Mexico, Jamaica
    'South Asia',                  # India, Pakistan, Bangladesh
    'Sub-Saharan Africa',          # Nigeria, Kenya, South Africa
    'North America',               # USA, Canada
    'East Asia & Pacific',         # China, Indonesia, Philippines
]

# Net migration rows for those aggregates, newest year first
region_columns = ['country_name', 'Net Migration', 'region', 'incomeLevel', 'year']
is_selected_region = migrant_stock['country_name'].isin(selected_region)
all_migrant_region_2013_2023 = (
    migrant_stock.loc[is_selected_region, region_columns]
    .sort_values('year', ascending=False)
)

# Write the regional series to CSV for the heatmap
all_migrant_region_2013_2023.to_csv('data-raw/heatmap.csv', index=False)

# import libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from textwrap import wrap
# Activate the IOM base style together with the heatmap style
plt.style.use(['iompyplotstyle', 'heatmap'])

# Read the regional net migration series
df = pd.read_csv('data-raw/heatmap.csv')

# Region x year matrix: sum Net Migration per (region, year), then spread years over columns
heatmap_df = (
    df.groupby(['country_name', 'year'])['Net Migration']
    .sum()
    .unstack('year')
)

# Wrap long region names at 20 characters so the row labels stay compact
heatmap_df.index = ['\n'.join(wrap(region_name, 20)) for region_name in heatmap_df.index]

# Define number formatter for annotations
def number_formatter(x):
    """Format a cell value with an M or K unit suffix.

    Millions are shown with one decimal (2.5M), thousands with none (45K),
    smaller values as plain integers. The unit is chosen from the absolute
    value, so negative net-migration figures are shortened too
    (-2500000 becomes -2.5M instead of being printed in full).
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        return '{:.1f}M'.format(x / 1e6)
    if magnitude >= 1e3:
        return '{:.0f}K'.format(x / 1e3)
    return '{:.0f}'.format(x)

# Build the annotation table: same shape as heatmap_df, each value as short
# text (K/M), empty text where the value is missing
annot_df = heatmap_df.apply(lambda col: col.map(lambda x: number_formatter(x) if pd.notnull(x) else ''))


# Plot heatmap
fig, ax = plt.subplots(figsize=(12, 6))

sns.heatmap(
    heatmap_df,
    annot=annot_df,  # show the formatted text; it was computed but never used before
    fmt='',          # annotations are already strings, so no number format is applied
    linewidths=.5,
    cmap='Blues',  # or try "coolwarm", "YlGnBu"
    square=True,
    cbar=True,
    ax=ax,           # draw on the axes created above
)

# Title in bold
ax.set_title('Net Migration by Region (2013-2023)', fontsize=14, weight='bold')

# Blank out both axis titles
for set_axis_title in (ax.set_xlabel, ax.set_ylabel):
    set_axis_title('')

# Source line below the axes
source_style = dict(xycoords='axes fraction', textcoords='offset points',
                    va='top', color='#666666', fontsize=9)
plt.annotate('Source: World Bank Data API', (0,0), (0, -30), **source_style)

# Fit everything inside the figure, then display it
fig.tight_layout()
plt.show()