World Bank Data360 API - Introduction and EDA

The World Bank Data360 platform provides programmatic access to hundreds of development datasets covering GDP, trade, poverty, health, education, and demographic indicators across 200+ countries spanning multiple decades. No API key or authentication is required.

API base URL: https://data360api.worldbank.org/data360/
API documentation: data360.worldbank.org/en/api
Data browser: data360.worldbank.org/en/search

Endpoint	Method	Purpose
`/searchv2`	POST	Search indicators across all datasets
`/indicators`	GET	List all indicators in a dataset
`/data`	GET	Fetch numeric observations
`/metadata`	POST	Get indicator metadata (title, source, notes)
`/disaggregation`	GET	Get available breakdown dimensions

This notebook uses the World Development Indicators (WB_WDI) which is the World Bank's primary dataset, covering over 1,400 time series across all member countries.

In [3]:

import json
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pycountry # pip install pycountry

import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, HTML

FL_BLUE   = '#2563eb'
FL_SLATE  = '#64748b'
FL_AMBER  = '#f59e0b'
FL_GREEN  = '#16a34a'
FL_RED    = '#ef4444'
FL_BG     = '#ffffff'
FL_GRID   = '#e2e8f0'
FL_TEXT   = '#0f172a'
FL_TEXT2  = '#334155'
FL_BORDER = '#e2e8f0'

matplotlib.rcParams.update({
    'figure.facecolor': FL_BG,    'axes.facecolor':   FL_BG,
    'axes.edgecolor':   FL_BORDER, 'axes.labelcolor':  FL_TEXT2,
    'axes.spines.top':  False,     'axes.spines.right':False,
    'axes.grid':        True,      'grid.color':       FL_GRID,
    'grid.linewidth':   0.7,       'xtick.color':      FL_TEXT2,
    'ytick.color':      FL_TEXT2,  'xtick.labelsize':  10,
    'ytick.labelsize':  10,        'axes.labelsize':   11,
    'axes.titlesize':   12,        'axes.titlecolor':  FL_TEXT,
    'axes.titlepad':    12,        'legend.frameon':   False,
    'legend.fontsize':  10,        'figure.dpi':       300,
    'savefig.bbox':     'tight',   'font.family':      'sans-serif',
    'font.sans-serif':  ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})

BASE_URL  = 'https://data360api.worldbank.org/data360'
WDI_DB    = 'WB_WDI'
SESSION   = requests.Session()
SESSION.headers.update({'accept': '*/*', 'Content-Type': 'application/json'})

def search(keyword, top=10):
    r = SESSION.post(f'{BASE_URL}/searchv2', json={
        'count': True,
        'select': 'series_description/idno, series_description/name, series_description/database_id',
        'search': keyword,
        'top': top,
    })
    r.raise_for_status()
    return r.json()

def get_indicators(dataset_id, top=20):
    r = SESSION.get(f'{BASE_URL}/indicators',
                    params={'datasetId': dataset_id, 'top': top})
    r.raise_for_status()
    return r.json()

def get_data(dataset_id, indicator_id, countries=None,
             year_from=None, year_to=None, skip=0, top=5000):
    params = {
        'DATABASE_ID': dataset_id,
        'INDICATOR':   indicator_id,
        'skip':        skip,
        'top':         top,
    }
    if countries:
        params['REF_AREA'] = ','.join(countries) if isinstance(countries, list) else countries
    if year_from:
        params['timePeriodFrom'] = year_from
    if year_to:
        params['timePeriodTo'] = year_to
    r = SESSION.get(f'{BASE_URL}/data', params=params)
    r.raise_for_status()
    return r.json()

def get_metadata(indicator_idno):
    r = SESSION.post(f'{BASE_URL}/metadata', json={
        'query': f"&$filter=series_description/idno eq '{indicator_idno}'"
    })
    r.raise_for_status()
    return r.json()

def get_disaggregation(dataset_id, indicator_id):
    r = SESSION.get(f'{BASE_URL}/disaggregation',
                    params={'datasetId': dataset_id, 'indicatorId': indicator_id})
    r.raise_for_status()
    return r.json()

def to_df(raw, value_col='OBS_VALUE', country_col='REF_AREA',
          time_col='TIME_PERIOD'):
    rows = raw.get('value', raw) if isinstance(raw, dict) else raw
    df = pd.DataFrame(rows)
    if df.empty:
        return df
    if value_col in df.columns:
        df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    if time_col in df.columns:
        df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
    return df


_test = get_data(WDI_DB, 'WB_WDI_NY_GDP_PCAP_CD',
                 countries=['USA'], year_from=2022, year_to=2022)
_val = _test.get('value', [])
if _val:
    print(f'Connection OK - WDI GDP per capita (USA, 2022): ${float(_val[0]["OBS_VALUE"]):,.0f}')
else:
    print('Connected - no value returned for test query')

Connection OK - WDI GDP per capita (USA, 2022): $76,657

API structure

The Data360 API is organised around datasets (e.g. WB_WDI, WHO_GHO) containing indicators (e.g. WB_WDI_NY_GDP_PCAP_CD). Data is filtered by dimensions: REF_AREA (country ISO3 code), TIME_PERIOD (year), FREQ (frequency), and optional breakdowns (SEX, AGE, URBANISATION).

The indicator ID pattern for WDI is WB_WDI_ + the classic WDI series code (e.g. NY.GDP.PCAP.CD → NY_GDP_PCAP_CD). Discover series IDs via the search endpoint or the data browser.

In [4]:

# Search for GDP-related indicators across all datasets
results = search('GDP per capita', top=8)
print(f'Total matches: {results.get("@odata.count", "?")}\n')

hits = results.get('value', [])

def pick_search_fields(h):
    sd = h.get('series_description', {}) if isinstance(h.get('series_description'), dict) else {}
    return {
        'Indicator ID': h.get('series_description/idno') or sd.get('idno') or h.get('idno', ''),
        'Name': h.get('series_description/name') or sd.get('name') or h.get('name', ''),
        'Dataset': h.get('series_description/database_id') or sd.get('database_id') or h.get('database_id', ''),
    }

search_df = pd.DataFrame([pick_search_fields(h) for h in hits])
display(search_df[['Name','Dataset']].head())

Total matches: 1859

	Name	Dataset
0	GDP per capita, PPP	WB_SSGD
1	Log of GDP per capita, PPP	WB_SSGD
2	Government expenditure per student, tertiary (...	WB_WDI
3	Survey mean consumption or income per capita, ...	WB_WDI
4	Government expenditure per student, secondary ...	WB_WDI

In [5]:

print('Sample WDI indicators:')

wdi_indicators = get_indicators(WDI_DB, top=12)

ind_df = pd.DataFrame({
    'Indicator ID': wdi_indicators
})
display(ind_df.head())

Sample WDI indicators:

	Indicator ID
0	WB_WDI_AG_LND_EL5M_RU_K2
1	WB_WDI_AG_LND_TOTL_RU_K2
2	WB_WDI_AG_PRD_CROP_XD
3	WB_WDI_AG_PRD_FOOD_XD
4	WB_WDI_AG_PRD_LVSK_XD

Indicator metadata

The metadata endpoint returns the full descriptor for any indicator: title, long definition, source, periodicity, statistical concept, and more.

In [6]:

# Metadata for total population
meta_raw = get_metadata('WB_WDI_SP_POP_TOTL')
meta_items = meta_raw.get('value', [])


item = meta_items[0]
desc = item.get('series_description', {})
meta_info = item.get('metadata_information', {})

periodicity = desc.get('periodicity', '')
if isinstance(periodicity, dict):
    periodicity = periodicity.get('period', '')

producers = meta_info.get('producers', [])
producer_name = producers[0].get('name', '') if producers else ''

fields = [
    ('Indicator ID', desc.get('idno', '')),
    ('Name', desc.get('name', '')),
    ('Dataset', desc.get('database_id', '')),
    ('Database name', desc.get('database_name', '')),
    ('Periodicity', periodicity),
    ('Producer', producer_name),
    (
        'Time coverage',
        f"{desc.get('time_periods', [{}])[0].get('start', '')} - "
        f"{desc.get('time_periods', [{}])[0].get('end', '')}"
        if desc.get('time_periods') else ''
    ),
    ('Statistical concept', str(desc.get('statistical_concept', '') or '')[:120]),
    ('Limitation', str(desc.get('limitation', '') or '')[:120]),
    ('Aggregation method', desc.get('aggregation_method', '')),
    ('License', desc.get('license', [{}])[0].get('name', '') if desc.get('license') else ''),
]

meta_df = pd.DataFrame(fields, columns=['Field', 'Value'])
display(meta_df)

	Field	Value
0	Indicator ID	WB_WDI_SP_POP_TOTL
1	Name	Population, total
2	Dataset	WB_WDI
3	Database name	World Development Indicators (WDI)
4	Periodicity	Annual
5	Producer	Development Economics Data Group
6	Time coverage	1960 - 2024
7	Statistical concept	Estimates of total population describe the siz...
8	Limitation	Current population estimates for developing co...
9	Aggregation method	Sum
10	License	CC BY-4.0

Population

Get world population and the most populous countries

Indicator ID	Description
`WB_WDI_SP_POP_TOTL`	Population, total
`WB_WDI_SP_POP_GROW`	Population growth (% annual)

In [7]:

# World total population over time
pop_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_TOTL',
    countries=['WLD'],
    year_from=1960,
    year_to=2023
)

pop_grow_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_GROW',
    countries=['WLD'],
    year_from=1960,
    year_to=2023
)

pop_df = (
    to_df(pop_raw)
    .dropna(subset=['TIME_PERIOD', 'OBS_VALUE'])
    .sort_values('TIME_PERIOD')
    .drop_duplicates(subset=['TIME_PERIOD'])
    .copy()
)

grow_df = (
    to_df(pop_grow_raw)
    .dropna(subset=['TIME_PERIOD', 'OBS_VALUE'])
    .sort_values('TIME_PERIOD')
    .drop_duplicates(subset=['TIME_PERIOD'])
    .copy()
)

pop_countries_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_TOTL',
    year_from=2022,
    year_to=2022
)

pop_c_df = (
    to_df(pop_countries_raw)
    .dropna(subset=['REF_AREA', 'OBS_VALUE'])
    .copy()
)

valid_iso3 = {c.alpha_3 for c in pycountry.countries}
pop_c_df = pop_c_df[pop_c_df['REF_AREA'].isin(valid_iso3)].copy()

top10 = (
    pop_c_df.nlargest(10, 'OBS_VALUE')[['REF_AREA', 'OBS_VALUE']]
    .rename(columns={'REF_AREA': 'Country', 'OBS_VALUE': 'Population'})
    .sort_values('Population', ascending=True)
    .copy()
)

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    pop_df['TIME_PERIOD'],
    pop_df['OBS_VALUE'] / 1e9,
    alpha=0.12,
    color=FL_BLUE
)
plt.plot(
    pop_df['TIME_PERIOD'],
    pop_df['OBS_VALUE'] / 1e9,
    color=FL_BLUE,
    linewidth=1.8
)
plt.xlabel('Year')
plt.ylabel('Population (billions)')
plt.title('World population')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    grow_df['TIME_PERIOD'],
    grow_df['OBS_VALUE'],
    alpha=0.12,
    color=FL_SLATE
)
plt.plot(
    grow_df['TIME_PERIOD'],
    grow_df['OBS_VALUE'],
    color=FL_SLATE,
    linewidth=1.4
)
plt.axhline(0, color=FL_BORDER, linewidth=0.8)
plt.xlabel('Year')
plt.ylabel('Growth rate (%)')
plt.title('World population growth rate')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.1f}%')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.barh(
    top10['Country'],
    top10['Population'] / 1e9,
    alpha=0.82,
    height=0.6
)
plt.xlabel('Population (billions)')
plt.title('10 most populous countries (2022)')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

No description has been provided for this image

GDP and economic output

GDP per capita is the primary cross-country wealth comparison metric in the WDI. GDP growth rate shows cyclical economic dynamics.

Indicator ID	Description
`WB_WDI_NY_GDP_PCAP_CD`	GDP per capita, current USD
`WB_WDI_NY_GDP_MKTP_KD_ZG`	GDP growth (% annual)

In [8]:

# GDP per capita for G7 countries
G7 = ['USA', 'GBR', 'DEU', 'FRA', 'JPN', 'ITA', 'CAN']
G7_COLORS = [FL_BLUE, FL_RED, FL_AMBER, FL_GREEN, FL_SLATE, '#8b5cf6', '#ec4899']

gdppc_raw = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_PCAP_CD',
    countries=G7,
    year_from=1990,
    year_to=2023
)

gdpgrow_raw = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_MKTP_KD_ZG',
    countries=['WLD', 'USA', 'CHN', 'DEU', 'IND'],
    year_from=1990,
    year_to=2023
)

gdppc_df = (
    to_df(gdppc_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

gdpgrow_df = (
    to_df(gdpgrow_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(G7, G7_COLORS):
    sub = gdppc_df[gdppc_df['REF_AREA'] == country]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'] / 1000,
            color=color,
            linewidth=1.4,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('GDP per capita ($000s)')
plt.title('GDP per capita - G7 countries')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:.0f}k')
)
plt.legend(fontsize=9, ncol=2)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

grow_countries = ['WLD', 'USA', 'CHN', 'DEU', 'IND']
grow_colors = [FL_SLATE, FL_BLUE, FL_RED, FL_AMBER, FL_GREEN]

plt.figure(figsize=(8, 4.5))
for country, color in zip(grow_countries, grow_colors):
    sub = gdpgrow_df[gdpgrow_df['REF_AREA'] == country]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.axhline(0, color=FL_BORDER, linewidth=0.8)
plt.xlabel('Year')
plt.ylabel('GDP growth (%)')
plt.title('GDP growth rate - selected economies')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.1f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

latest_gdp = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_PCAP_CD',
    countries=G7,
    year_from=2022,
    year_to=2022
)

lat_df = (
    to_df(latest_gdp)[['REF_AREA', 'OBS_VALUE']]
    .dropna(subset=['REF_AREA', 'OBS_VALUE'])
    .drop_duplicates(subset=['REF_AREA'])
    .rename(columns={
        'REF_AREA': 'Country',
        'OBS_VALUE': 'GDP per capita (USD, 2022)'
    })
    .sort_values('GDP per capita (USD, 2022)', ascending=False)
    .copy()
)

lat_df['GDP per capita (USD, 2022)'] = lat_df['GDP per capita (USD, 2022)'].map(lambda v: f'${v:,.0f}')
display(lat_df)

	Country	GDP per capita (USD, 2022)
4	USA	$76,657
0	CAN	$56,257
6	DEU	$50,507
3	GBR	$47,057
5	FRA	$40,989
1	ITA	$35,654
2	JPN	$34,066

Trade

Exports and imports as a share of GDP reveal how open an economy is to international trade. Trade openness is a key variable in development economics.

Indicator ID	Description
`WB_WDI_NE_EXP_GNFS_ZS`	Exports of goods and services (% of GDP)
`WB_WDI_NE_IMP_GNFS_ZS`	Imports of goods and services (% of GDP)
`WB_WDI_TG_VAL_TOTL_GD_ZS`	Merchandise trade (% of GDP)

In [9]:

TRADE_COUNTRIES = ['USA', 'CHN', 'DEU', 'JPN', 'KOR']
TRADE_COLORS = [FL_BLUE, FL_RED, FL_AMBER, FL_SLATE, FL_GREEN]

exp_raw = get_data(
    WDI_DB,
    'WB_WDI_NE_EXP_GNFS_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

imp_raw = get_data(
    WDI_DB,
    'WB_WDI_NE_IMP_GNFS_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

merch_raw = get_data(
    WDI_DB,
    'WB_WDI_TG_VAL_TOTL_GD_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

exp_df = (
    to_df(exp_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

imp_df = (
    to_df(imp_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

merch_df = (
    to_df(merch_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(TRADE_COUNTRIES, TRADE_COLORS):
    exp_sub = exp_df[exp_df['REF_AREA'] == country]
    if not exp_sub.empty:
        plt.plot(
            exp_sub['TIME_PERIOD'],
            exp_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('% of GDP')
plt.title('Exports of goods and services (% GDP)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(TRADE_COUNTRIES, TRADE_COLORS):
    imp_sub = imp_df[imp_df['REF_AREA'] == country]
    if not imp_sub.empty:
        plt.plot(
            imp_sub['TIME_PERIOD'],
            imp_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('% of GDP')
plt.title('Imports of goods and services (% GDP)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

Poverty

The World Bank's international poverty lines measure the share of the population living below $2.15 per day (2017 PPP). Regional and income group aggregates show global progress over time.

Indicator ID	Description
`WB_WDI_SI_POV_DDAY`	Poverty headcount ratio at $2.15/day (% of population)
`WB_WDI_SI_POV_LMIC`	Poverty headcount ratio at $3.65/day (% of population)

In [10]:

REGIONS = ['EAS', 'SAS', 'SSF', 'LAC', 'MNA', 'ECS']
REG_LABELS = {
    'EAS': 'East Asia',
    'SAS': 'South Asia',
    'SSF': 'Sub-Saharan Africa',
    'LAC': 'Latin America',
    'MNA': 'Middle East & N. Africa',
    'ECS': 'Europe & C. Asia'
}
REG_COLORS = [FL_BLUE, FL_GREEN, FL_RED, FL_AMBER, FL_SLATE, '#8b5cf6']

pov_raw = get_data(
    WDI_DB,
    'WB_WDI_SI_POV_DDAY',
    countries=REGIONS,
    year_from=1990,
    year_to=2022
)

pov2_raw = get_data(
    WDI_DB,
    'WB_WDI_SI_POV_LMIC',
    countries=['WLD'],
    year_from=1990,
    year_to=2022
)

pov_df = (
    to_df(pov_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

pov2_df = (
    to_df(pov2_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for region, color in zip(REGIONS, REG_COLORS):
    sub = pov_df[pov_df['REF_AREA'] == region]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'],
            color=color,
            linewidth=1.5,
            marker='o',
            markersize=3,
            label=REG_LABELS.get(region, region)
        )

plt.xlabel('Year')
plt.ylabel('Population below $2.15/day (%)')
plt.title('Extreme poverty headcount by region')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=8)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    pov2_df['TIME_PERIOD'],
    pov2_df['OBS_VALUE'],
    alpha=0.12,
    color=FL_AMBER
)
plt.plot(
    pov2_df['TIME_PERIOD'],
    pov2_df['OBS_VALUE'],
    color=FL_AMBER,
    linewidth=1.8,
    marker='o',
    markersize=4
)
plt.xlabel('Year')
plt.ylabel('Population below $3.65/day (%)')
plt.title('Global poverty headcount at $3.65/day (World)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

Health

Life expectancy and mortality are the primary health outcome indicators.

Indicator ID	Description
`WB_WDI_SP_DYN_LE00_IN`	Life expectancy at birth, total (years)
`WB_WDI_SH_DYN_MORT`	Under-5 mortality rate (per 1,000 live births)

In [11]:

HEALTH_COUNTRIES = ['WLD', 'USA', 'CHN', 'IND', 'NGA', 'BRA']
H_COLORS = [FL_SLATE, FL_BLUE, FL_RED, FL_GREEN, FL_AMBER, '#8b5cf6']

life_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_DYN_LE00_IN',
    countries=HEALTH_COUNTRIES,
    year_from=1960,
    year_to=2022
)

mort_raw = get_data(
    WDI_DB,
    'WB_WDI_SH_DYN_MORT',
    countries=HEALTH_COUNTRIES,
    year_from=1960,
    year_to=2022
)

life_df = (
    to_df(life_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

mort_df = (
    to_df(mort_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(HEALTH_COUNTRIES, H_COLORS):
    l_sub = life_df[life_df['REF_AREA'] == country]
    if not l_sub.empty:
        plt.plot(
            l_sub['TIME_PERIOD'],
            l_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Years')
plt.title('Life expectancy at birth')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(HEALTH_COUNTRIES, H_COLORS):
    m_sub = mort_df[mort_df['REF_AREA'] == country]
    if not m_sub.empty:
        plt.plot(
            m_sub['TIME_PERIOD'],
            m_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Deaths per 1,000 live births')
plt.title('Under-5 mortality rate')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

# print('Available disaggregation dimensions for WB_WDI_SP_DYN_LE00_IN:')
# disagg = get_disaggregation(WDI_DB, 'WB_WDI_SP_DYN_LE00_IN')
# print(json.dumps(disagg, indent=2)[:600])

Education

School enrolment ratios and literacy rates.

Indicator ID	Description
`WB_WDI_SE_PRM_NENR`	School enrolment, primary (% net)
`WB_WDI_SE_TER_ENRR`	School enrolment, tertiary (% gross)
`WB_WDI_SE_ADT_LITR_ZS`	Literacy rate, adult total (% 15+ years)

In [12]:

EDU_COUNTRIES = ['WLD', 'USA', 'CHN', 'IND', 'NGA', 'BRA']
E_COLORS = [FL_SLATE, FL_BLUE, FL_RED, FL_GREEN, FL_AMBER, '#8b5cf6']

prim_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_PRM_NENR',
    countries=EDU_COUNTRIES,
    year_from=1990,
    year_to=2022
)

tert_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_TER_ENRR',
    countries=EDU_COUNTRIES,
    year_from=1990,
    year_to=2022
)

prim_df = (
    to_df(prim_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

tert_df = (
    to_df(tert_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(EDU_COUNTRIES, E_COLORS):
    p_sub = prim_df[prim_df['REF_AREA'] == country]
    if not p_sub.empty:
        plt.plot(
            p_sub['TIME_PERIOD'],
            p_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Net enrolment (%)')
plt.title('Primary school enrolment (% net)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(EDU_COUNTRIES, E_COLORS):
    t_sub = tert_df[tert_df['REF_AREA'] == country]
    if not t_sub.empty:
        plt.plot(
            t_sub['TIME_PERIOD'],
            t_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Gross enrolment (%)')
plt.title('Tertiary school enrolment (% gross)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

lit_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_ADT_LITR_ZS',
    countries=['WLD', 'SSF', 'SAS'],
    year_from=2010,
    year_to=2022
)

lit_df = (
    to_df(lit_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])[['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={
        'REF_AREA': 'Region',
        'TIME_PERIOD': 'Year',
        'OBS_VALUE': 'Adult literacy rate (%)'
    })
    .sort_values(['Region', 'Year'])
    .copy()
)

lit_df['Year'] = lit_df['Year'].astype(int)
lit_df['Adult literacy rate (%)'] = lit_df['Adult literacy rate (%)'].map(lambda v: f'{v:.1f}%')

print('Adult literacy rate - WLD / Sub-Saharan Africa / South Asia')
display(lit_df.tail(10))

Adult literacy rate - WLD / Sub-Saharan Africa / South Asia

	Region	Year	Adult literacy rate (%)
3	WLD	2013	85.2%
15	WLD	2014	85.6%
32	WLD	2015	86.0%
10	WLD	2016	86.3%
20	WLD	2017	86.5%
26	WLD	2018	86.8%
23	WLD	2019	86.8%
38	WLD	2020	87.0%
6	WLD	2021	87.2%
36	WLD	2022	87.4%