Ctrl K

World Bank Data360 API - Introduction and EDA

The World Bank Data360 platform provides programmatic access to hundreds of development datasets covering GDP, trade, poverty, health, education, and demographic indicators across 200+ countries spanning multiple decades. No API key or authentication is required.

API base URL: https://data360api.worldbank.org/data360/
API documentation: data360.worldbank.org/en/api
Data browser: data360.worldbank.org/en/search

Endpoint Method Purpose
/searchv2 POST Search indicators across all datasets
/indicators GET List all indicators in a dataset
/data GET Fetch numeric observations
/metadata POST Get indicator metadata (title, source, notes)
/disaggregation GET Get available breakdown dimensions

This notebook uses the World Development Indicators (WB_WDI) which is the World Bank's primary dataset, covering over 1,400 time series across all member countries.

In [3]:
import json
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pycountry # pip install pycountry

import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, HTML

FL_BLUE   = '#2563eb'
FL_SLATE  = '#64748b'
FL_AMBER  = '#f59e0b'
FL_GREEN  = '#16a34a'
FL_RED    = '#ef4444'
FL_BG     = '#ffffff'
FL_GRID   = '#e2e8f0'
FL_TEXT   = '#0f172a'
FL_TEXT2  = '#334155'
FL_BORDER = '#e2e8f0'

matplotlib.rcParams.update({
    'figure.facecolor': FL_BG,    'axes.facecolor':   FL_BG,
    'axes.edgecolor':   FL_BORDER, 'axes.labelcolor':  FL_TEXT2,
    'axes.spines.top':  False,     'axes.spines.right':False,
    'axes.grid':        True,      'grid.color':       FL_GRID,
    'grid.linewidth':   0.7,       'xtick.color':      FL_TEXT2,
    'ytick.color':      FL_TEXT2,  'xtick.labelsize':  10,
    'ytick.labelsize':  10,        'axes.labelsize':   11,
    'axes.titlesize':   12,        'axes.titlecolor':  FL_TEXT,
    'axes.titlepad':    12,        'legend.frameon':   False,
    'legend.fontsize':  10,        'figure.dpi':       300,
    'savefig.bbox':     'tight',   'font.family':      'sans-serif',
    'font.sans-serif':  ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})

BASE_URL  = 'https://data360api.worldbank.org/data360'
WDI_DB    = 'WB_WDI'
SESSION   = requests.Session()
SESSION.headers.update({'accept': '*/*', 'Content-Type': 'application/json'})

def search(keyword, top=10):
    r = SESSION.post(f'{BASE_URL}/searchv2', json={
        'count': True,
        'select': 'series_description/idno, series_description/name, series_description/database_id',
        'search': keyword,
        'top': top,
    })
    r.raise_for_status()
    return r.json()

def get_indicators(dataset_id, top=20):
    r = SESSION.get(f'{BASE_URL}/indicators',
                    params={'datasetId': dataset_id, 'top': top})
    r.raise_for_status()
    return r.json()

def get_data(dataset_id, indicator_id, countries=None,
             year_from=None, year_to=None, skip=0, top=5000):
    params = {
        'DATABASE_ID': dataset_id,
        'INDICATOR':   indicator_id,
        'skip':        skip,
        'top':         top,
    }
    if countries:
        params['REF_AREA'] = ','.join(countries) if isinstance(countries, list) else countries
    if year_from:
        params['timePeriodFrom'] = year_from
    if year_to:
        params['timePeriodTo'] = year_to
    r = SESSION.get(f'{BASE_URL}/data', params=params)
    r.raise_for_status()
    return r.json()

def get_metadata(indicator_idno):
    r = SESSION.post(f'{BASE_URL}/metadata', json={
        'query': f"&$filter=series_description/idno eq '{indicator_idno}'"
    })
    r.raise_for_status()
    return r.json()

def get_disaggregation(dataset_id, indicator_id):
    r = SESSION.get(f'{BASE_URL}/disaggregation',
                    params={'datasetId': dataset_id, 'indicatorId': indicator_id})
    r.raise_for_status()
    return r.json()

def to_df(raw, value_col='OBS_VALUE', country_col='REF_AREA',
          time_col='TIME_PERIOD'):
    rows = raw.get('value', raw) if isinstance(raw, dict) else raw
    df = pd.DataFrame(rows)
    if df.empty:
        return df
    if value_col in df.columns:
        df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    if time_col in df.columns:
        df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
    return df


_test = get_data(WDI_DB, 'WB_WDI_NY_GDP_PCAP_CD',
                 countries=['USA'], year_from=2022, year_to=2022)
_val = _test.get('value', [])
if _val:
    print(f'Connection OK - WDI GDP per capita (USA, 2022): ${float(_val[0]["OBS_VALUE"]):,.0f}')
else:
    print('Connected - no value returned for test query')
Connection OK - WDI GDP per capita (USA, 2022): $76,657

API structure

The Data360 API is organised around datasets (e.g. WB_WDI, WHO_GHO) containing indicators (e.g. WB_WDI_NY_GDP_PCAP_CD). Data is filtered by dimensions: REF_AREA (country ISO3 code), TIME_PERIOD (year), FREQ (frequency), and optional breakdowns (SEX, AGE, URBANISATION).

The indicator ID pattern for WDI is WB_WDI_ + the classic WDI series code (e.g. NY.GDP.PCAP.CD → NY_GDP_PCAP_CD). Discover series IDs via the search endpoint or the data browser.

In [4]:
# Search for GDP-related indicators across all datasets
results = search('GDP per capita', top=8)
print(f'Total matches: {results.get("@odata.count", "?")}\n')

hits = results.get('value', [])

def pick_search_fields(h):
    sd = h.get('series_description', {}) if isinstance(h.get('series_description'), dict) else {}
    return {
        'Indicator ID': h.get('series_description/idno') or sd.get('idno') or h.get('idno', ''),
        'Name': h.get('series_description/name') or sd.get('name') or h.get('name', ''),
        'Dataset': h.get('series_description/database_id') or sd.get('database_id') or h.get('database_id', ''),
    }

search_df = pd.DataFrame([pick_search_fields(h) for h in hits])
display(search_df[['Name','Dataset']].head())
Total matches: 1859

Name Dataset
0 GDP per capita, PPP WB_SSGD
1 Log of GDP per capita, PPP WB_SSGD
2 Government expenditure per student, tertiary (... WB_WDI
3 Survey mean consumption or income per capita, ... WB_WDI
4 Government expenditure per student, secondary ... WB_WDI
In [5]:
print('Sample WDI indicators:')

wdi_indicators = get_indicators(WDI_DB, top=12)

ind_df = pd.DataFrame({
    'Indicator ID': wdi_indicators
})
display(ind_df.head())
Sample WDI indicators:
Indicator ID
0 WB_WDI_AG_LND_EL5M_RU_K2
1 WB_WDI_AG_LND_TOTL_RU_K2
2 WB_WDI_AG_PRD_CROP_XD
3 WB_WDI_AG_PRD_FOOD_XD
4 WB_WDI_AG_PRD_LVSK_XD

Indicator metadata

The metadata endpoint returns the full descriptor for any indicator: title, long definition, source, periodicity, statistical concept, and more.

In [6]:
# Metadata for total population
meta_raw = get_metadata('WB_WDI_SP_POP_TOTL')
meta_items = meta_raw.get('value', [])


item = meta_items[0]
desc = item.get('series_description', {})
meta_info = item.get('metadata_information', {})

periodicity = desc.get('periodicity', '')
if isinstance(periodicity, dict):
    periodicity = periodicity.get('period', '')

producers = meta_info.get('producers', [])
producer_name = producers[0].get('name', '') if producers else ''

fields = [
    ('Indicator ID', desc.get('idno', '')),
    ('Name', desc.get('name', '')),
    ('Dataset', desc.get('database_id', '')),
    ('Database name', desc.get('database_name', '')),
    ('Periodicity', periodicity),
    ('Producer', producer_name),
    (
        'Time coverage',
        f"{desc.get('time_periods', [{}])[0].get('start', '')} - "
        f"{desc.get('time_periods', [{}])[0].get('end', '')}"
        if desc.get('time_periods') else ''
    ),
    ('Statistical concept', str(desc.get('statistical_concept', '') or '')[:120]),
    ('Limitation', str(desc.get('limitation', '') or '')[:120]),
    ('Aggregation method', desc.get('aggregation_method', '')),
    ('License', desc.get('license', [{}])[0].get('name', '') if desc.get('license') else ''),
]

meta_df = pd.DataFrame(fields, columns=['Field', 'Value'])
display(meta_df)
Field Value
0 Indicator ID WB_WDI_SP_POP_TOTL
1 Name Population, total
2 Dataset WB_WDI
3 Database name World Development Indicators (WDI)
4 Periodicity Annual
5 Producer Development Economics Data Group
6 Time coverage 1960 - 2024
7 Statistical concept Estimates of total population describe the siz...
8 Limitation Current population estimates for developing co...
9 Aggregation method Sum
10 License CC BY-4.0

Population

Get world population and the most populous countries

Indicator ID Description
WB_WDI_SP_POP_TOTL Population, total
WB_WDI_SP_POP_GROW Population growth (% annual)
In [7]:
# World total population over time
pop_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_TOTL',
    countries=['WLD'],
    year_from=1960,
    year_to=2023
)

pop_grow_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_GROW',
    countries=['WLD'],
    year_from=1960,
    year_to=2023
)

pop_df = (
    to_df(pop_raw)
    .dropna(subset=['TIME_PERIOD', 'OBS_VALUE'])
    .sort_values('TIME_PERIOD')
    .drop_duplicates(subset=['TIME_PERIOD'])
    .copy()
)

grow_df = (
    to_df(pop_grow_raw)
    .dropna(subset=['TIME_PERIOD', 'OBS_VALUE'])
    .sort_values('TIME_PERIOD')
    .drop_duplicates(subset=['TIME_PERIOD'])
    .copy()
)

pop_countries_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_POP_TOTL',
    year_from=2022,
    year_to=2022
)

pop_c_df = (
    to_df(pop_countries_raw)
    .dropna(subset=['REF_AREA', 'OBS_VALUE'])
    .copy()
)

valid_iso3 = {c.alpha_3 for c in pycountry.countries}
pop_c_df = pop_c_df[pop_c_df['REF_AREA'].isin(valid_iso3)].copy()

top10 = (
    pop_c_df.nlargest(10, 'OBS_VALUE')[['REF_AREA', 'OBS_VALUE']]
    .rename(columns={'REF_AREA': 'Country', 'OBS_VALUE': 'Population'})
    .sort_values('Population', ascending=True)
    .copy()
)

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    pop_df['TIME_PERIOD'],
    pop_df['OBS_VALUE'] / 1e9,
    alpha=0.12,
    color=FL_BLUE
)
plt.plot(
    pop_df['TIME_PERIOD'],
    pop_df['OBS_VALUE'] / 1e9,
    color=FL_BLUE,
    linewidth=1.8
)
plt.xlabel('Year')
plt.ylabel('Population (billions)')
plt.title('World population')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    grow_df['TIME_PERIOD'],
    grow_df['OBS_VALUE'],
    alpha=0.12,
    color=FL_SLATE
)
plt.plot(
    grow_df['TIME_PERIOD'],
    grow_df['OBS_VALUE'],
    color=FL_SLATE,
    linewidth=1.4
)
plt.axhline(0, color=FL_BORDER, linewidth=0.8)
plt.xlabel('Year')
plt.ylabel('Growth rate (%)')
plt.title('World population growth rate')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.1f}%')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.barh(
    top10['Country'],
    top10['Population'] / 1e9,
    alpha=0.82,
    height=0.6
)
plt.xlabel('Population (billions)')
plt.title('10 most populous countries (2022)')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

GDP and economic output

GDP per capita is the primary cross-country wealth comparison metric in the WDI. GDP growth rate shows cyclical economic dynamics.

Indicator ID Description
WB_WDI_NY_GDP_PCAP_CD GDP per capita, current USD
WB_WDI_NY_GDP_MKTP_KD_ZG GDP growth (% annual)
In [8]:
# GDP per capita for G7 countries
G7 = ['USA', 'GBR', 'DEU', 'FRA', 'JPN', 'ITA', 'CAN']
G7_COLORS = [FL_BLUE, FL_RED, FL_AMBER, FL_GREEN, FL_SLATE, '#8b5cf6', '#ec4899']

gdppc_raw = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_PCAP_CD',
    countries=G7,
    year_from=1990,
    year_to=2023
)

gdpgrow_raw = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_MKTP_KD_ZG',
    countries=['WLD', 'USA', 'CHN', 'DEU', 'IND'],
    year_from=1990,
    year_to=2023
)

gdppc_df = (
    to_df(gdppc_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

gdpgrow_df = (
    to_df(gdpgrow_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(G7, G7_COLORS):
    sub = gdppc_df[gdppc_df['REF_AREA'] == country]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'] / 1000,
            color=color,
            linewidth=1.4,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('GDP per capita ($000s)')
plt.title('GDP per capita - G7 countries')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:.0f}k')
)
plt.legend(fontsize=9, ncol=2)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

grow_countries = ['WLD', 'USA', 'CHN', 'DEU', 'IND']
grow_colors = [FL_SLATE, FL_BLUE, FL_RED, FL_AMBER, FL_GREEN]

plt.figure(figsize=(8, 4.5))
for country, color in zip(grow_countries, grow_colors):
    sub = gdpgrow_df[gdpgrow_df['REF_AREA'] == country]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.axhline(0, color=FL_BORDER, linewidth=0.8)
plt.xlabel('Year')
plt.ylabel('GDP growth (%)')
plt.title('GDP growth rate - selected economies')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.1f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

latest_gdp = get_data(
    WDI_DB,
    'WB_WDI_NY_GDP_PCAP_CD',
    countries=G7,
    year_from=2022,
    year_to=2022
)

lat_df = (
    to_df(latest_gdp)[['REF_AREA', 'OBS_VALUE']]
    .dropna(subset=['REF_AREA', 'OBS_VALUE'])
    .drop_duplicates(subset=['REF_AREA'])
    .rename(columns={
        'REF_AREA': 'Country',
        'OBS_VALUE': 'GDP per capita (USD, 2022)'
    })
    .sort_values('GDP per capita (USD, 2022)', ascending=False)
    .copy()
)

lat_df['GDP per capita (USD, 2022)'] = lat_df['GDP per capita (USD, 2022)'].map(lambda v: f'${v:,.0f}')
display(lat_df)
No description has been provided for this image
No description has been provided for this image
Country GDP per capita (USD, 2022)
4 USA $76,657
0 CAN $56,257
6 DEU $50,507
3 GBR $47,057
5 FRA $40,989
1 ITA $35,654
2 JPN $34,066

Trade

Exports and imports as a share of GDP reveal how open an economy is to international trade. Trade openness is a key variable in development economics.

Indicator ID Description
WB_WDI_NE_EXP_GNFS_ZS Exports of goods and services (% of GDP)
WB_WDI_NE_IMP_GNFS_ZS Imports of goods and services (% of GDP)
WB_WDI_TG_VAL_TOTL_GD_ZS Merchandise trade (% of GDP)
In [9]:
TRADE_COUNTRIES = ['USA', 'CHN', 'DEU', 'JPN', 'KOR']
TRADE_COLORS = [FL_BLUE, FL_RED, FL_AMBER, FL_SLATE, FL_GREEN]

exp_raw = get_data(
    WDI_DB,
    'WB_WDI_NE_EXP_GNFS_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

imp_raw = get_data(
    WDI_DB,
    'WB_WDI_NE_IMP_GNFS_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

merch_raw = get_data(
    WDI_DB,
    'WB_WDI_TG_VAL_TOTL_GD_ZS',
    countries=TRADE_COUNTRIES,
    year_from=1990,
    year_to=2023
)

exp_df = (
    to_df(exp_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

imp_df = (
    to_df(imp_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

merch_df = (
    to_df(merch_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(TRADE_COUNTRIES, TRADE_COLORS):
    exp_sub = exp_df[exp_df['REF_AREA'] == country]
    if not exp_sub.empty:
        plt.plot(
            exp_sub['TIME_PERIOD'],
            exp_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('% of GDP')
plt.title('Exports of goods and services (% GDP)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(TRADE_COUNTRIES, TRADE_COLORS):
    imp_sub = imp_df[imp_df['REF_AREA'] == country]
    if not imp_sub.empty:
        plt.plot(
            imp_sub['TIME_PERIOD'],
            imp_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('% of GDP')
plt.title('Imports of goods and services (% GDP)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image

Poverty

The World Bank's international poverty lines measure the share of the population living below $2.15 per day (2017 PPP). Regional and income group aggregates show global progress over time.

Indicator ID Description
WB_WDI_SI_POV_DDAY Poverty headcount ratio at $2.15/day (% of population)
WB_WDI_SI_POV_LMIC Poverty headcount ratio at $3.65/day (% of population)
In [10]:
REGIONS = ['EAS', 'SAS', 'SSF', 'LAC', 'MNA', 'ECS']
REG_LABELS = {
    'EAS': 'East Asia',
    'SAS': 'South Asia',
    'SSF': 'Sub-Saharan Africa',
    'LAC': 'Latin America',
    'MNA': 'Middle East & N. Africa',
    'ECS': 'Europe & C. Asia'
}
REG_COLORS = [FL_BLUE, FL_GREEN, FL_RED, FL_AMBER, FL_SLATE, '#8b5cf6']

pov_raw = get_data(
    WDI_DB,
    'WB_WDI_SI_POV_DDAY',
    countries=REGIONS,
    year_from=1990,
    year_to=2022
)

pov2_raw = get_data(
    WDI_DB,
    'WB_WDI_SI_POV_LMIC',
    countries=['WLD'],
    year_from=1990,
    year_to=2022
)

pov_df = (
    to_df(pov_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

pov2_df = (
    to_df(pov2_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for region, color in zip(REGIONS, REG_COLORS):
    sub = pov_df[pov_df['REF_AREA'] == region]
    if not sub.empty:
        plt.plot(
            sub['TIME_PERIOD'],
            sub['OBS_VALUE'],
            color=color,
            linewidth=1.5,
            marker='o',
            markersize=3,
            label=REG_LABELS.get(region, region)
        )

plt.xlabel('Year')
plt.ylabel('Population below $2.15/day (%)')
plt.title('Extreme poverty headcount by region')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=8)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    pov2_df['TIME_PERIOD'],
    pov2_df['OBS_VALUE'],
    alpha=0.12,
    color=FL_AMBER
)
plt.plot(
    pov2_df['TIME_PERIOD'],
    pov2_df['OBS_VALUE'],
    color=FL_AMBER,
    linewidth=1.8,
    marker='o',
    markersize=4
)
plt.xlabel('Year')
plt.ylabel('Population below $3.65/day (%)')
plt.title('Global poverty headcount at $3.65/day (World)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image

Health

Life expectancy and mortality are the primary health outcome indicators.

Indicator ID Description
WB_WDI_SP_DYN_LE00_IN Life expectancy at birth, total (years)
WB_WDI_SH_DYN_MORT Under-5 mortality rate (per 1,000 live births)
In [11]:
HEALTH_COUNTRIES = ['WLD', 'USA', 'CHN', 'IND', 'NGA', 'BRA']
H_COLORS = [FL_SLATE, FL_BLUE, FL_RED, FL_GREEN, FL_AMBER, '#8b5cf6']

life_raw = get_data(
    WDI_DB,
    'WB_WDI_SP_DYN_LE00_IN',
    countries=HEALTH_COUNTRIES,
    year_from=1960,
    year_to=2022
)

mort_raw = get_data(
    WDI_DB,
    'WB_WDI_SH_DYN_MORT',
    countries=HEALTH_COUNTRIES,
    year_from=1960,
    year_to=2022
)

life_df = (
    to_df(life_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

mort_df = (
    to_df(mort_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(HEALTH_COUNTRIES, H_COLORS):
    l_sub = life_df[life_df['REF_AREA'] == country]
    if not l_sub.empty:
        plt.plot(
            l_sub['TIME_PERIOD'],
            l_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Years')
plt.title('Life expectancy at birth')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(HEALTH_COUNTRIES, H_COLORS):
    m_sub = mort_df[mort_df['REF_AREA'] == country]
    if not m_sub.empty:
        plt.plot(
            m_sub['TIME_PERIOD'],
            m_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Deaths per 1,000 live births')
plt.title('Under-5 mortality rate')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

# print('Available disaggregation dimensions for WB_WDI_SP_DYN_LE00_IN:')
# disagg = get_disaggregation(WDI_DB, 'WB_WDI_SP_DYN_LE00_IN')
# print(json.dumps(disagg, indent=2)[:600])
No description has been provided for this image
No description has been provided for this image

Education

School enrolment ratios and literacy rates.

Indicator ID Description
WB_WDI_SE_PRM_NENR School enrolment, primary (% net)
WB_WDI_SE_TER_ENRR School enrolment, tertiary (% gross)
WB_WDI_SE_ADT_LITR_ZS Literacy rate, adult total (% 15+ years)
In [12]:
EDU_COUNTRIES = ['WLD', 'USA', 'CHN', 'IND', 'NGA', 'BRA']
E_COLORS = [FL_SLATE, FL_BLUE, FL_RED, FL_GREEN, FL_AMBER, '#8b5cf6']

prim_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_PRM_NENR',
    countries=EDU_COUNTRIES,
    year_from=1990,
    year_to=2022
)

tert_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_TER_ENRR',
    countries=EDU_COUNTRIES,
    year_from=1990,
    year_to=2022
)

prim_df = (
    to_df(prim_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

tert_df = (
    to_df(tert_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])
    .sort_values(['REF_AREA', 'TIME_PERIOD'])
    .drop_duplicates(subset=['REF_AREA', 'TIME_PERIOD'])
    .copy()
)

plt.figure(figsize=(8, 4.5))
for country, color in zip(EDU_COUNTRIES, E_COLORS):
    p_sub = prim_df[prim_df['REF_AREA'] == country]
    if not p_sub.empty:
        plt.plot(
            p_sub['TIME_PERIOD'],
            p_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Net enrolment (%)')
plt.title('Primary school enrolment (% net)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
for country, color in zip(EDU_COUNTRIES, E_COLORS):
    t_sub = tert_df[tert_df['REF_AREA'] == country]
    if not t_sub.empty:
        plt.plot(
            t_sub['TIME_PERIOD'],
            t_sub['OBS_VALUE'],
            color=color,
            linewidth=1.3,
            label=country
        )

plt.xlabel('Year')
plt.ylabel('Gross enrolment (%)')
plt.title('Tertiary school enrolment (% gross)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0f}%')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

lit_raw = get_data(
    WDI_DB,
    'WB_WDI_SE_ADT_LITR_ZS',
    countries=['WLD', 'SSF', 'SAS'],
    year_from=2010,
    year_to=2022
)

lit_df = (
    to_df(lit_raw)
    .dropna(subset=['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'])[['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={
        'REF_AREA': 'Region',
        'TIME_PERIOD': 'Year',
        'OBS_VALUE': 'Adult literacy rate (%)'
    })
    .sort_values(['Region', 'Year'])
    .copy()
)

lit_df['Year'] = lit_df['Year'].astype(int)
lit_df['Adult literacy rate (%)'] = lit_df['Adult literacy rate (%)'].map(lambda v: f'{v:.1f}%')

print('Adult literacy rate - WLD / Sub-Saharan Africa / South Asia')
display(lit_df.tail(10))
No description has been provided for this image
No description has been provided for this image
Adult literacy rate - WLD / Sub-Saharan Africa / South Asia
Region Year Adult literacy rate (%)
3 WLD 2013 85.2%
15 WLD 2014 85.6%
32 WLD 2015 86.0%
10 WLD 2016 86.3%
20 WLD 2017 86.5%
26 WLD 2018 86.8%
23 WLD 2019 86.8%
38 WLD 2020 87.0%
6 WLD 2021 87.2%
36 WLD 2022 87.4%