Binance API - BTCUSDT Limit Order Book EDA

This notebook collects live BTCUSDT order book and trade data directly from Binance's public REST API. No API key or authentication required for this pipeline. We further investigate the acquired data with an exploratory analysis covering data quality, mid-price dynamics, order book depth, trade flow, and a live LOB snapshot.

The REST depth endpoint returns the same 10-level bid/ask structure that the production WebSocket collector (@depth10@100ms) delivers. Every field name here matches the WebSocket stream schema exactly so this notebook doubles as field-level documentation for the production collector output.

Endpoints used (public, no auth required):

Field	REST response	WebSocket stream
`bids` / `asks`	✓ `[[price, qty], ...]`	✓ same
`lastUpdateId`	✓	✓
`E` (event_time ms)	- not present	✓
`T` (transaction_time ms)	- not present	✓
`local_ts`	added by notebook	added by collector
`e` (event type)	-	`'depthUpdate'`

Parameter	Value
Symbol	BTCUSDT (Bitcoin / USDT spot)
Collection window	2 minutes
Poll interval	100 ms
LOB depth	10 bid + 10 ask levels
Tick size	$0.01
Trades	Last 1,000 aggregated trades

References

In [12]:

import time
import json
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timezone
from collections import Counter
from IPython.display import display, HTML

FL_BLUE   = '#2563eb'
FL_SLATE  = '#64748b'
FL_AMBER  = '#f59e0b'
FL_GREEN  = '#16a34a'
FL_RED    = '#ef4444'
FL_BG     = '#ffffff'
FL_GRID   = '#e2e8f0'
FL_TEXT   = '#0f172a'
FL_TEXT2  = '#334155'
FL_BORDER = '#e2e8f0'

matplotlib.rcParams.update({
    'figure.facecolor':  FL_BG,  'axes.facecolor':    FL_BG,
    'axes.edgecolor':    FL_BORDER, 'axes.labelcolor': FL_TEXT2,
    'axes.spines.top':   False,  'axes.spines.right': False,
    'axes.grid':         True,   'grid.color':        FL_GRID,
    'grid.linewidth':    0.7,    'xtick.color':       FL_TEXT2,
    'ytick.color':       FL_TEXT2, 'xtick.labelsize':  10,
    'ytick.labelsize':   10,     'axes.labelsize':    11,
    'axes.titlesize':    12,     'axes.titlecolor':   FL_TEXT,
    'axes.titlepad':     12,     'legend.frameon':    False,
    'legend.fontsize':   10,     'figure.dpi':        120,
    'savefig.bbox':      'tight', 'font.family':      'sans-serif',
    'font.sans-serif':   ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})

SYMBOL        = 'BTCUSDT'
N_LEVELS      = 10
POLL_INTERVAL = 0.10        # seconds
COLLECT_SECS  = 60*15       # data collection window (seconds)
TICK_SIZE     = 0.001       # BTCUSDT minimum price increment
BASE_URL      = 'https://api.binance.com/api/v3'

def safe_float(x):
    try: return float(x)
    except: return None

def safe_int(x):
    try: return int(x)
    except: return None

def ts_to_utc(ts, is_ms=False):
    """Convert Unix timestamp (seconds or ms) to UTC datetime string."""
    if ts is None: return None
    try:
        t = float(ts) / 1000 if is_ms else float(ts)
        return datetime.fromtimestamp(t, tz=timezone.utc).isoformat()
    except: return None

def detect_message_type(msg):
    if not isinstance(msg, dict): return 'unknown'
    if 'bids' in msg and 'asks' in msg: return 'depth'
    if msg.get('e') == 'aggTrade':      return 'aggTrade'
    if 'result' in msg and 'id' in msg: return 'subscribe_ack'
    return 'unknown'

def flatten_depth(msg, local_ts):
    """
    Flatten one depth message into a single dict.
    Field names match the production WebSocket stream schema exactly:
      E = event_time (ms),  T = transaction_time (ms),
      bids/asks = [[price_str, qty_str], ...]
    REST responses omit E and T; local_ts fills that role here.
    """
    bids = msg.get('bids', [])
    asks = msg.get('asks', [])
    row = {
        'lastUpdateId':     safe_int(msg.get('lastUpdateId')),
        'event_time':       msg.get('E'),       # None for REST; present in WS
        'transaction_time': msg.get('T'),       # None for REST; present in WS
        'local_ts':         local_ts,
    }
    for lvl in range(N_LEVELS):
        row[f'bid_px_{lvl+1}']  = safe_float(bids[lvl][0]) if lvl < len(bids) else None
        row[f'bid_qty_{lvl+1}'] = safe_float(bids[lvl][1]) if lvl < len(bids) else None
        row[f'ask_px_{lvl+1}']  = safe_float(asks[lvl][0]) if lvl < len(asks) else None
        row[f'ask_qty_{lvl+1}'] = safe_float(asks[lvl][1]) if lvl < len(asks) else None
    best_bid = row['bid_px_1']
    best_ask = row['ask_px_1']
    row['mid_price'] = (best_bid + best_ask) / 2 if best_bid and best_ask else None
    row['spread']    = best_ask - best_bid       if best_bid and best_ask else None
    return row

def flatten_trade(msg, local_ts):
    """Flatten one aggTrade message. Field names match WebSocket stream schema."""
    return {
        'agg_id':           safe_int(msg.get('a')),
        'price':            safe_float(msg.get('p')),
        'qty':              safe_float(msg.get('q')),
        'first_trade_id':   safe_int(msg.get('f')),
        'last_trade_id':    safe_int(msg.get('l')),
        'trade_time':       safe_int(msg.get('T')),    # ms
        'event_time':       safe_int(msg.get('E')),    # ms
        'is_buyer_maker':   msg.get('m'),              # True = sell aggressor
        'symbol':           msg.get('s', SYMBOL),
        'local_ts':         local_ts,
    }

def render_html_table(df, col_fmts=None):
    col_fmts = col_fmts or {}
    th = ('padding:8px 14px;text-align:left;font-size:11px;font-weight:600;'
          'color:#64748b;letter-spacing:0.04em;text-transform:uppercase;'
          'border-bottom:1px solid #e2e8f0;background:#f8fafc;')
    td_b = 'padding:8px 14px;font-size:13px;color:#334155;border-bottom:1px solid #f1f5f9;'
    td_m = td_b + 'font-family:ui-monospace,Menlo,monospace;white-space:nowrap;'
    num_cols = set(df.select_dtypes(include='number').columns)
    html = ['<div style="overflow-x:auto;margin:4px 0 16px;">',
            '<table style="border-collapse:collapse;width:100%;'
            'font-family:-apple-system,BlinkMacSystemFont,Inter,sans-serif;">',
            '<thead><tr>']
    for col in df.columns:
        html.append(f'<th style="{th}">{col}</th>')
    html.append('</tr></thead><tbody>')
    for i, (_, row) in enumerate(df.iterrows()):
        bg = '#ffffff' if i % 2 == 0 else '#f8fafc'
        html.append(f'<tr style="background:{bg}">')
        for col in df.columns:
            val = row[col]
            val_str = col_fmts[col](val) if col in col_fmts else (
                f'{val:,.4f}' if isinstance(val, float) else
                f'{val:,}'    if isinstance(val, (int, np.integer)) else str(val))
            html.append(f'<td style="{td_m if col in num_cols else td_b}">{val_str}</td>')
        html.append('</tr>')
    html.append('</tbody></table></div>')
    return ''.join(html)

print(f'Setup complete.')
print(f'Target: {SYMBOL} | {COLLECT_SECS}s window | {POLL_INTERVAL*1000:.0f}ms poll | ~{int(COLLECT_SECS/POLL_INTERVAL):,} expected snapshots')

Setup complete.
Target: BTCUSDT | 900s window | 100ms poll | ~9,000 expected snapshots

Data collection

The collector polls GET /api/v3/depth every 100ms for a short time, appending each response to depth_rows. After the window closes, GET /api/v3/aggTrades fetches the last 1,000 aggregated trades.

Each raw response is classified with detect_message_type() and flattened with flatten_depth(). local_ts (Unix seconds, local clock) is used as the primary timestamp instead.

In [13]:

session = requests.Session()

def fetch_depth():
    r = session.get(f'{BASE_URL}/depth',
                    params={'symbol': SYMBOL, 'limit': N_LEVELS}, timeout=5)
    r.raise_for_status()
    return r.json()

def fetch_agg_trades(limit=1000):
    r = session.get(f'{BASE_URL}/aggTrades',
                    params={'symbol': SYMBOL, 'limit': limit}, timeout=5)
    r.raise_for_status()
    return r.json()


print(f'Collecting {COLLECT_SECS}s of {SYMBOL} order book snapshots...')
t_start    = time.time()
depth_rows = []
type_counts = Counter()
errors     = 0

while time.time() - t_start < COLLECT_SECS:
    t_poll = time.time()
    try:
        raw = fetch_depth()
        msg_type = detect_message_type(raw)
        type_counts[msg_type] += 1
        if msg_type == 'depth':
            depth_rows.append(flatten_depth(raw, local_ts=t_poll))
    except Exception as exc:
        errors += 1
        type_counts['error'] += 1
    elapsed  = time.time() - t_poll
    time.sleep(max(0, POLL_INTERVAL - elapsed))
    if len(depth_rows) % 300 == 0 and depth_rows:
        pct = (time.time() - t_start) / COLLECT_SECS * 100
        print(f'  {pct:4.0f}%  {len(depth_rows):,} depth snapshots  errors={errors}')


print('Fetching recent aggregated trades...')
raw_trades  = fetch_agg_trades(limit=1000)
trade_rows  = [flatten_trade(t, local_ts=time.time()) for t in raw_trades]

print(f'\nCollection complete.')
print(f'Message types seen: {dict(type_counts)}')
print(f'Depth snapshots:    {len(depth_rows):,}')
print(f'Trades fetched:     {len(trade_rows):,}')
print(f'Errors:             {errors}')

Collecting 900s of BTCUSDT order book snapshots...
    11%  300 depth snapshots  errors=0
    21%  600 depth snapshots  errors=0
    32%  900 depth snapshots  errors=0
    43%  1,200 depth snapshots  errors=0
    53%  1,500 depth snapshots  errors=0
    64%  1,800 depth snapshots  errors=0
    75%  2,100 depth snapshots  errors=0
    85%  2,400 depth snapshots  errors=0
    96%  2,700 depth snapshots  errors=0
Fetching recent aggregated trades...

Collection complete.
Message types seen: {'depth': 2809}
Depth snapshots:    2,809
Trades fetched:     1,000
Errors:             0

Building the dataset

Raw rows are assembled into typed DataFrames. Derived columns follow the same naming convention as the production data prep pipeline:

event_gap_ms - milliseconds between consecutive event timestamps
local_gap_ms - milliseconds between consecutive local poll timestamps
update_id_gap - lastUpdateId delta (how many book updates occurred between polls)
mid_price_change - absolute mid-price change per snapshot
mid_price_return - percentage mid-price change per snapshot
spread_ticks - spread in minimum tick units ($0.01)

In [23]:

book = pd.DataFrame(depth_rows)

for col in ['lastUpdateId', 'event_time', 'transaction_time']:
    book[col] = pd.to_numeric(book[col], errors='coerce')

for lvl in range(1, N_LEVELS + 1):
    for pfx in ['bid_px_', 'bid_qty_', 'ask_px_', 'ask_qty_']:
        book[f'{pfx}{lvl}'] = pd.to_numeric(book[f'{pfx}{lvl}'], errors='coerce')

book['local_dt'] = pd.to_datetime(book['local_ts'], unit='s', utc=True)
book['ts'] = book['local_dt']
book['elapsed_s'] = book['local_ts'] - book['local_ts'].iloc[0]
book['local_utc'] = book['local_ts'].apply(lambda x: ts_to_utc(x, is_ms=False))

book['local_gap_ms'] = book['local_ts'].diff() * 1000
book['update_id_gap'] = book['lastUpdateId'].diff()
book['mid_price_change'] = book['mid_price'].diff()
book['mid_price_return'] = book['mid_price'].pct_change()
book['spread_ticks'] = (book['spread'] / TICK_SIZE).round()

trades = pd.DataFrame(trade_rows)
trades['trade_dt'] = pd.to_datetime(trades['trade_time'], unit='ms', utc=True)
trades['trade_utc'] = trades['trade_time'].apply(lambda x: ts_to_utc(x, is_ms=True))
trades['dollar_value'] = trades['price'] * trades['qty']

print('Collection summary')
print(f'Book rows:      {len(book):,}')
print(f'Trade rows:     {len(trades):,}')
print(f'Book columns:   {book.shape[1]}')
print(f'Trade columns:  {trades.shape[1]}')
print(f'Time range:     {book["local_utc"].iloc[0]} to {book["local_utc"].iloc[-1]}')
print(f'Duration:       {book["elapsed_s"].max():.1f}s')

book_cols_df = pd.DataFrame({
    'book_column': book.columns,
    'dtype': [str(book[c].dtype) for c in book.columns]
})

trade_cols_df = pd.DataFrame({
    'trade_column': trades.columns,
    'dtype': [str(trades[c].dtype) for c in trades.columns]
})

print('\nBook columns')
display(book_cols_df.head())

print('Trade columns')
display(trade_cols_df.head())

print('Book preview')
display(book.head())

print('Trades preview')
display(trades.head())

Collection summary
Book rows:      2,809
Trade rows:     1,000
Book columns:   55
Trade columns:  13
Time range:     2026-05-24T16:44:16.123758+00:00 to 2026-05-24T16:59:16.118159+00:00
Duration:       900.0s

Book columns

	book_column	dtype
0	lastUpdateId	int64
1	event_time	float64
2	transaction_time	float64
3	local_ts	float64
4	bid_px_1	float64

Trade columns

	trade_column	dtype
0	agg_id	int64
1	price	float64
2	qty	float64
3	first_trade_id	int64
4	last_trade_id	int64

Book preview

	lastUpdateId	event_time	transaction_time	local_ts	bid_px_1	bid_qty_1	ask_px_1	ask_qty_1	bid_px_2	bid_qty_2	...	spread	local_dt	ts	elapsed_s	local_utc	local_gap_ms	update_id_gap	mid_price_change	mid_price_return	spread_ticks
0	94188948025	NaN	NaN	1.779641e+09	76499.61	2.27896	76499.62	1.73519	76499.6	0.00084	...	0.01	2026-05-24 16:44:16.123758316+00:00	2026-05-24 16:44:16.123758316+00:00	0.000000	2026-05-24T16:44:16.123758+00:00	NaN	NaN	NaN	NaN	10.0
1	94188948066	NaN	NaN	1.779641e+09	76499.61	2.27896	76499.62	1.73519	76499.6	0.00084	...	0.01	2026-05-24 16:44:16.734374285+00:00	2026-05-24 16:44:16.734374285+00:00	0.610616	2026-05-24T16:44:16.734374+00:00	610.615969	41.0	0.0	0.0	10.0
2	94188948084	NaN	NaN	1.779641e+09	76499.61	2.27896	76499.62	1.73519	76499.6	0.00084	...	0.01	2026-05-24 16:44:17.041452646+00:00	2026-05-24 16:44:17.041452646+00:00	0.917694	2026-05-24T16:44:17.041453+00:00	307.078362	18.0	0.0	0.0	10.0
3	94188948107	NaN	NaN	1.779641e+09	76499.61	2.27903	76499.62	1.73519	76499.6	0.00084	...	0.01	2026-05-24 16:44:17.348654509+00:00	2026-05-24 16:44:17.348654509+00:00	1.224896	2026-05-24T16:44:17.348655+00:00	307.201862	23.0	0.0	0.0	10.0
4	94188948144	NaN	NaN	1.779641e+09	76499.61	2.27872	76499.62	1.73508	76499.6	0.00084	...	0.01	2026-05-24 16:44:17.651305199+00:00	2026-05-24 16:44:17.651305199+00:00	1.527547	2026-05-24T16:44:17.651305+00:00	302.650690	37.0	0.0	0.0	10.0

5 rows × 55 columns

Trades preview

	agg_id	price	qty	first_trade_id	last_trade_id	trade_time	event_time	is_buyer_maker	symbol	local_ts	trade_dt	trade_utc	dollar_value
0	3963778223	76639.52	0.00014	6318962293	6318962294	1779641839373	None	False	BTCUSDT	1.779642e+09	2026-05-24 16:57:19.373000+00:00	2026-05-24T16:57:19.373000+00:00	10.729533
1	3963778224	76639.77	0.00028	6318962295	6318962298	1779641839373	None	False	BTCUSDT	1.779642e+09	2026-05-24 16:57:19.373000+00:00	2026-05-24T16:57:19.373000+00:00	21.459136
2	3963778225	76639.87	0.00008	6318962299	6318962299	1779641839373	None	False	BTCUSDT	1.779642e+09	2026-05-24 16:57:19.373000+00:00	2026-05-24T16:57:19.373000+00:00	6.131190
3	3963778226	76639.88	0.00028	6318962300	6318962303	1779641839373	None	False	BTCUSDT	1.779642e+09	2026-05-24 16:57:19.373000+00:00	2026-05-24T16:57:19.373000+00:00	21.459166
4	3963778227	76639.99	0.00021	6318962304	6318962306	1779641839373	None	False	BTCUSDT	1.779642e+09	2026-05-24 16:57:19.373000+00:00	2026-05-24T16:57:19.373000+00:00	16.094398

Data quality checks

Before any analysis, verify the dataset is structurally sound: correct depth level counts, no null prices, expected polling cadence.

In [24]:

null_checks = pd.DataFrame([{
    'null_mid_price': int(book['mid_price'].isna().sum()),
    'null_spread': int(book['spread'].isna().sum()),
    'null_lastUpdateId': int(book['lastUpdateId'].isna().sum()),
    'null_bid_px_1': int(book['bid_px_1'].isna().sum()),
    'null_ask_px_1': int(book['ask_px_1'].isna().sum()),
}]).T.rename(columns={0: 'Count'})
null_checks.index.name = 'Column'
null_checks = null_checks.reset_index()

bid_level_counts = [
    sum(1 for lvl in range(1, N_LEVELS + 1) if not pd.isna(book.iloc[i][f'bid_px_{lvl}']))
    for i in range(min(500, len(book)))
]

ask_level_counts = [
    sum(1 for lvl in range(1, N_LEVELS + 1) if not pd.isna(book.iloc[i][f'ask_px_{lvl}']))
    for i in range(min(500, len(book)))
]

quality = pd.DataFrame([
    {
        'Check': 'depth snapshots collected',
        'Value': str(len(book)),
        'Status': 'ok' if len(book) > 500 else 'low'
    },
    {
        'Check': 'trades fetched',
        'Value': str(len(trades)),
        'Status': 'ok' if len(trades) > 0 else 'missing'
    },
    {
        'Check': 'null mid_price',
        'Value': str(int(book['mid_price'].isna().sum())),
        'Status': 'ok' if book['mid_price'].isna().sum() == 0 else 'warning'
    },
    {
        'Check': 'min bid levels sample 500',
        'Value': str(min(bid_level_counts)),
        'Status': 'ok' if min(bid_level_counts) == N_LEVELS else 'warning'
    },
    {
        'Check': 'min ask levels sample 500',
        'Value': str(min(ask_level_counts)),
        'Status': 'ok' if min(ask_level_counts) == N_LEVELS else 'warning'
    },
    {
        'Check': 'median poll gap',
        'Value': f"{book['local_gap_ms'].median():.1f} ms",
        'Status': 'ok' if book['local_gap_ms'].median() < 200 else 'warning'
    },
    {
        'Check': 'positive spread throughout',
        'Value': str((book['spread'] > 0).all()),
        'Status': 'ok' if (book['spread'] > 0).all() else 'warning'
    }
])

print('Quality checks')
display(quality)

print('Null checks')
display(null_checks)

Quality checks

	Check	Value	Status
0	depth snapshots collected	2809	ok
1	trades fetched	1000	ok
2	null mid_price	0	ok
3	min bid levels sample 500	10	ok
4	min ask levels sample 500	10	ok
5	median poll gap	307.2 ms	warning
6	positive spread throughout	True	ok

Null checks

	Column	Count
0	null_mid_price	0
1	null_spread	0
2	null_lastUpdateId	0
3	null_bid_px_1	0
4	null_ask_px_1	0

Dataset overview

Key statistics for the collected window.

In [25]:

overview = pd.DataFrame([
    {
        'Metric': 'Depth snapshots',
        'Value': f'{len(book):,}'
    },
    {
        'Metric': 'Duration (s)',
        'Value': f'{book["elapsed_s"].max():.1f}'
    },
    {
        'Metric': 'Median poll gap (ms)',
        'Value': f'{book["local_gap_ms"].median():.1f}'
    },
    {
        'Metric': 'p95 poll gap (ms)',
        'Value': f'{book["local_gap_ms"].quantile(0.95):.1f}'
    },
    {
        'Metric': 'Mid-price range ($)',
        'Value': f'${book["mid_price"].min():,.2f} to ${book["mid_price"].max():,.2f}'
    },
    {
        'Metric': 'Price range ($)',
        'Value': f'${book["mid_price"].max() - book["mid_price"].min():.2f}'
    },
    {
        'Metric': 'Median spread ($)',
        'Value': f'${book["spread"].median():.4f}'
    },
    {
        'Metric': 'Median spread (ticks)',
        'Value': f'{book["spread_ticks"].median():.0f}'
    },
    {
        'Metric': 'Median update_id gap',
        'Value': f'{book["update_id_gap"].median():.0f}'
    },
    {
        'Metric': 'Trades fetched',
        'Value': f'{len(trades):,}'
    },
    {
        'Metric': 'Collection start (UTC)',
        'Value': book['local_utc'].iloc[0]
    },
    {
        'Metric': 'Collection end (UTC)',
        'Value': book['local_utc'].iloc[-1]
    }
])

display(overview)

	Metric	Value
0	Depth snapshots	2,809
1	Duration (s)	900.0
2	Median poll gap (ms)	307.2
3	p95 poll gap (ms)	327.2
4	Mid-price range ($)	$76,499.61 to $76,699.99
5	Price range ($)	$200.38
6	Median spread ($)	$0.0100
7	Median spread (ticks)	10
8	Median update_id gap	48
9	Trades fetched	1,000
10	Collection start (UTC)	2026-05-24T16:44:16.123758+00:00
11	Collection end (UTC)	2026-05-24T16:59:16.118159+00:00

Mid-price, spread, and order book activity

Three panels over the full collection window:

Mid-price - (best_bid + best_ask) / 2 per snapshot
Spread in ticks - (ask_px_1 − bid_px_1) / 0.01
Update ID gap - how many book state changes occurred between polls, spikes indicate high order flow activity

In [26]:

plt.figure(figsize=(8, 4.5))
plt.plot(book['ts'], book['mid_price'], color=FL_BLUE, linewidth=1.2)
plt.ylabel('Mid-price (USDT)')
plt.title(f'BTCUSDT mid-price over {book["elapsed_s"].max():.0f}s ({len(book):,} snapshots)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(book['ts'], book['spread_ticks'], alpha=0.25, color=FL_AMBER)
plt.plot(book['ts'], book['spread_ticks'], color=FL_AMBER, linewidth=0.8)
plt.ylabel('Spread (ticks)')
plt.title('Bid ask spread in ticks ($0.01 each)')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

gap_clipped = book['update_id_gap'].clip(upper=book['update_id_gap'].quantile(0.99))

plt.figure(figsize=(8, 4.5))
plt.fill_between(book['ts'], gap_clipped, alpha=0.2, color=FL_SLATE)
plt.plot(book['ts'], gap_clipped, color=FL_SLATE, linewidth=0.8)
plt.ylabel('Update ID gap')
plt.title('Order book changes between consecutive polls (clipped at 99th percentile)')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

No description has been provided for this image

Order book depth profile

Average resting volume at each of the 10 price levels across all snapshots. Level 1 (best price) is the most contested. Deeper levels accumulate more volume as participants quote progressively wider spreads from the mid-price.

In [27]:

bid_avg = [book[f'bid_qty_{l}'].mean() for l in range(1, N_LEVELS + 1)]
ask_avg = [book[f'ask_qty_{l}'].mean() for l in range(1, N_LEVELS + 1)]

cum_bid = np.cumsum(bid_avg)
cum_ask = np.cumsum(ask_avg)
levels = np.arange(1, N_LEVELS + 1)

w = 0.35

plt.figure(figsize=(8, 4.5))
plt.bar(levels - w / 2, bid_avg, width=w, color=FL_GREEN, alpha=0.85, label='Bid')
plt.bar(levels + w / 2, ask_avg, width=w, color=FL_RED, alpha=0.85, label='Ask')
plt.xlabel('Level')
plt.ylabel('Average BTC quantity')
plt.title('Average resting volume per level')
plt.xticks(levels)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(levels, cum_bid, alpha=0.15, color=FL_GREEN)
plt.plot(
    levels,
    cum_bid,
    color=FL_GREEN,
    marker='o',
    markersize=5,
    linewidth=1.6,
    label='Bid cumulative'
)
plt.fill_between(levels, cum_ask, alpha=0.15, color=FL_RED)
plt.plot(
    levels,
    cum_ask,
    color=FL_RED,
    marker='o',
    markersize=5,
    linewidth=1.6,
    label='Ask cumulative'
)
plt.xlabel('Level')
plt.ylabel('Cumulative BTC quantity')
plt.title('Cumulative depth across 10 levels')
plt.xticks(levels)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

depth_levels_df = pd.DataFrame({
    'Level': levels,
    'Bid avg': bid_avg,
    'Ask avg': ask_avg,
    'Cum bid': cum_bid,
    'Cum ask': cum_ask
})

depth_levels_df[['Bid avg', 'Ask avg', 'Cum bid', 'Cum ask']] = (
    depth_levels_df[['Bid avg', 'Ask avg', 'Cum bid', 'Cum ask']].round(4)
)

display(depth_levels_df)

	Level	Bid avg	Ask avg	Cum bid	Cum ask
0	1	3.0854	2.0412	3.0854	2.0412
1	2	0.0244	0.1564	3.1098	2.1977
2	3	0.0031	0.0736	3.1129	2.2712
3	4	0.0115	0.0165	3.1245	2.2877
4	5	0.0384	0.0686	3.1629	2.3564
5	6	0.0370	0.0568	3.1999	2.4132
6	7	0.0598	0.0568	3.2597	2.4700
7	8	0.0689	0.0635	3.3286	2.5335
8	9	0.0638	0.0862	3.3925	2.6196
9	10	0.0692	0.0778	3.4617	2.6974

LOB snapshot - single instant

A single order book snapshot at the midpoint of the collection window. Each horizontal bar represents one price level as green bars are resting buy orders (bid side), red bars are resting sell orders (ask side). The gap between the innermost bars is the bid-ask spread.

In [28]:

snap_idx = len(book) // 2
snap = book.iloc[snap_idx]

bid_px = [snap[f'bid_px_{l}'] for l in range(1, N_LEVELS + 1)]
bid_qty = [snap[f'bid_qty_{l}'] for l in range(1, N_LEVELS + 1)]
ask_px = [snap[f'ask_px_{l}'] for l in range(1, N_LEVELS + 1)]
ask_qty = [snap[f'ask_qty_{l}'] for l in range(1, N_LEVELS + 1)]

bid_data = [(p, q) for p, q in zip(bid_px, bid_qty) if pd.notna(p) and pd.notna(q) and q > 0]
ask_data = [(p, q) for p, q in zip(ask_px, ask_qty) if pd.notna(p) and pd.notna(q) and q > 0]

snap_time = snap['local_dt'].strftime('%H:%M:%S UTC') if hasattr(snap['local_dt'], 'strftime') else ''

all_px = [p for p, _ in bid_data + ask_data]
pr = max(all_px) - min(all_px) if len(all_px) > 1 else 1
bar_h = pr / max(len(bid_data + ask_data) * 1.8, 1)

plt.figure(figsize=(8, 4.5))
if bid_data:
    bp, bq = zip(*bid_data)
    plt.barh(bp, [-q for q in bq], height=bar_h, color=FL_GREEN, alpha=0.85, label='Bid')

if ask_data:
    ap, aq = zip(*ask_data)
    plt.barh(ap, list(aq), height=bar_h, color=FL_RED, alpha=0.85, label='Ask')

plt.axvline(0, color=FL_BORDER, linewidth=1)
plt.xlabel('Volume (BTC)  |  left = bid, right = ask')
plt.ylabel('Price (USDT)')
plt.title(f'BTCUSDT limit order book snapshot at {snap_time}')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.2f}')
)
plt.gca().xaxis.set_major_formatter(
    mticker.FuncFormatter(lambda x, _: f'{abs(x):.3f}')
)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

if bid_data and ask_data:
    mid = (bid_data[0][0] + ask_data[0][0]) / 2
    spr = ask_data[0][0] - bid_data[0][0]

    snapshot_summary = pd.DataFrame([
        {'Metric': 'Snapshot time', 'Value': snap_time},
        {'Metric': 'Best bid', 'Value': f'${bid_data[0][0]:,.2f} ({bid_data[0][1]:.4f} BTC)'},
        {'Metric': 'Best ask', 'Value': f'${ask_data[0][0]:,.2f} ({ask_data[0][1]:.4f} BTC)'},
        {'Metric': 'Mid-price', 'Value': f'${mid:,.2f}'},
        {'Metric': 'Spread', 'Value': f'${spr:.4f}'},
        {'Metric': 'Spread (ticks)', 'Value': f'{spr / TICK_SIZE:.0f}'}
    ])

    display(snapshot_summary)

	Metric	Value
0	Snapshot time	16:51:44 UTC
1	Best bid	$76,595.99 (1.8710 BTC)
2	Best ask	$76,596.00 (2.0319 BTC)
3	Mid-price	$76,595.99
4	Spread	$0.0100
5	Spread (ticks)	10

Mid-price change distribution

Distribution of mid_price_change between consecutive 100ms snapshots. When the mid-price does move, it typically shifts by a small number of ticks.

In [29]:

changes = book['mid_price_change'].dropna()
nonzero = changes[changes != 0]

clip_all = changes.abs().quantile(0.995)

plt.figure(figsize=(8, 4.5))
plt.hist(
    changes.clip(-clip_all, clip_all),
    bins=60,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(0, color=FL_AMBER, linewidth=1.5, linestyle='--')
plt.xlabel(f'Change ($, clipped to ±{clip_all:.2f})')
plt.ylabel('Frequency')
plt.title('Mid-price change per 100ms across all snapshots')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

if len(nonzero) > 0:
    clip_nz = nonzero.abs().quantile(0.99)

    plt.figure(figsize=(8, 4.5))
    plt.hist(
        nonzero.clip(-clip_nz, clip_nz),
        bins=40,
        color=FL_AMBER,
        alpha=0.85,
        edgecolor='none'
    )
    plt.xlabel(f'Change ($, non-zero only, clipped to ±{clip_nz:.2f})')
    plt.ylabel('Frequency')
    plt.title(f'Non-zero mid-price moves ({len(nonzero):,} of {len(changes):,}, {len(nonzero) / len(changes):.1%})')
    plt.tick_params(length=0)
    plt.tight_layout()
    plt.show()

change_summary = pd.DataFrame([
    {
        'Metric': 'Zero changes',
        'Value': f'{(changes == 0).sum():,}',
        'Share': f'{(changes == 0).mean():.1%}'
    },
    {
        'Metric': 'Up moves',
        'Value': f'{(changes > 0).sum():,}',
        'Share': f'{(changes > 0).mean():.1%}'
    },
    {
        'Metric': 'Down moves',
        'Value': f'{(changes < 0).sum():,}',
        'Share': f'{(changes < 0).mean():.1%}'
    }
])

if len(nonzero) > 0:
    change_summary = pd.concat([
        change_summary,
        pd.DataFrame([
            {
                'Metric': 'Mean non-zero change',
                'Value': f'${nonzero.mean():.4f}',
                'Share': ''
            },
            {
                'Metric': 'Std non-zero change',
                'Value': f'${nonzero.std():.4f}',
                'Share': ''
            }
        ])
    ], ignore_index=True)

display(change_summary)

	Metric	Value	Share
0	Zero changes	2,724	97.0%
1	Up moves	63	2.2%
2	Down moves	21	0.7%
3	Mean non-zero change	$2.1077
4	Std non-zero change	$4.4696

Trade flow analysis

The aggTrades endpoint collapses consecutive fills at the same price and direction into single rows. The is_buyer_maker field (WebSocket field m) indicates aggressor side:

False - buyer initiated (market buy hitting resting ask)
True - seller initiated (market sell hitting resting bid)

Buy/sell imbalance over a short window is a leading indicator of short-term price direction in limit order book research.

In [30]:

buy_trades = trades[~trades['is_buyer_maker']]
sell_trades = trades[trades['is_buyer_maker']]

plt.figure(figsize=(8, 4.5))
plt.scatter(
    buy_trades['trade_dt'],
    buy_trades['price'],
    c=FL_GREEN,
    s=8,
    alpha=0.6,
    label='Buy aggressor'
)
plt.scatter(
    sell_trades['trade_dt'],
    sell_trades['price'],
    c=FL_RED,
    s=8,
    alpha=0.6,
    label='Sell aggressor'
)
plt.ylabel('Price (USDT)')
plt.title(f'Last {len(trades):,} aggTrades: price and direction')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

clip_qty = trades['qty'].quantile(0.99)

plt.figure(figsize=(8, 4.5))
plt.hist(
    trades['qty'].clip(upper=clip_qty),
    bins=40,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
plt.xlabel('Trade size (BTC, clipped at 99th percentile)')
plt.ylabel('Frequency')
plt.title('Aggregated trade size distribution')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

buy_vol = float(buy_trades['qty'].sum())
sell_vol = float(sell_trades['qty'].sum())

plt.figure(figsize=(8, 4.5))
plt.bar(
    ['Buy', 'Sell'],
    [buy_vol, sell_vol],
    color=[FL_GREEN, FL_RED],
    alpha=0.85,
    width=0.45
)
plt.ylabel('Total volume (BTC)')
plt.title('Aggressor volume: buy vs sell')
plt.tick_params(length=0)

for i, v in enumerate([buy_vol, sell_vol]):
    plt.text(
        i,
        v + max(buy_vol, sell_vol) * 0.01,
        f'{v:.3f}',
        ha='center',
        fontsize=10,
        color=FL_TEXT2
    )

plt.tight_layout()
plt.show()

trade_summary = pd.DataFrame([
    {
        'Metric': 'Trades',
        'Value': f'{len(trades):,}'
    },
    {
        'Metric': 'Total volume',
        'Value': f'{trades["qty"].sum():.4f} BTC'
    },
    {
        'Metric': 'Total dollar value',
        'Value': f'${trades["dollar_value"].sum():,.0f}'
    },
    {
        'Metric': 'Buy aggressor trades',
        'Value': f'{(~trades["is_buyer_maker"]).sum():,} ({(~trades["is_buyer_maker"]).mean():.1%})'
    },
    {
        'Metric': 'Sell aggressor trades',
        'Value': f'{trades["is_buyer_maker"].sum():,} ({trades["is_buyer_maker"].mean():.1%})'
    },
    {
        'Metric': 'Median trade size',
        'Value': f'{trades["qty"].median():.4f} BTC'
    },
    {
        'Metric': 'Mean trade size',
        'Value': f'{trades["qty"].mean():.4f} BTC'
    }
])

display(trade_summary)

	Metric	Value
0	Trades	1,000
1	Total volume	8.3586 BTC
2	Total dollar value	$640,941
3	Buy aggressor trades	801 (80.1%)
4	Sell aggressor trades	199 (19.9%)
5	Median trade size	0.0008 BTC
6	Mean trade size	0.0084 BTC

Spread distribution and polling latency

Spread distribution in ticks alongside the inter-poll gap histogram.

In [31]:

spread_ticks = book['spread_ticks'].dropna().round().astype(int)
spread_counts = spread_ticks.value_counts().sort_index().head(15)
poll_gaps = book['local_gap_ms'].dropna()

plt.figure(figsize=(8, 4.5))
plt.bar(
    spread_counts.index,
    spread_counts.values,
    color=FL_BLUE,
    alpha=0.85,
    width=0.6
)

for x, y in zip(spread_counts.index, spread_counts.values):
    plt.text(
        x,
        y + spread_counts.max() * 0.01,
        f'{y / len(book):.0%}',
        ha='center',
        fontsize=9,
        color=FL_TEXT2
    )

plt.xlabel('Spread (ticks)')
plt.ylabel('Snapshot count')
plt.title('Bid ask spread distribution')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.hist(
    poll_gaps.clip(0, 300),
    bins=50,
    color=FL_SLATE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(100, color=FL_AMBER, linewidth=1.5, linestyle='--', label='Target 100ms')
plt.axvline(
    poll_gaps.median(),
    color=FL_BLUE,
    linewidth=1.5,
    linestyle='--',
    label=f'Actual median {poll_gaps.median():.0f}ms'
)
plt.xlabel('Inter-poll gap (ms)')
plt.ylabel('Frequency')
plt.title('REST polling interval distribution')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

spread_gap_summary = pd.DataFrame([
    {
        'Metric': 'Most common spread',
        'Value': f'{spread_counts.idxmax()} tick(s)'
    },
    {
        'Metric': 'Share at most common spread',
        'Value': f'{spread_counts.max() / len(book):.1%}'
    },
    {
        'Metric': 'Median poll gap',
        'Value': f'{poll_gaps.median():.1f} ms'
    },
    {
        'Metric': 'p95 poll gap',
        'Value': f'{poll_gaps.quantile(0.95):.1f} ms'
    },
    {
        'Metric': 'Max poll gap',
        'Value': f'{poll_gaps.max():.1f} ms'
    }
])

display(spread_gap_summary)

	Metric	Value
0	Most common spread	10 tick(s)
1	Share at most common spread	100.0%
2	Median poll gap	307.2 ms
3	p95 poll gap	327.2 ms
4	Max poll gap	824.2 ms