Ctrl K

Binance API - BTCUSDT Limit Order Book EDA

This notebook collects live BTCUSDT order book and trade data directly from Binance's public REST API. No API key or authentication required for this pipeline. We further investigate the acquired data with an exploratory analysis covering data quality, mid-price dynamics, order book depth, trade flow, and a live LOB snapshot.

The REST depth endpoint returns the same 10-level bid/ask structure that the production WebSocket collector (@depth10@100ms) delivers. Every field name here matches the WebSocket stream schema exactly so this notebook doubles as field-level documentation for the production collector output.

Endpoints used (public, no auth required):

  • https://api.binance.com/api/v3/depth?symbol=BTCUSDT&limit=10
  • https://api.binance.com/api/v3/aggTrades?symbol=BTCUSDT&limit=1000
Field REST response WebSocket stream
bids / asks ✓ [[price, qty], ...] ✓ same
lastUpdateId ✓ ✓
E (event_time ms) - not present ✓
T (transaction_time ms) - not present ✓
local_ts added by notebook added by collector
e (event type) - 'depthUpdate'
Parameter Value
Symbol BTCUSDT (Bitcoin / USDT spot)
Collection window 2 minutes
Poll interval 100 ms
LOB depth 10 bid + 10 ask levels
Tick size $0.01
Trades Last 1,000 aggregated trades

References

  • Binance REST API - depth endpoint
  • Binance API documentation
  • WebSocket streams documentation
In [12]:
import time
import json
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timezone
from collections import Counter
from IPython.display import display, HTML

FL_BLUE   = '#2563eb'
FL_SLATE  = '#64748b'
FL_AMBER  = '#f59e0b'
FL_GREEN  = '#16a34a'
FL_RED    = '#ef4444'
FL_BG     = '#ffffff'
FL_GRID   = '#e2e8f0'
FL_TEXT   = '#0f172a'
FL_TEXT2  = '#334155'
FL_BORDER = '#e2e8f0'

matplotlib.rcParams.update({
    'figure.facecolor':  FL_BG,  'axes.facecolor':    FL_BG,
    'axes.edgecolor':    FL_BORDER, 'axes.labelcolor': FL_TEXT2,
    'axes.spines.top':   False,  'axes.spines.right': False,
    'axes.grid':         True,   'grid.color':        FL_GRID,
    'grid.linewidth':    0.7,    'xtick.color':       FL_TEXT2,
    'ytick.color':       FL_TEXT2, 'xtick.labelsize':  10,
    'ytick.labelsize':   10,     'axes.labelsize':    11,
    'axes.titlesize':    12,     'axes.titlecolor':   FL_TEXT,
    'axes.titlepad':     12,     'legend.frameon':    False,
    'legend.fontsize':   10,     'figure.dpi':        120,
    'savefig.bbox':      'tight', 'font.family':      'sans-serif',
    'font.sans-serif':   ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})

SYMBOL        = 'BTCUSDT'
N_LEVELS      = 10
POLL_INTERVAL = 0.10        # seconds
COLLECT_SECS  = 60*15       # data collection window (seconds)
TICK_SIZE     = 0.001       # BTCUSDT minimum price increment
BASE_URL      = 'https://api.binance.com/api/v3'

def safe_float(x):
    try: return float(x)
    except: return None

def safe_int(x):
    try: return int(x)
    except: return None

def ts_to_utc(ts, is_ms=False):
    """Convert Unix timestamp (seconds or ms) to UTC datetime string."""
    if ts is None: return None
    try:
        t = float(ts) / 1000 if is_ms else float(ts)
        return datetime.fromtimestamp(t, tz=timezone.utc).isoformat()
    except: return None

def detect_message_type(msg):
    if not isinstance(msg, dict): return 'unknown'
    if 'bids' in msg and 'asks' in msg: return 'depth'
    if msg.get('e') == 'aggTrade':      return 'aggTrade'
    if 'result' in msg and 'id' in msg: return 'subscribe_ack'
    return 'unknown'

def flatten_depth(msg, local_ts):
    """
    Flatten one depth message into a single dict.
    Field names match the production WebSocket stream schema exactly:
      E = event_time (ms),  T = transaction_time (ms),
      bids/asks = [[price_str, qty_str], ...]
    REST responses omit E and T; local_ts fills that role here.
    """
    bids = msg.get('bids', [])
    asks = msg.get('asks', [])
    row = {
        'lastUpdateId':     safe_int(msg.get('lastUpdateId')),
        'event_time':       msg.get('E'),       # None for REST; present in WS
        'transaction_time': msg.get('T'),       # None for REST; present in WS
        'local_ts':         local_ts,
    }
    for lvl in range(N_LEVELS):
        row[f'bid_px_{lvl+1}']  = safe_float(bids[lvl][0]) if lvl < len(bids) else None
        row[f'bid_qty_{lvl+1}'] = safe_float(bids[lvl][1]) if lvl < len(bids) else None
        row[f'ask_px_{lvl+1}']  = safe_float(asks[lvl][0]) if lvl < len(asks) else None
        row[f'ask_qty_{lvl+1}'] = safe_float(asks[lvl][1]) if lvl < len(asks) else None
    best_bid = row['bid_px_1']
    best_ask = row['ask_px_1']
    row['mid_price'] = (best_bid + best_ask) / 2 if best_bid and best_ask else None
    row['spread']    = best_ask - best_bid       if best_bid and best_ask else None
    return row

def flatten_trade(msg, local_ts):
    """Flatten one aggTrade message. Field names match WebSocket stream schema."""
    return {
        'agg_id':           safe_int(msg.get('a')),
        'price':            safe_float(msg.get('p')),
        'qty':              safe_float(msg.get('q')),
        'first_trade_id':   safe_int(msg.get('f')),
        'last_trade_id':    safe_int(msg.get('l')),
        'trade_time':       safe_int(msg.get('T')),    # ms
        'event_time':       safe_int(msg.get('E')),    # ms
        'is_buyer_maker':   msg.get('m'),              # True = sell aggressor
        'symbol':           msg.get('s', SYMBOL),
        'local_ts':         local_ts,
    }

def render_html_table(df, col_fmts=None):
    col_fmts = col_fmts or {}
    th = ('padding:8px 14px;text-align:left;font-size:11px;font-weight:600;'
          'color:#64748b;letter-spacing:0.04em;text-transform:uppercase;'
          'border-bottom:1px solid #e2e8f0;background:#f8fafc;')
    td_b = 'padding:8px 14px;font-size:13px;color:#334155;border-bottom:1px solid #f1f5f9;'
    td_m = td_b + 'font-family:ui-monospace,Menlo,monospace;white-space:nowrap;'
    num_cols = set(df.select_dtypes(include='number').columns)
    html = ['<div style="overflow-x:auto;margin:4px 0 16px;">',
            '<table style="border-collapse:collapse;width:100%;'
            'font-family:-apple-system,BlinkMacSystemFont,Inter,sans-serif;">',
            '<thead><tr>']
    for col in df.columns:
        html.append(f'<th style="{th}">{col}</th>')
    html.append('</tr></thead><tbody>')
    for i, (_, row) in enumerate(df.iterrows()):
        bg = '#ffffff' if i % 2 == 0 else '#f8fafc'
        html.append(f'<tr style="background:{bg}">')
        for col in df.columns:
            val = row[col]
            val_str = col_fmts[col](val) if col in col_fmts else (
                f'{val:,.4f}' if isinstance(val, float) else
                f'{val:,}'    if isinstance(val, (int, np.integer)) else str(val))
            html.append(f'<td style="{td_m if col in num_cols else td_b}">{val_str}</td>')
        html.append('</tr>')
    html.append('</tbody></table></div>')
    return ''.join(html)

print(f'Setup complete.')
print(f'Target: {SYMBOL} | {COLLECT_SECS}s window | {POLL_INTERVAL*1000:.0f}ms poll | ~{int(COLLECT_SECS/POLL_INTERVAL):,} expected snapshots')
Setup complete.
Target: BTCUSDT | 900s window | 100ms poll | ~9,000 expected snapshots

Data collection

The collector polls GET /api/v3/depth every 100ms for a short time, appending each response to depth_rows. After the window closes, GET /api/v3/aggTrades fetches the last 1,000 aggregated trades.

Each raw response is classified with detect_message_type() and flattened with flatten_depth(). local_ts (Unix seconds, local clock) is used as the primary timestamp instead.

In [13]:
session = requests.Session()

def fetch_depth():
    r = session.get(f'{BASE_URL}/depth',
                    params={'symbol': SYMBOL, 'limit': N_LEVELS}, timeout=5)
    r.raise_for_status()
    return r.json()

def fetch_agg_trades(limit=1000):
    r = session.get(f'{BASE_URL}/aggTrades',
                    params={'symbol': SYMBOL, 'limit': limit}, timeout=5)
    r.raise_for_status()
    return r.json()


print(f'Collecting {COLLECT_SECS}s of {SYMBOL} order book snapshots...')
t_start    = time.time()
depth_rows = []
type_counts = Counter()
errors     = 0

while time.time() - t_start < COLLECT_SECS:
    t_poll = time.time()
    try:
        raw = fetch_depth()
        msg_type = detect_message_type(raw)
        type_counts[msg_type] += 1
        if msg_type == 'depth':
            depth_rows.append(flatten_depth(raw, local_ts=t_poll))
    except Exception as exc:
        errors += 1
        type_counts['error'] += 1
    elapsed  = time.time() - t_poll
    time.sleep(max(0, POLL_INTERVAL - elapsed))
    if len(depth_rows) % 300 == 0 and depth_rows:
        pct = (time.time() - t_start) / COLLECT_SECS * 100
        print(f'  {pct:4.0f}%  {len(depth_rows):,} depth snapshots  errors={errors}')


print('Fetching recent aggregated trades...')
raw_trades  = fetch_agg_trades(limit=1000)
trade_rows  = [flatten_trade(t, local_ts=time.time()) for t in raw_trades]

print(f'\nCollection complete.')
print(f'Message types seen: {dict(type_counts)}')
print(f'Depth snapshots:    {len(depth_rows):,}')
print(f'Trades fetched:     {len(trade_rows):,}')
print(f'Errors:             {errors}')
Collecting 900s of BTCUSDT order book snapshots...
    11%  300 depth snapshots  errors=0
    21%  600 depth snapshots  errors=0
    32%  900 depth snapshots  errors=0
    43%  1,200 depth snapshots  errors=0
    53%  1,500 depth snapshots  errors=0
    64%  1,800 depth snapshots  errors=0
    75%  2,100 depth snapshots  errors=0
    85%  2,400 depth snapshots  errors=0
    96%  2,700 depth snapshots  errors=0
Fetching recent aggregated trades...

Collection complete.
Message types seen: {'depth': 2809}
Depth snapshots:    2,809
Trades fetched:     1,000
Errors:             0

Building the dataset

Raw rows are assembled into typed DataFrames. Derived columns follow the same naming convention as the production data prep pipeline:

  • event_gap_ms - milliseconds between consecutive event timestamps
  • local_gap_ms - milliseconds between consecutive local poll timestamps
  • update_id_gap - lastUpdateId delta (how many book updates occurred between polls)
  • mid_price_change - absolute mid-price change per snapshot
  • mid_price_return - percentage mid-price change per snapshot
  • spread_ticks - spread in minimum tick units ($0.01)
In [23]:
book = pd.DataFrame(depth_rows)

for col in ['lastUpdateId', 'event_time', 'transaction_time']:
    book[col] = pd.to_numeric(book[col], errors='coerce')

for lvl in range(1, N_LEVELS + 1):
    for pfx in ['bid_px_', 'bid_qty_', 'ask_px_', 'ask_qty_']:
        book[f'{pfx}{lvl}'] = pd.to_numeric(book[f'{pfx}{lvl}'], errors='coerce')

book['local_dt'] = pd.to_datetime(book['local_ts'], unit='s', utc=True)
book['ts'] = book['local_dt']
book['elapsed_s'] = book['local_ts'] - book['local_ts'].iloc[0]
book['local_utc'] = book['local_ts'].apply(lambda x: ts_to_utc(x, is_ms=False))

book['local_gap_ms'] = book['local_ts'].diff() * 1000
book['update_id_gap'] = book['lastUpdateId'].diff()
book['mid_price_change'] = book['mid_price'].diff()
book['mid_price_return'] = book['mid_price'].pct_change()
book['spread_ticks'] = (book['spread'] / TICK_SIZE).round()

trades = pd.DataFrame(trade_rows)
trades['trade_dt'] = pd.to_datetime(trades['trade_time'], unit='ms', utc=True)
trades['trade_utc'] = trades['trade_time'].apply(lambda x: ts_to_utc(x, is_ms=True))
trades['dollar_value'] = trades['price'] * trades['qty']

print('Collection summary')
print(f'Book rows:      {len(book):,}')
print(f'Trade rows:     {len(trades):,}')
print(f'Book columns:   {book.shape[1]}')
print(f'Trade columns:  {trades.shape[1]}')
print(f'Time range:     {book["local_utc"].iloc[0]} to {book["local_utc"].iloc[-1]}')
print(f'Duration:       {book["elapsed_s"].max():.1f}s')

book_cols_df = pd.DataFrame({
    'book_column': book.columns,
    'dtype': [str(book[c].dtype) for c in book.columns]
})

trade_cols_df = pd.DataFrame({
    'trade_column': trades.columns,
    'dtype': [str(trades[c].dtype) for c in trades.columns]
})

print('\nBook columns')
display(book_cols_df.head())

print('Trade columns')
display(trade_cols_df.head())

print('Book preview')
display(book.head())

print('Trades preview')
display(trades.head())
Collection summary
Book rows:      2,809
Trade rows:     1,000
Book columns:   55
Trade columns:  13
Time range:     2026-05-24T16:44:16.123758+00:00 to 2026-05-24T16:59:16.118159+00:00
Duration:       900.0s

Book columns
book_column dtype
0 lastUpdateId int64
1 event_time float64
2 transaction_time float64
3 local_ts float64
4 bid_px_1 float64
Trade columns
trade_column dtype
0 agg_id int64
1 price float64
2 qty float64
3 first_trade_id int64
4 last_trade_id int64
Book preview
lastUpdateId event_time transaction_time local_ts bid_px_1 bid_qty_1 ask_px_1 ask_qty_1 bid_px_2 bid_qty_2 ... spread local_dt ts elapsed_s local_utc local_gap_ms update_id_gap mid_price_change mid_price_return spread_ticks
0 94188948025 NaN NaN 1.779641e+09 76499.61 2.27896 76499.62 1.73519 76499.6 0.00084 ... 0.01 2026-05-24 16:44:16.123758316+00:00 2026-05-24 16:44:16.123758316+00:00 0.000000 2026-05-24T16:44:16.123758+00:00 NaN NaN NaN NaN 10.0
1 94188948066 NaN NaN 1.779641e+09 76499.61 2.27896 76499.62 1.73519 76499.6 0.00084 ... 0.01 2026-05-24 16:44:16.734374285+00:00 2026-05-24 16:44:16.734374285+00:00 0.610616 2026-05-24T16:44:16.734374+00:00 610.615969 41.0 0.0 0.0 10.0
2 94188948084 NaN NaN 1.779641e+09 76499.61 2.27896 76499.62 1.73519 76499.6 0.00084 ... 0.01 2026-05-24 16:44:17.041452646+00:00 2026-05-24 16:44:17.041452646+00:00 0.917694 2026-05-24T16:44:17.041453+00:00 307.078362 18.0 0.0 0.0 10.0
3 94188948107 NaN NaN 1.779641e+09 76499.61 2.27903 76499.62 1.73519 76499.6 0.00084 ... 0.01 2026-05-24 16:44:17.348654509+00:00 2026-05-24 16:44:17.348654509+00:00 1.224896 2026-05-24T16:44:17.348655+00:00 307.201862 23.0 0.0 0.0 10.0
4 94188948144 NaN NaN 1.779641e+09 76499.61 2.27872 76499.62 1.73508 76499.6 0.00084 ... 0.01 2026-05-24 16:44:17.651305199+00:00 2026-05-24 16:44:17.651305199+00:00 1.527547 2026-05-24T16:44:17.651305+00:00 302.650690 37.0 0.0 0.0 10.0

5 rows × 55 columns

Trades preview
agg_id price qty first_trade_id last_trade_id trade_time event_time is_buyer_maker symbol local_ts trade_dt trade_utc dollar_value
0 3963778223 76639.52 0.00014 6318962293 6318962294 1779641839373 None False BTCUSDT 1.779642e+09 2026-05-24 16:57:19.373000+00:00 2026-05-24T16:57:19.373000+00:00 10.729533
1 3963778224 76639.77 0.00028 6318962295 6318962298 1779641839373 None False BTCUSDT 1.779642e+09 2026-05-24 16:57:19.373000+00:00 2026-05-24T16:57:19.373000+00:00 21.459136
2 3963778225 76639.87 0.00008 6318962299 6318962299 1779641839373 None False BTCUSDT 1.779642e+09 2026-05-24 16:57:19.373000+00:00 2026-05-24T16:57:19.373000+00:00 6.131190
3 3963778226 76639.88 0.00028 6318962300 6318962303 1779641839373 None False BTCUSDT 1.779642e+09 2026-05-24 16:57:19.373000+00:00 2026-05-24T16:57:19.373000+00:00 21.459166
4 3963778227 76639.99 0.00021 6318962304 6318962306 1779641839373 None False BTCUSDT 1.779642e+09 2026-05-24 16:57:19.373000+00:00 2026-05-24T16:57:19.373000+00:00 16.094398

Data quality checks

Before any analysis, verify the dataset is structurally sound: correct depth level counts, no null prices, expected polling cadence.

In [24]:
null_checks = pd.DataFrame([{
    'null_mid_price': int(book['mid_price'].isna().sum()),
    'null_spread': int(book['spread'].isna().sum()),
    'null_lastUpdateId': int(book['lastUpdateId'].isna().sum()),
    'null_bid_px_1': int(book['bid_px_1'].isna().sum()),
    'null_ask_px_1': int(book['ask_px_1'].isna().sum()),
}]).T.rename(columns={0: 'Count'})
null_checks.index.name = 'Column'
null_checks = null_checks.reset_index()

bid_level_counts = [
    sum(1 for lvl in range(1, N_LEVELS + 1) if not pd.isna(book.iloc[i][f'bid_px_{lvl}']))
    for i in range(min(500, len(book)))
]

ask_level_counts = [
    sum(1 for lvl in range(1, N_LEVELS + 1) if not pd.isna(book.iloc[i][f'ask_px_{lvl}']))
    for i in range(min(500, len(book)))
]

quality = pd.DataFrame([
    {
        'Check': 'depth snapshots collected',
        'Value': str(len(book)),
        'Status': 'ok' if len(book) > 500 else 'low'
    },
    {
        'Check': 'trades fetched',
        'Value': str(len(trades)),
        'Status': 'ok' if len(trades) > 0 else 'missing'
    },
    {
        'Check': 'null mid_price',
        'Value': str(int(book['mid_price'].isna().sum())),
        'Status': 'ok' if book['mid_price'].isna().sum() == 0 else 'warning'
    },
    {
        'Check': 'min bid levels sample 500',
        'Value': str(min(bid_level_counts)),
        'Status': 'ok' if min(bid_level_counts) == N_LEVELS else 'warning'
    },
    {
        'Check': 'min ask levels sample 500',
        'Value': str(min(ask_level_counts)),
        'Status': 'ok' if min(ask_level_counts) == N_LEVELS else 'warning'
    },
    {
        'Check': 'median poll gap',
        'Value': f"{book['local_gap_ms'].median():.1f} ms",
        'Status': 'ok' if book['local_gap_ms'].median() < 200 else 'warning'
    },
    {
        'Check': 'positive spread throughout',
        'Value': str((book['spread'] > 0).all()),
        'Status': 'ok' if (book['spread'] > 0).all() else 'warning'
    }
])

print('Quality checks')
display(quality)

print('Null checks')
display(null_checks)
Quality checks
Check Value Status
0 depth snapshots collected 2809 ok
1 trades fetched 1000 ok
2 null mid_price 0 ok
3 min bid levels sample 500 10 ok
4 min ask levels sample 500 10 ok
5 median poll gap 307.2 ms warning
6 positive spread throughout True ok
Null checks
Column Count
0 null_mid_price 0
1 null_spread 0
2 null_lastUpdateId 0
3 null_bid_px_1 0
4 null_ask_px_1 0

Dataset overview

Key statistics for the collected window.

In [25]:
overview = pd.DataFrame([
    {
        'Metric': 'Depth snapshots',
        'Value': f'{len(book):,}'
    },
    {
        'Metric': 'Duration (s)',
        'Value': f'{book["elapsed_s"].max():.1f}'
    },
    {
        'Metric': 'Median poll gap (ms)',
        'Value': f'{book["local_gap_ms"].median():.1f}'
    },
    {
        'Metric': 'p95 poll gap (ms)',
        'Value': f'{book["local_gap_ms"].quantile(0.95):.1f}'
    },
    {
        'Metric': 'Mid-price range ($)',
        'Value': f'${book["mid_price"].min():,.2f} to ${book["mid_price"].max():,.2f}'
    },
    {
        'Metric': 'Price range ($)',
        'Value': f'${book["mid_price"].max() - book["mid_price"].min():.2f}'
    },
    {
        'Metric': 'Median spread ($)',
        'Value': f'${book["spread"].median():.4f}'
    },
    {
        'Metric': 'Median spread (ticks)',
        'Value': f'{book["spread_ticks"].median():.0f}'
    },
    {
        'Metric': 'Median update_id gap',
        'Value': f'{book["update_id_gap"].median():.0f}'
    },
    {
        'Metric': 'Trades fetched',
        'Value': f'{len(trades):,}'
    },
    {
        'Metric': 'Collection start (UTC)',
        'Value': book['local_utc'].iloc[0]
    },
    {
        'Metric': 'Collection end (UTC)',
        'Value': book['local_utc'].iloc[-1]
    }
])

display(overview)
Metric Value
0 Depth snapshots 2,809
1 Duration (s) 900.0
2 Median poll gap (ms) 307.2
3 p95 poll gap (ms) 327.2
4 Mid-price range ($) $76,499.61 to $76,699.99
5 Price range ($) $200.38
6 Median spread ($) $0.0100
7 Median spread (ticks) 10
8 Median update_id gap 48
9 Trades fetched 1,000
10 Collection start (UTC) 2026-05-24T16:44:16.123758+00:00
11 Collection end (UTC) 2026-05-24T16:59:16.118159+00:00

Mid-price, spread, and order book activity

Three panels over the full collection window:

  • Mid-price - (best_bid + best_ask) / 2 per snapshot
  • Spread in ticks - (ask_px_1 − bid_px_1) / 0.01
  • Update ID gap - how many book state changes occurred between polls, spikes indicate high order flow activity
In [26]:
plt.figure(figsize=(8, 4.5))
plt.plot(book['ts'], book['mid_price'], color=FL_BLUE, linewidth=1.2)
plt.ylabel('Mid-price (USDT)')
plt.title(f'BTCUSDT mid-price over {book["elapsed_s"].max():.0f}s ({len(book):,} snapshots)')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(book['ts'], book['spread_ticks'], alpha=0.25, color=FL_AMBER)
plt.plot(book['ts'], book['spread_ticks'], color=FL_AMBER, linewidth=0.8)
plt.ylabel('Spread (ticks)')
plt.title('Bid ask spread in ticks ($0.01 each)')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

gap_clipped = book['update_id_gap'].clip(upper=book['update_id_gap'].quantile(0.99))

plt.figure(figsize=(8, 4.5))
plt.fill_between(book['ts'], gap_clipped, alpha=0.2, color=FL_SLATE)
plt.plot(book['ts'], gap_clipped, color=FL_SLATE, linewidth=0.8)
plt.ylabel('Update ID gap')
plt.title('Order book changes between consecutive polls (clipped at 99th percentile)')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
plt.tick_params(length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Order book depth profile

Average resting volume at each of the 10 price levels across all snapshots. Level 1 (best price) is the most contested. Deeper levels accumulate more volume as participants quote progressively wider spreads from the mid-price.

In [27]:
bid_avg = [book[f'bid_qty_{l}'].mean() for l in range(1, N_LEVELS + 1)]
ask_avg = [book[f'ask_qty_{l}'].mean() for l in range(1, N_LEVELS + 1)]

cum_bid = np.cumsum(bid_avg)
cum_ask = np.cumsum(ask_avg)
levels = np.arange(1, N_LEVELS + 1)

w = 0.35

plt.figure(figsize=(8, 4.5))
plt.bar(levels - w / 2, bid_avg, width=w, color=FL_GREEN, alpha=0.85, label='Bid')
plt.bar(levels + w / 2, ask_avg, width=w, color=FL_RED, alpha=0.85, label='Ask')
plt.xlabel('Level')
plt.ylabel('Average BTC quantity')
plt.title('Average resting volume per level')
plt.xticks(levels)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.fill_between(levels, cum_bid, alpha=0.15, color=FL_GREEN)
plt.plot(
    levels,
    cum_bid,
    color=FL_GREEN,
    marker='o',
    markersize=5,
    linewidth=1.6,
    label='Bid cumulative'
)
plt.fill_between(levels, cum_ask, alpha=0.15, color=FL_RED)
plt.plot(
    levels,
    cum_ask,
    color=FL_RED,
    marker='o',
    markersize=5,
    linewidth=1.6,
    label='Ask cumulative'
)
plt.xlabel('Level')
plt.ylabel('Cumulative BTC quantity')
plt.title('Cumulative depth across 10 levels')
plt.xticks(levels)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

depth_levels_df = pd.DataFrame({
    'Level': levels,
    'Bid avg': bid_avg,
    'Ask avg': ask_avg,
    'Cum bid': cum_bid,
    'Cum ask': cum_ask
})

depth_levels_df[['Bid avg', 'Ask avg', 'Cum bid', 'Cum ask']] = (
    depth_levels_df[['Bid avg', 'Ask avg', 'Cum bid', 'Cum ask']].round(4)
)

display(depth_levels_df)
No description has been provided for this image
No description has been provided for this image
Level Bid avg Ask avg Cum bid Cum ask
0 1 3.0854 2.0412 3.0854 2.0412
1 2 0.0244 0.1564 3.1098 2.1977
2 3 0.0031 0.0736 3.1129 2.2712
3 4 0.0115 0.0165 3.1245 2.2877
4 5 0.0384 0.0686 3.1629 2.3564
5 6 0.0370 0.0568 3.1999 2.4132
6 7 0.0598 0.0568 3.2597 2.4700
7 8 0.0689 0.0635 3.3286 2.5335
8 9 0.0638 0.0862 3.3925 2.6196
9 10 0.0692 0.0778 3.4617 2.6974

LOB snapshot - single instant

A single order book snapshot at the midpoint of the collection window. Each horizontal bar represents one price level as green bars are resting buy orders (bid side), red bars are resting sell orders (ask side). The gap between the innermost bars is the bid-ask spread.

In [28]:
snap_idx = len(book) // 2
snap = book.iloc[snap_idx]

bid_px = [snap[f'bid_px_{l}'] for l in range(1, N_LEVELS + 1)]
bid_qty = [snap[f'bid_qty_{l}'] for l in range(1, N_LEVELS + 1)]
ask_px = [snap[f'ask_px_{l}'] for l in range(1, N_LEVELS + 1)]
ask_qty = [snap[f'ask_qty_{l}'] for l in range(1, N_LEVELS + 1)]

bid_data = [(p, q) for p, q in zip(bid_px, bid_qty) if pd.notna(p) and pd.notna(q) and q > 0]
ask_data = [(p, q) for p, q in zip(ask_px, ask_qty) if pd.notna(p) and pd.notna(q) and q > 0]

snap_time = snap['local_dt'].strftime('%H:%M:%S UTC') if hasattr(snap['local_dt'], 'strftime') else ''

all_px = [p for p, _ in bid_data + ask_data]
pr = max(all_px) - min(all_px) if len(all_px) > 1 else 1
bar_h = pr / max(len(bid_data + ask_data) * 1.8, 1)

plt.figure(figsize=(8, 4.5))
if bid_data:
    bp, bq = zip(*bid_data)
    plt.barh(bp, [-q for q in bq], height=bar_h, color=FL_GREEN, alpha=0.85, label='Bid')

if ask_data:
    ap, aq = zip(*ask_data)
    plt.barh(ap, list(aq), height=bar_h, color=FL_RED, alpha=0.85, label='Ask')

plt.axvline(0, color=FL_BORDER, linewidth=1)
plt.xlabel('Volume (BTC)  |  left = bid, right = ask')
plt.ylabel('Price (USDT)')
plt.title(f'BTCUSDT limit order book snapshot at {snap_time}')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.2f}')
)
plt.gca().xaxis.set_major_formatter(
    mticker.FuncFormatter(lambda x, _: f'{abs(x):.3f}')
)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

if bid_data and ask_data:
    mid = (bid_data[0][0] + ask_data[0][0]) / 2
    spr = ask_data[0][0] - bid_data[0][0]

    snapshot_summary = pd.DataFrame([
        {'Metric': 'Snapshot time', 'Value': snap_time},
        {'Metric': 'Best bid', 'Value': f'${bid_data[0][0]:,.2f} ({bid_data[0][1]:.4f} BTC)'},
        {'Metric': 'Best ask', 'Value': f'${ask_data[0][0]:,.2f} ({ask_data[0][1]:.4f} BTC)'},
        {'Metric': 'Mid-price', 'Value': f'${mid:,.2f}'},
        {'Metric': 'Spread', 'Value': f'${spr:.4f}'},
        {'Metric': 'Spread (ticks)', 'Value': f'{spr / TICK_SIZE:.0f}'}
    ])

    display(snapshot_summary)
No description has been provided for this image
Metric Value
0 Snapshot time 16:51:44 UTC
1 Best bid $76,595.99 (1.8710 BTC)
2 Best ask $76,596.00 (2.0319 BTC)
3 Mid-price $76,595.99
4 Spread $0.0100
5 Spread (ticks) 10

Mid-price change distribution

Distribution of mid_price_change between consecutive 100ms snapshots. When the mid-price does move, it typically shifts by a small number of ticks.

In [29]:
changes = book['mid_price_change'].dropna()
nonzero = changes[changes != 0]

clip_all = changes.abs().quantile(0.995)

plt.figure(figsize=(8, 4.5))
plt.hist(
    changes.clip(-clip_all, clip_all),
    bins=60,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(0, color=FL_AMBER, linewidth=1.5, linestyle='--')
plt.xlabel(f'Change ($, clipped to ±{clip_all:.2f})')
plt.ylabel('Frequency')
plt.title('Mid-price change per 100ms across all snapshots')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

if len(nonzero) > 0:
    clip_nz = nonzero.abs().quantile(0.99)

    plt.figure(figsize=(8, 4.5))
    plt.hist(
        nonzero.clip(-clip_nz, clip_nz),
        bins=40,
        color=FL_AMBER,
        alpha=0.85,
        edgecolor='none'
    )
    plt.xlabel(f'Change ($, non-zero only, clipped to ±{clip_nz:.2f})')
    plt.ylabel('Frequency')
    plt.title(f'Non-zero mid-price moves ({len(nonzero):,} of {len(changes):,}, {len(nonzero) / len(changes):.1%})')
    plt.tick_params(length=0)
    plt.tight_layout()
    plt.show()

change_summary = pd.DataFrame([
    {
        'Metric': 'Zero changes',
        'Value': f'{(changes == 0).sum():,}',
        'Share': f'{(changes == 0).mean():.1%}'
    },
    {
        'Metric': 'Up moves',
        'Value': f'{(changes > 0).sum():,}',
        'Share': f'{(changes > 0).mean():.1%}'
    },
    {
        'Metric': 'Down moves',
        'Value': f'{(changes < 0).sum():,}',
        'Share': f'{(changes < 0).mean():.1%}'
    }
])

if len(nonzero) > 0:
    change_summary = pd.concat([
        change_summary,
        pd.DataFrame([
            {
                'Metric': 'Mean non-zero change',
                'Value': f'${nonzero.mean():.4f}',
                'Share': ''
            },
            {
                'Metric': 'Std non-zero change',
                'Value': f'${nonzero.std():.4f}',
                'Share': ''
            }
        ])
    ], ignore_index=True)

display(change_summary)
No description has been provided for this image
No description has been provided for this image
Metric Value Share
0 Zero changes 2,724 97.0%
1 Up moves 63 2.2%
2 Down moves 21 0.7%
3 Mean non-zero change $2.1077
4 Std non-zero change $4.4696

Trade flow analysis

The aggTrades endpoint collapses consecutive fills at the same price and direction into single rows. The is_buyer_maker field (WebSocket field m) indicates aggressor side:

  • False - buyer initiated (market buy hitting resting ask)
  • True - seller initiated (market sell hitting resting bid)

Buy/sell imbalance over a short window is a leading indicator of short-term price direction in limit order book research.

In [30]:
buy_trades = trades[~trades['is_buyer_maker']]
sell_trades = trades[trades['is_buyer_maker']]

plt.figure(figsize=(8, 4.5))
plt.scatter(
    buy_trades['trade_dt'],
    buy_trades['price'],
    c=FL_GREEN,
    s=8,
    alpha=0.6,
    label='Buy aggressor'
)
plt.scatter(
    sell_trades['trade_dt'],
    sell_trades['price'],
    c=FL_RED,
    s=8,
    alpha=0.6,
    label='Sell aggressor'
)
plt.ylabel('Price (USDT)')
plt.title(f'Last {len(trades):,} aggTrades: price and direction')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

clip_qty = trades['qty'].quantile(0.99)

plt.figure(figsize=(8, 4.5))
plt.hist(
    trades['qty'].clip(upper=clip_qty),
    bins=40,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
plt.xlabel('Trade size (BTC, clipped at 99th percentile)')
plt.ylabel('Frequency')
plt.title('Aggregated trade size distribution')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

buy_vol = float(buy_trades['qty'].sum())
sell_vol = float(sell_trades['qty'].sum())

plt.figure(figsize=(8, 4.5))
plt.bar(
    ['Buy', 'Sell'],
    [buy_vol, sell_vol],
    color=[FL_GREEN, FL_RED],
    alpha=0.85,
    width=0.45
)
plt.ylabel('Total volume (BTC)')
plt.title('Aggressor volume: buy vs sell')
plt.tick_params(length=0)

for i, v in enumerate([buy_vol, sell_vol]):
    plt.text(
        i,
        v + max(buy_vol, sell_vol) * 0.01,
        f'{v:.3f}',
        ha='center',
        fontsize=10,
        color=FL_TEXT2
    )

plt.tight_layout()
plt.show()

trade_summary = pd.DataFrame([
    {
        'Metric': 'Trades',
        'Value': f'{len(trades):,}'
    },
    {
        'Metric': 'Total volume',
        'Value': f'{trades["qty"].sum():.4f} BTC'
    },
    {
        'Metric': 'Total dollar value',
        'Value': f'${trades["dollar_value"].sum():,.0f}'
    },
    {
        'Metric': 'Buy aggressor trades',
        'Value': f'{(~trades["is_buyer_maker"]).sum():,} ({(~trades["is_buyer_maker"]).mean():.1%})'
    },
    {
        'Metric': 'Sell aggressor trades',
        'Value': f'{trades["is_buyer_maker"].sum():,} ({trades["is_buyer_maker"].mean():.1%})'
    },
    {
        'Metric': 'Median trade size',
        'Value': f'{trades["qty"].median():.4f} BTC'
    },
    {
        'Metric': 'Mean trade size',
        'Value': f'{trades["qty"].mean():.4f} BTC'
    }
])

display(trade_summary)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Metric Value
0 Trades 1,000
1 Total volume 8.3586 BTC
2 Total dollar value $640,941
3 Buy aggressor trades 801 (80.1%)
4 Sell aggressor trades 199 (19.9%)
5 Median trade size 0.0008 BTC
6 Mean trade size 0.0084 BTC

Spread distribution and polling latency

Spread distribution in ticks alongside the inter-poll gap histogram.

In [31]:
spread_ticks = book['spread_ticks'].dropna().round().astype(int)
spread_counts = spread_ticks.value_counts().sort_index().head(15)
poll_gaps = book['local_gap_ms'].dropna()

plt.figure(figsize=(8, 4.5))
plt.bar(
    spread_counts.index,
    spread_counts.values,
    color=FL_BLUE,
    alpha=0.85,
    width=0.6
)

for x, y in zip(spread_counts.index, spread_counts.values):
    plt.text(
        x,
        y + spread_counts.max() * 0.01,
        f'{y / len(book):.0%}',
        ha='center',
        fontsize=9,
        color=FL_TEXT2
    )

plt.xlabel('Spread (ticks)')
plt.ylabel('Snapshot count')
plt.title('Bid ask spread distribution')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))
plt.hist(
    poll_gaps.clip(0, 300),
    bins=50,
    color=FL_SLATE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(100, color=FL_AMBER, linewidth=1.5, linestyle='--', label='Target 100ms')
plt.axvline(
    poll_gaps.median(),
    color=FL_BLUE,
    linewidth=1.5,
    linestyle='--',
    label=f'Actual median {poll_gaps.median():.0f}ms'
)
plt.xlabel('Inter-poll gap (ms)')
plt.ylabel('Frequency')
plt.title('REST polling interval distribution')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

spread_gap_summary = pd.DataFrame([
    {
        'Metric': 'Most common spread',
        'Value': f'{spread_counts.idxmax()} tick(s)'
    },
    {
        'Metric': 'Share at most common spread',
        'Value': f'{spread_counts.max() / len(book):.1%}'
    },
    {
        'Metric': 'Median poll gap',
        'Value': f'{poll_gaps.median():.1f} ms'
    },
    {
        'Metric': 'p95 poll gap',
        'Value': f'{poll_gaps.quantile(0.95):.1f} ms'
    },
    {
        'Metric': 'Max poll gap',
        'Value': f'{poll_gaps.max():.1f} ms'
    }
])

display(spread_gap_summary)
No description has been provided for this image
No description has been provided for this image
Metric Value
0 Most common spread 10 tick(s)
1 Share at most common spread 100.0%
2 Median poll gap 307.2 ms
3 p95 poll gap 327.2 ms
4 Max poll gap 824.2 ms