Binance API - BTCUSDT Limit Order Book EDA
This notebook collects live BTCUSDT order book and trade data directly from Binance's public REST API. No API key or authentication required for this pipeline. We further investigate the acquired data with an exploratory analysis covering data quality, mid-price dynamics, order book depth, trade flow, and a live LOB snapshot.
The REST depth endpoint returns the same 10-level bid/ask structure that the production WebSocket collector (@depth10@100ms) delivers. Every field name here matches the WebSocket stream schema exactly so this notebook doubles as field-level documentation for the production collector output.
Endpoints used (public, no auth required):
- https://api.binance.com/api/v3/depth?symbol=BTCUSDT&limit=10
- https://api.binance.com/api/v3/aggTrades?symbol=BTCUSDT&limit=1000
| Field | REST response | WebSocket stream |
|---|---|---|
| bids / asks | ✓ [[price, qty], ...] | ✓ same |
| lastUpdateId | ✓ | ✓ |
| E (event_time ms) | - not present | ✓ |
| T (transaction_time ms) | - not present | ✓ |
| local_ts | added by notebook | added by collector |
| e (event type) | - | 'depthUpdate' |
| Parameter | Value |
|---|---|
| Symbol | BTCUSDT (Bitcoin / USDT spot) |
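| Collection window | 15 minutes (`COLLECT_SECS = 60*15`, i.e. 900 s) |
| Poll interval | 100 ms target (the measured median gap in the run shown below was about 307 ms) |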
| LOB depth | 10 bid + 10 ask levels |
| Tick size | $0.01 |
| Trades | Last 1,000 aggregated trades |
References
import time
import json
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime, timezone
from collections import Counter
from IPython.display import display, HTML
# Colour palette shared by every figure in this notebook.
FL_BLUE, FL_SLATE, FL_AMBER = '#2563eb', '#64748b', '#f59e0b'
FL_GREEN, FL_RED = '#16a34a', '#ef4444'
FL_BG = '#ffffff'
FL_GRID = '#e2e8f0'
FL_TEXT, FL_TEXT2 = '#0f172a', '#334155'
FL_BORDER = '#e2e8f0'

# Global matplotlib theme, applied once so individual plotting cells only
# need to set data-specific options.
matplotlib.rcParams.update({
    # canvas
    'figure.facecolor': FL_BG,
    'axes.facecolor': FL_BG,
    # axes frame and labels
    'axes.edgecolor': FL_BORDER,
    'axes.labelcolor': FL_TEXT2,
    'axes.spines.top': False,
    'axes.spines.right': False,
    # grid
    'axes.grid': True,
    'grid.color': FL_GRID,
    'grid.linewidth': 0.7,
    # ticks
    'xtick.color': FL_TEXT2,
    'ytick.color': FL_TEXT2,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    # text sizes and titles
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'axes.titlecolor': FL_TEXT,
    'axes.titlepad': 12,
    # legend
    'legend.frameon': False,
    'legend.fontsize': 10,
    # output
    'figure.dpi': 120,
    'savefig.bbox': 'tight',
    # fonts, in order of preference
    'font.family': 'sans-serif',
    'font.sans-serif': ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})
# Run configuration for the collector and the analysis cells below.
SYMBOL = 'BTCUSDT'
N_LEVELS = 10            # price levels kept per side of the book
POLL_INTERVAL = 0.10     # target seconds between depth polls
COLLECT_SECS = 60 * 15   # data collection window (seconds)
# BTCUSDT minimum price increment in USDT. Every "spread in ticks" figure
# divides by this value, so it must equal the $0.01 tick that the plot titles
# and tables in this notebook state; a smaller value inflates the tick counts.
TICK_SIZE = 0.01
BASE_URL = 'https://api.binance.com/api/v3'
def safe_float(x):
    """Return ``float(x)``, or ``None`` when *x* cannot be converted.

    Only conversion failures are swallowed (wrong type, unparsable string,
    integer too large for a float); anything else, including
    ``KeyboardInterrupt``, propagates to the caller.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return None
def safe_int(x):
    """Return ``int(x)``, or ``None`` when *x* cannot be converted.

    Only conversion failures are swallowed (wrong type, unparsable string,
    NaN or infinite float); anything else, including ``KeyboardInterrupt``,
    propagates to the caller.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        return None
def ts_to_utc(ts, is_ms=False):
    """Convert a Unix timestamp to an ISO-8601 UTC datetime string.

    Args:
        ts: Timestamp as a number or numeric string; ``None`` is passed through.
        is_ms: When true, *ts* is in milliseconds and is divided by 1000 first.

    Returns:
        ``datetime.isoformat()`` text with a ``+00:00`` offset, or ``None``
        when *ts* is ``None``, not numeric, or outside the range the platform
        can represent.
    """
    if ts is None:
        return None
    try:
        seconds = float(ts) / 1000 if is_ms else float(ts)
        return datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat()
    except (TypeError, ValueError, OverflowError, OSError):
        # float() failures plus fromtimestamp() range errors; other exceptions
        # are deliberately not swallowed.
        return None
def detect_message_type(msg):
    """Classify a decoded API payload by the keys it carries.

    Returns one of ``'depth'``, ``'aggTrade'``, ``'subscribe_ack'`` or
    ``'unknown'``. Checks run in that order, so a dict holding both ``bids``
    and ``asks`` is always reported as ``'depth'``. Anything that is not a
    dict is ``'unknown'``.
    """
    kind = 'unknown'
    if isinstance(msg, dict):
        if all(key in msg for key in ('bids', 'asks')):
            kind = 'depth'
        elif msg.get('e') == 'aggTrade':
            kind = 'aggTrade'
        elif all(key in msg for key in ('result', 'id')):
            kind = 'subscribe_ack'
    return kind
def flatten_depth(msg, local_ts):
    """Flatten one depth message into a single flat dict (one DataFrame row).

    Field names follow the production WebSocket stream schema:
    ``E`` = event_time (ms), ``T`` = transaction_time (ms),
    ``bids``/``asks`` = ``[[price_str, qty_str], ...]``.
    REST responses omit ``E`` and ``T``; ``local_ts`` fills that role here.

    Args:
        msg: Decoded depth payload (dict with ``bids`` and ``asks`` lists).
        local_ts: Timestamp supplied by the caller, stored unchanged.

    Returns:
        Dict with ``lastUpdateId``, ``event_time``, ``transaction_time``,
        ``local_ts``, then ``bid_px_i`` / ``bid_qty_i`` / ``ask_px_i`` /
        ``ask_qty_i`` for ``i`` in ``1..N_LEVELS`` (``None`` where the message
        has fewer levels or a value does not parse), and finally
        ``mid_price`` and ``spread`` (``None`` unless both best prices exist).
    """
    bids = msg.get('bids', [])
    asks = msg.get('asks', [])
    row = {
        'lastUpdateId': safe_int(msg.get('lastUpdateId')),
        'event_time': msg.get('E'),        # None for REST; present in WS
        'transaction_time': msg.get('T'),  # None for REST; present in WS
        'local_ts': local_ts,
    }

    missing = (None, None)  # stand-in for a level the message does not contain
    for lvl in range(N_LEVELS):
        bid = bids[lvl] if lvl < len(bids) else missing
        ask = asks[lvl] if lvl < len(asks) else missing
        # Key insertion order defines the DataFrame column order downstream.
        row[f'bid_px_{lvl+1}'] = safe_float(bid[0])
        row[f'bid_qty_{lvl+1}'] = safe_float(bid[1])
        row[f'ask_px_{lvl+1}'] = safe_float(ask[0])
        row[f'ask_qty_{lvl+1}'] = safe_float(ask[1])

    best_bid = row.get('bid_px_1')
    best_ask = row.get('ask_px_1')
    # Test for None explicitly: a truthiness test would also discard a
    # legitimately parsed 0.0.
    has_both = best_bid is not None and best_ask is not None
    row['mid_price'] = (best_bid + best_ask) / 2 if has_both else None
    row['spread'] = best_ask - best_bid if has_both else None
    return row
def flatten_trade(msg, local_ts):
    """Turn one aggTrade payload into a flat row dict.

    The short keys read from *msg* (``a``, ``p``, ``q``, ``f``, ``l``, ``T``,
    ``E``, ``m``, ``s``) are the aggTrade field names; missing keys yield
    ``None``, except ``s`` which falls back to ``SYMBOL``.
    """
    field = msg.get
    return dict(
        agg_id=safe_int(field('a')),
        price=safe_float(field('p')),
        qty=safe_float(field('q')),
        first_trade_id=safe_int(field('f')),
        last_trade_id=safe_int(field('l')),
        trade_time=safe_int(field('T')),   # ms
        event_time=safe_int(field('E')),   # ms
        is_buyer_maker=field('m'),         # True = sell aggressor
        symbol=field('s', SYMBOL),
        local_ts=local_ts,
    )
def render_html_table(df, col_fmts=None):
    """Render *df* as a styled, zebra-striped HTML ``<table>`` string.

    Args:
        df: DataFrame to render; every row and column is emitted.
        col_fmts: Optional ``{column: callable}`` mapping. A column's callable
            receives the raw cell value and its result is inserted verbatim
            (not escaped), so a formatter may return markup on purpose.

    Returns:
        A single HTML string (wrapper ``<div>`` plus ``<table>``).

    Default formatting: floats as ``1,234.5678``, integers with thousands
    separators, booleans as ``True``/``False``, everything else via ``str()``.
    Header text and default-formatted non-numeric cells are HTML-escaped so
    values such as ``<NA>`` or ``a<b`` show up literally instead of being
    parsed as tags.
    """
    from html import escape  # local import keeps the file's import block as-is

    col_fmts = col_fmts or {}
    th = ('padding:8px 14px;text-align:left;font-size:11px;font-weight:600;'
          'color:#64748b;letter-spacing:0.04em;text-transform:uppercase;'
          'border-bottom:1px solid #e2e8f0;background:#f8fafc;')
    td_b = 'padding:8px 14px;font-size:13px;color:#334155;border-bottom:1px solid #f1f5f9;'
    td_m = td_b + 'font-family:ui-monospace,Menlo,monospace;white-space:nowrap;'
    # Numeric columns get the monospace cell style.
    num_cols = set(df.select_dtypes(include='number').columns)

    def _cell_text(col, val):
        """Format one cell using col_fmts first, then the type-based defaults."""
        if col in col_fmts:
            return col_fmts[col](val)
        # bool is a subclass of int; handle it first so it is not shown as 1/0.
        if isinstance(val, (bool, np.bool_)):
            return str(val)
        if isinstance(val, float):
            return f'{val:,.4f}'
        if isinstance(val, (int, np.integer)):
            return f'{val:,}'
        return escape(str(val))

    parts = ['<div style="overflow-x:auto;margin:4px 0 16px;">',
             '<table style="border-collapse:collapse;width:100%;'
             'font-family:-apple-system,BlinkMacSystemFont,Inter,sans-serif;">',
             '<thead><tr>']
    parts.extend(f'<th style="{th}">{escape(str(col))}</th>' for col in df.columns)
    parts.append('</tr></thead><tbody>')

    for i, (_, row) in enumerate(df.iterrows()):
        bg = '#ffffff' if i % 2 == 0 else '#f8fafc'  # zebra striping
        parts.append(f'<tr style="background:{bg}">')
        for col in df.columns:
            style = td_m if col in num_cols else td_b
            parts.append(f'<td style="{style}">{_cell_text(col, row[col])}</td>')
        parts.append('</tr>')

    parts.append('</tbody></table></div>')
    return ''.join(parts)
# Echo the effective run configuration so the captured output documents it.
print('Setup complete.')
print(
    f'Target: {SYMBOL} | {COLLECT_SECS}s window | '
    f'{POLL_INTERVAL*1000:.0f}ms poll | '
    f'~{int(COLLECT_SECS/POLL_INTERVAL):,} expected snapshots'
)
Setup complete. Target: BTCUSDT | 900s window | 100ms poll | ~9,000 expected snapshots
Data collection
The collector polls GET /api/v3/depth every 100ms for a short time, appending each response to depth_rows. After the window closes, GET /api/v3/aggTrades fetches the last 1,000 aggregated trades.
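Each raw response is classified with detect_message_type() and flattened with flatten_depth(). Because REST depth responses carry no E / T exchange timestamps, local_ts (Unix seconds on the local clock, read just before each request is sent) is used as the primary timestamp instead.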
# Single Session object shared by both fetch helpers below.
session = requests.Session()


def fetch_depth():
    """Request the current order book for SYMBOL and return the decoded JSON.

    Sends ``GET {BASE_URL}/depth`` with ``limit=N_LEVELS`` and a 5 second
    timeout. ``raise_for_status()`` raises on an HTTP error status.
    """
    query = {'symbol': SYMBOL, 'limit': N_LEVELS}
    response = session.get(f'{BASE_URL}/depth', params=query, timeout=5)
    response.raise_for_status()
    return response.json()
def fetch_agg_trades(limit=1000):
    """Request the most recent aggregated trades for SYMBOL as decoded JSON.

    Sends ``GET {BASE_URL}/aggTrades`` with the given *limit* and a 5 second
    timeout. ``raise_for_status()`` raises on an HTTP error status.
    """
    query = {'symbol': SYMBOL, 'limit': limit}
    response = session.get(f'{BASE_URL}/aggTrades', params=query, timeout=5)
    response.raise_for_status()
    return response.json()
# Poll the depth endpoint for COLLECT_SECS seconds, then fetch recent trades.
print(f'Collecting {COLLECT_SECS}s of {SYMBOL} order book snapshots...')
t_start = time.time()
depth_rows = []
type_counts = Counter()
errors = 0
last_error = None  # most recent exception, reported in the summary below

while time.time() - t_start < COLLECT_SECS:
    t_poll = time.time()  # becomes local_ts: read before the request is sent
    got_row = False
    try:
        raw = fetch_depth()
        msg_type = detect_message_type(raw)
        type_counts[msg_type] += 1
        if msg_type == 'depth':
            depth_rows.append(flatten_depth(raw, local_ts=t_poll))
            got_row = True
    except Exception as exc:
        # Best-effort collection: one failed poll must not abort the window,
        # but keep the exception so failures are visible afterwards.
        errors += 1
        type_counts['error'] += 1
        last_error = exc

    # Sleep only for whatever remains of the poll interval.
    elapsed = time.time() - t_poll
    time.sleep(max(0, POLL_INTERVAL - elapsed))

    # Report progress only right after a row was appended; otherwise every
    # failed poll would repeat the line while the count sits on a multiple
    # of 300.
    if got_row and len(depth_rows) % 300 == 0:
        pct = (time.time() - t_start) / COLLECT_SECS * 100
        print(f' {pct:4.0f}% {len(depth_rows):,} depth snapshots errors={errors}')

print('Fetching recent aggregated trades...')
raw_trades = fetch_agg_trades(limit=1000)
# All trades come from one response, so they share a single local timestamp.
trades_fetched_at = time.time()
trade_rows = [flatten_trade(t, local_ts=trades_fetched_at) for t in raw_trades]

print(f'\nCollection complete.')
print(f'Message types seen: {dict(type_counts)}')
print(f'Depth snapshots: {len(depth_rows):,}')
print(f'Trades fetched: {len(trade_rows):,}')
print(f'Errors: {errors}')
if last_error is not None:
    print(f'Last error: {last_error!r}')
Collecting 900s of BTCUSDT order book snapshots...
11% 300 depth snapshots errors=0
21% 600 depth snapshots errors=0
32% 900 depth snapshots errors=0
43% 1,200 depth snapshots errors=0
53% 1,500 depth snapshots errors=0
64% 1,800 depth snapshots errors=0
75% 2,100 depth snapshots errors=0
85% 2,400 depth snapshots errors=0
96% 2,700 depth snapshots errors=0
Fetching recent aggregated trades...
Collection complete.
Message types seen: {'depth': 2809}
Depth snapshots: 2,809
Trades fetched: 1,000
Errors: 0
Building the dataset
Raw rows are assembled into typed DataFrames. Derived columns follow the same naming convention as the production data prep pipeline:
- `event_gap_ms` - milliseconds between consecutive event timestamps (not computed in this notebook, because REST depth responses carry no event time)
- `local_gap_ms` - milliseconds between consecutive local poll timestamps
- `update_id_gap` - `lastUpdateId` delta (how many book updates occurred between polls)
- `mid_price_change` - absolute mid-price change per snapshot
- `mid_price_return` - relative mid-price change per snapshot (a fraction, from `pct_change()`)
- `spread_ticks` - spread in minimum tick units ($0.01)
# ---- Order book frame: one row per depth snapshot ----
book = pd.DataFrame(depth_rows)

# Coerce id/time fields and every per-level price/quantity column to numeric.
id_time_cols = ['lastUpdateId', 'event_time', 'transaction_time']
level_cols = [
    f'{pfx}{lvl}'
    for lvl in range(1, N_LEVELS + 1)
    for pfx in ('bid_px_', 'bid_qty_', 'ask_px_', 'ask_qty_')
]
for name in id_time_cols + level_cols:
    book[name] = pd.to_numeric(book[name], errors='coerce')

# Time axes derived from the local poll timestamp (Unix seconds).
poll_ts = book['local_ts']
book['local_dt'] = pd.to_datetime(poll_ts, unit='s', utc=True)
book['ts'] = book['local_dt']
book['elapsed_s'] = poll_ts - poll_ts.iloc[0]
book['local_utc'] = [ts_to_utc(value, is_ms=False) for value in poll_ts]

# Snapshot-to-snapshot deltas.
book['local_gap_ms'] = poll_ts.diff() * 1000
for derived, source in (('update_id_gap', 'lastUpdateId'),
                        ('mid_price_change', 'mid_price')):
    book[derived] = book[source].diff()
book['mid_price_return'] = book['mid_price'].pct_change()
book['spread_ticks'] = (book['spread'] / TICK_SIZE).round()

# ---- Trades frame: one row per aggregated trade ----
trades = pd.DataFrame(trade_rows)
trades['trade_dt'] = pd.to_datetime(trades['trade_time'], unit='ms', utc=True)
trades['trade_utc'] = [ts_to_utc(ms, is_ms=True) for ms in trades['trade_time']]
trades['dollar_value'] = trades['price'] * trades['qty']

# ---- Text summary ----
print('Collection summary')
first_utc = book['local_utc'].iloc[0]
last_utc = book['local_utc'].iloc[-1]
print('\n'.join([
    f'Book rows: {len(book):,}',
    f'Trade rows: {len(trades):,}',
    f'Book columns: {book.shape[1]}',
    f'Trade columns: {trades.shape[1]}',
    f'Time range: {first_utc} to {last_utc}',
    f'Duration: {book["elapsed_s"].max():.1f}s',
]))

# Column name / dtype listings for both frames.
book_cols_df = pd.DataFrame({
    'book_column': book.columns,
    'dtype': [str(dtype) for dtype in book.dtypes],
})
trade_cols_df = pd.DataFrame({
    'trade_column': trades.columns,
    'dtype': [str(dtype) for dtype in trades.dtypes],
})

# Show the first rows of each listing and each frame.
for heading, frame in (('\nBook columns', book_cols_df),
                       ('Trade columns', trade_cols_df),
                       ('Book preview', book),
                       ('Trades preview', trades)):
    print(heading)
    display(frame.head())
Collection summary Book rows: 2,809 Trade rows: 1,000 Book columns: 55 Trade columns: 13 Time range: 2026-05-24T16:44:16.123758+00:00 to 2026-05-24T16:59:16.118159+00:00 Duration: 900.0s Book columns
| book_column | dtype | |
|---|---|---|
| 0 | lastUpdateId | int64 |
| 1 | event_time | float64 |
| 2 | transaction_time | float64 |
| 3 | local_ts | float64 |
| 4 | bid_px_1 | float64 |
Trade columns
| trade_column | dtype | |
|---|---|---|
| 0 | agg_id | int64 |
| 1 | price | float64 |
| 2 | qty | float64 |
| 3 | first_trade_id | int64 |
| 4 | last_trade_id | int64 |
Book preview
| lastUpdateId | event_time | transaction_time | local_ts | bid_px_1 | bid_qty_1 | ask_px_1 | ask_qty_1 | bid_px_2 | bid_qty_2 | ... | spread | local_dt | ts | elapsed_s | local_utc | local_gap_ms | update_id_gap | mid_price_change | mid_price_return | spread_ticks | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 94188948025 | NaN | NaN | 1.779641e+09 | 76499.61 | 2.27896 | 76499.62 | 1.73519 | 76499.6 | 0.00084 | ... | 0.01 | 2026-05-24 16:44:16.123758316+00:00 | 2026-05-24 16:44:16.123758316+00:00 | 0.000000 | 2026-05-24T16:44:16.123758+00:00 | NaN | NaN | NaN | NaN | 10.0 |
| 1 | 94188948066 | NaN | NaN | 1.779641e+09 | 76499.61 | 2.27896 | 76499.62 | 1.73519 | 76499.6 | 0.00084 | ... | 0.01 | 2026-05-24 16:44:16.734374285+00:00 | 2026-05-24 16:44:16.734374285+00:00 | 0.610616 | 2026-05-24T16:44:16.734374+00:00 | 610.615969 | 41.0 | 0.0 | 0.0 | 10.0 |
| 2 | 94188948084 | NaN | NaN | 1.779641e+09 | 76499.61 | 2.27896 | 76499.62 | 1.73519 | 76499.6 | 0.00084 | ... | 0.01 | 2026-05-24 16:44:17.041452646+00:00 | 2026-05-24 16:44:17.041452646+00:00 | 0.917694 | 2026-05-24T16:44:17.041453+00:00 | 307.078362 | 18.0 | 0.0 | 0.0 | 10.0 |
| 3 | 94188948107 | NaN | NaN | 1.779641e+09 | 76499.61 | 2.27903 | 76499.62 | 1.73519 | 76499.6 | 0.00084 | ... | 0.01 | 2026-05-24 16:44:17.348654509+00:00 | 2026-05-24 16:44:17.348654509+00:00 | 1.224896 | 2026-05-24T16:44:17.348655+00:00 | 307.201862 | 23.0 | 0.0 | 0.0 | 10.0 |
| 4 | 94188948144 | NaN | NaN | 1.779641e+09 | 76499.61 | 2.27872 | 76499.62 | 1.73508 | 76499.6 | 0.00084 | ... | 0.01 | 2026-05-24 16:44:17.651305199+00:00 | 2026-05-24 16:44:17.651305199+00:00 | 1.527547 | 2026-05-24T16:44:17.651305+00:00 | 302.650690 | 37.0 | 0.0 | 0.0 | 10.0 |
5 rows × 55 columns
Trades preview
| agg_id | price | qty | first_trade_id | last_trade_id | trade_time | event_time | is_buyer_maker | symbol | local_ts | trade_dt | trade_utc | dollar_value | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3963778223 | 76639.52 | 0.00014 | 6318962293 | 6318962294 | 1779641839373 | None | False | BTCUSDT | 1.779642e+09 | 2026-05-24 16:57:19.373000+00:00 | 2026-05-24T16:57:19.373000+00:00 | 10.729533 |
| 1 | 3963778224 | 76639.77 | 0.00028 | 6318962295 | 6318962298 | 1779641839373 | None | False | BTCUSDT | 1.779642e+09 | 2026-05-24 16:57:19.373000+00:00 | 2026-05-24T16:57:19.373000+00:00 | 21.459136 |
| 2 | 3963778225 | 76639.87 | 0.00008 | 6318962299 | 6318962299 | 1779641839373 | None | False | BTCUSDT | 1.779642e+09 | 2026-05-24 16:57:19.373000+00:00 | 2026-05-24T16:57:19.373000+00:00 | 6.131190 |
| 3 | 3963778226 | 76639.88 | 0.00028 | 6318962300 | 6318962303 | 1779641839373 | None | False | BTCUSDT | 1.779642e+09 | 2026-05-24 16:57:19.373000+00:00 | 2026-05-24T16:57:19.373000+00:00 | 21.459166 |
| 4 | 3963778227 | 76639.99 | 0.00021 | 6318962304 | 6318962306 | 1779641839373 | None | False | BTCUSDT | 1.779642e+09 | 2026-05-24 16:57:19.373000+00:00 | 2026-05-24T16:57:19.373000+00:00 | 16.094398 |
Data quality checks
Before any analysis, verify the dataset is structurally sound: correct depth level counts, no null prices, expected polling cadence.
# Null counts for the columns every later analysis step depends on.
null_cols = ['mid_price', 'spread', 'lastUpdateId', 'bid_px_1', 'ask_px_1']
null_checks = pd.DataFrame({
    'Column': [f'null_{name}' for name in null_cols],
    'Count': [int(book[name].isna().sum()) for name in null_cols],
})

# Populated price levels per side over the first 500 snapshots. A vectorised
# count replaces the row-by-row book.iloc[i][...] scan, which builds a full
# row Series for every cell it inspects.
sample = book.head(500)
bid_px_cols = [f'bid_px_{lvl}' for lvl in range(1, N_LEVELS + 1)]
ask_px_cols = [f'ask_px_{lvl}' for lvl in range(1, N_LEVELS + 1)]
bid_level_counts = sample[bid_px_cols].notna().sum(axis=1).tolist()
ask_level_counts = sample[ask_px_cols].notna().sum(axis=1).tolist()
# default=0 keeps an empty sample from raising; it is then flagged 'warning'.
min_bid_levels = min(bid_level_counts, default=0)
min_ask_levels = min(ask_level_counts, default=0)

# Compute each reduction once and reuse it for both Value and Status.
null_mid = int(book['mid_price'].isna().sum())
median_gap_ms = book['local_gap_ms'].median()
spread_positive = bool((book['spread'] > 0).all())

quality = pd.DataFrame([
    {
        'Check': 'depth snapshots collected',
        'Value': str(len(book)),
        'Status': 'ok' if len(book) > 500 else 'low'
    },
    {
        'Check': 'trades fetched',
        'Value': str(len(trades)),
        'Status': 'ok' if len(trades) > 0 else 'missing'
    },
    {
        'Check': 'null mid_price',
        'Value': str(null_mid),
        'Status': 'ok' if null_mid == 0 else 'warning'
    },
    {
        'Check': 'min bid levels sample 500',
        'Value': str(min_bid_levels),
        'Status': 'ok' if min_bid_levels == N_LEVELS else 'warning'
    },
    {
        'Check': 'min ask levels sample 500',
        'Value': str(min_ask_levels),
        'Status': 'ok' if min_ask_levels == N_LEVELS else 'warning'
    },
    {
        'Check': 'median poll gap',
        'Value': f"{median_gap_ms:.1f} ms",
        'Status': 'ok' if median_gap_ms < 200 else 'warning'
    },
    {
        'Check': 'positive spread throughout',
        'Value': str(spread_positive),
        'Status': 'ok' if spread_positive else 'warning'
    }
])

print('Quality checks')
display(quality)
print('Null checks')
display(null_checks)
Quality checks
| Check | Value | Status | |
|---|---|---|---|
| 0 | depth snapshots collected | 2809 | ok |
| 1 | trades fetched | 1000 | ok |
| 2 | null mid_price | 0 | ok |
| 3 | min bid levels sample 500 | 10 | ok |
| 4 | min ask levels sample 500 | 10 | ok |
| 5 | median poll gap | 307.2 ms | warning |
| 6 | positive spread throughout | True | ok |
Null checks
| Column | Count | |
|---|---|---|
| 0 | null_mid_price | 0 |
| 1 | null_spread | 0 |
| 2 | null_lastUpdateId | 0 |
| 3 | null_bid_px_1 | 0 |
| 4 | null_ask_px_1 | 0 |
Dataset overview
Key statistics for the collected window.
# Headline statistics for the collected window, as (metric, value) pairs.
mid_lo = book['mid_price'].min()
mid_hi = book['mid_price'].max()
gap_ms = book['local_gap_ms']

overview_rows = [
    ('Depth snapshots', f'{len(book):,}'),
    ('Duration (s)', f'{book["elapsed_s"].max():.1f}'),
    ('Median poll gap (ms)', f'{gap_ms.median():.1f}'),
    ('p95 poll gap (ms)', f'{gap_ms.quantile(0.95):.1f}'),
    ('Mid-price range ($)', f'${mid_lo:,.2f} to ${mid_hi:,.2f}'),
    ('Price range ($)', f'${mid_hi - mid_lo:.2f}'),
    ('Median spread ($)', f'${book["spread"].median():.4f}'),
    ('Median spread (ticks)', f'{book["spread_ticks"].median():.0f}'),
    ('Median update_id gap', f'{book["update_id_gap"].median():.0f}'),
    ('Trades fetched', f'{len(trades):,}'),
    ('Collection start (UTC)', book['local_utc'].iloc[0]),
    ('Collection end (UTC)', book['local_utc'].iloc[-1]),
]
overview = pd.DataFrame(overview_rows, columns=['Metric', 'Value'])

display(overview)
| Metric | Value | |
|---|---|---|
| 0 | Depth snapshots | 2,809 |
| 1 | Duration (s) | 900.0 |
| 2 | Median poll gap (ms) | 307.2 |
| 3 | p95 poll gap (ms) | 327.2 |
| 4 | Mid-price range ($) | $76,499.61 to $76,699.99 |
| 5 | Price range ($) | $200.38 |
| 6 | Median spread ($) | $0.0100 |
| 7 | Median spread (ticks) | 10 |
| 8 | Median update_id gap | 48 |
| 9 | Trades fetched | 1,000 |
| 10 | Collection start (UTC) | 2026-05-24T16:44:16.123758+00:00 |
| 11 | Collection end (UTC) | 2026-05-24T16:59:16.118159+00:00 |
Mid-price, spread, and order book activity
Three panels over the full collection window:
- Mid-price - `(best_bid + best_ask) / 2` per snapshot
- Spread in ticks - `(ask_px_1 − bid_px_1) / 0.01`
- Update ID gap - how many book state changes occurred between polls; spikes indicate high order flow activity
# Figure 1: mid-price over the collection window.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(book['ts'], book['mid_price'], color=FL_BLUE, linewidth=1.2)
ax.set_ylabel('Mid-price (USDT)')
ax.set_title(f'BTCUSDT mid-price over {book["elapsed_s"].max():.0f}s ({len(book):,} snapshots)')
ax.yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Figure 2: bid-ask spread expressed in ticks.
fig, ax = plt.subplots(figsize=(8, 4.5))
ax.fill_between(book['ts'], book['spread_ticks'], alpha=0.25, color=FL_AMBER)
ax.plot(book['ts'], book['spread_ticks'], color=FL_AMBER, linewidth=0.8)
ax.set_ylabel('Spread (ticks)')
ax.set_title('Bid ask spread in ticks ($0.01 each)')
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Figure 3: lastUpdateId delta between polls, capped at its 99th percentile
# so a few extreme values do not flatten the rest of the series.
gap_clipped = book['update_id_gap'].clip(upper=book['update_id_gap'].quantile(0.99))

fig, ax = plt.subplots(figsize=(8, 4.5))
ax.fill_between(book['ts'], gap_clipped, alpha=0.2, color=FL_SLATE)
ax.plot(book['ts'], gap_clipped, color=FL_SLATE, linewidth=0.8)
ax.set_ylabel('Update ID gap')
ax.set_title('Order book changes between consecutive polls (clipped at 99th percentile)')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
ax.tick_params(length=0)
fig.tight_layout()
plt.show()
Order book depth profile
Average resting volume at each of the 10 price levels across all snapshots. Level 1 (best price) is the most contested. Deeper levels accumulate more volume as participants quote progressively wider spreads from the mid-price.
# Mean resting quantity at each level, per side, plus running totals.
levels = np.arange(1, N_LEVELS + 1)
bid_avg = [book[f'bid_qty_{lvl}'].mean() for lvl in range(1, N_LEVELS + 1)]
ask_avg = [book[f'ask_qty_{lvl}'].mean() for lvl in range(1, N_LEVELS + 1)]
cum_bid = np.cumsum(bid_avg)
cum_ask = np.cumsum(ask_avg)

# Grouped bars: bid shifted half a bar left, ask half a bar right.
w = 0.35
fig, ax = plt.subplots(figsize=(8, 4.5))
for offset, heights, colour, side in ((-w / 2, bid_avg, FL_GREEN, 'Bid'),
                                      (w / 2, ask_avg, FL_RED, 'Ask')):
    ax.bar(levels + offset, heights, width=w, color=colour, alpha=0.85, label=side)
ax.set_xlabel('Level')
ax.set_ylabel('Average BTC quantity')
ax.set_title('Average resting volume per level')
ax.set_xticks(levels)
ax.legend()
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Cumulative depth: shaded area with a marked line per side.
fig, ax = plt.subplots(figsize=(8, 4.5))
for totals, colour, side in ((cum_bid, FL_GREEN, 'Bid cumulative'),
                             (cum_ask, FL_RED, 'Ask cumulative')):
    ax.fill_between(levels, totals, alpha=0.15, color=colour)
    ax.plot(levels, totals, color=colour, marker='o', markersize=5,
            linewidth=1.6, label=side)
ax.set_xlabel('Level')
ax.set_ylabel('Cumulative BTC quantity')
ax.set_title('Cumulative depth across 10 levels')
ax.set_xticks(levels)
ax.legend()
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Same numbers as a table, rounded to 4 decimals for display.
depth_levels_df = pd.DataFrame({
    'Level': levels,
    'Bid avg': bid_avg,
    'Ask avg': ask_avg,
    'Cum bid': cum_bid,
    'Cum ask': cum_ask
})
value_cols = ['Bid avg', 'Ask avg', 'Cum bid', 'Cum ask']
depth_levels_df[value_cols] = depth_levels_df[value_cols].round(4)

display(depth_levels_df)
| Level | Bid avg | Ask avg | Cum bid | Cum ask | |
|---|---|---|---|---|---|
| 0 | 1 | 3.0854 | 2.0412 | 3.0854 | 2.0412 |
| 1 | 2 | 0.0244 | 0.1564 | 3.1098 | 2.1977 |
| 2 | 3 | 0.0031 | 0.0736 | 3.1129 | 2.2712 |
| 3 | 4 | 0.0115 | 0.0165 | 3.1245 | 2.2877 |
| 4 | 5 | 0.0384 | 0.0686 | 3.1629 | 2.3564 |
| 5 | 6 | 0.0370 | 0.0568 | 3.1999 | 2.4132 |
| 6 | 7 | 0.0598 | 0.0568 | 3.2597 | 2.4700 |
| 7 | 8 | 0.0689 | 0.0635 | 3.3286 | 2.5335 |
| 8 | 9 | 0.0638 | 0.0862 | 3.3925 | 2.6196 |
| 9 | 10 | 0.0692 | 0.0778 | 3.4617 | 2.6974 |
LOB snapshot - single instant
A single order book snapshot taken from the middle of the collected rows. Each horizontal bar represents one price level: green bars are resting buy orders (bid side) and red bars are resting sell orders (ask side). The gap between the innermost bars is the bid-ask spread.
# Use the row in the middle of the collected snapshots.
snap_idx = len(book) // 2
snap = book.iloc[snap_idx]

bid_px = [snap[f'bid_px_{l}'] for l in range(1, N_LEVELS + 1)]
bid_qty = [snap[f'bid_qty_{l}'] for l in range(1, N_LEVELS + 1)]
ask_px = [snap[f'ask_px_{l}'] for l in range(1, N_LEVELS + 1)]
ask_qty = [snap[f'ask_qty_{l}'] for l in range(1, N_LEVELS + 1)]


def _resting_levels(prices, qtys):
    """Pair prices with quantities, dropping missing or zero-quantity levels."""
    return [(p, q) for p, q in zip(prices, qtys)
            if pd.notna(p) and pd.notna(q) and q > 0]


bid_data = _resting_levels(bid_px, bid_qty)
ask_data = _resting_levels(ask_px, ask_qty)

snap_time = snap['local_dt'].strftime('%H:%M:%S UTC') if hasattr(snap['local_dt'], 'strftime') else ''

# Bar thickness scales with the plotted price range so bars do not overlap.
all_px = [p for p, _ in bid_data + ask_data]
pr = max(all_px) - min(all_px) if len(all_px) > 1 else 1
bar_h = pr / max(len(bid_data + ask_data) * 1.8, 1)

plt.figure(figsize=(8, 4.5))

# Bids are drawn with negative widths so they extend left of the zero line.
if bid_data:
    bp, bq = zip(*bid_data)
    plt.barh(bp, [-q for q in bq], height=bar_h, color=FL_GREEN, alpha=0.85, label='Bid')

if ask_data:
    ap, aq = zip(*ask_data)
    plt.barh(ap, list(aq), height=bar_h, color=FL_RED, alpha=0.85, label='Ask')

plt.axvline(0, color=FL_BORDER, linewidth=1)
plt.xlabel('Volume (BTC) | left = bid, right = ask')
plt.ylabel('Price (USDT)')
plt.title(f'BTCUSDT limit order book snapshot at {snap_time}')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.2f}')
)
# abs() hides the sign used only to push bid bars to the left.
plt.gca().xaxis.set_major_formatter(
    mticker.FuncFormatter(lambda x, _: f'{abs(x):.3f}')
)
plt.legend()
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

if bid_data and ask_data:
    mid = (bid_data[0][0] + ask_data[0][0]) / 2
    spr = ask_data[0][0] - bid_data[0][0]

    snapshot_summary = pd.DataFrame([
        {'Metric': 'Snapshot time', 'Value': snap_time},
        {'Metric': 'Best bid', 'Value': f'${bid_data[0][0]:,.2f} ({bid_data[0][1]:.4f} BTC)'},
        {'Metric': 'Best ask', 'Value': f'${ask_data[0][0]:,.2f} ({ask_data[0][1]:.4f} BTC)'},
        # Three decimals: with 2-decimal prices the mid can fall on a half
        # cent, which a 2-decimal format rounds onto the best bid or ask.
        {'Metric': 'Mid-price', 'Value': f'${mid:,.3f}'},
        {'Metric': 'Spread', 'Value': f'${spr:.4f}'},
        {'Metric': 'Spread (ticks)', 'Value': f'{spr / TICK_SIZE:.0f}'}
    ])

    display(snapshot_summary)
| Metric | Value | |
|---|---|---|
| 0 | Snapshot time | 16:51:44 UTC |
| 1 | Best bid | $76,595.99 (1.8710 BTC) |
| 2 | Best ask | $76,596.00 (2.0319 BTC) |
| 3 | Mid-price | $76,595.99 |
| 4 | Spread | $0.0100 |
| 5 | Spread (ticks) | 10 |
Mid-price change distribution
Distribution of mid_price_change between consecutive snapshots (100 ms target interval; the measured median gap in the run shown here was about 307 ms). When the mid-price does move, it typically shifts by a small number of ticks.
# Snapshot-to-snapshot mid-price changes; the first row has no predecessor.
changes = book['mid_price_change'].dropna()
nonzero = changes[changes != 0]

# Clip the tails at the 99.5th percentile of |change|. When almost every
# change is zero that percentile is 0, which would flatten every move onto
# zero, so fall back to the largest observed move.
clip_all = changes.abs().quantile(0.995)
if not clip_all > 0:
    clip_all = changes.abs().max()

# Label the plot with the measured cadence rather than the nominal interval.
median_gap_ms = book['local_gap_ms'].median()

plt.figure(figsize=(8, 4.5))
plt.hist(
    changes.clip(-clip_all, clip_all),
    bins=60,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(0, color=FL_AMBER, linewidth=1.5, linestyle='--')
plt.xlabel(f'Change ($, clipped to ±{clip_all:.2f})')
plt.ylabel('Frequency')
plt.title(f'Mid-price change per snapshot (median gap {median_gap_ms:.0f} ms)')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

# Second histogram: only the snapshots where the mid-price actually moved.
if len(nonzero) > 0:
    clip_nz = nonzero.abs().quantile(0.99)

    plt.figure(figsize=(8, 4.5))
    plt.hist(
        nonzero.clip(-clip_nz, clip_nz),
        bins=40,
        color=FL_AMBER,
        alpha=0.85,
        edgecolor='none'
    )
    plt.xlabel(f'Change ($, non-zero only, clipped to ±{clip_nz:.2f})')
    plt.ylabel('Frequency')
    plt.title(f'Non-zero mid-price moves ({len(nonzero):,} of {len(changes):,}, {len(nonzero) / len(changes):.1%})')
    plt.tick_params(length=0)
    plt.tight_layout()
    plt.show()

# Direction counts and shares, followed by moments of the non-zero moves.
summary_rows = [
    {
        'Metric': label,
        'Value': f'{mask.sum():,}',
        'Share': f'{mask.mean():.1%}'
    }
    for label, mask in (
        ('Zero changes', changes == 0),
        ('Up moves', changes > 0),
        ('Down moves', changes < 0),
    )
]
if len(nonzero) > 0:
    summary_rows.extend([
        {
            'Metric': 'Mean non-zero change',
            'Value': f'${nonzero.mean():.4f}',
            'Share': ''
        },
        {
            'Metric': 'Std non-zero change',
            'Value': f'${nonzero.std():.4f}',
            'Share': ''
        }
    ])
change_summary = pd.DataFrame(summary_rows)

display(change_summary)
| Metric | Value | Share | |
|---|---|---|---|
| 0 | Zero changes | 2,724 | 97.0% |
| 1 | Up moves | 63 | 2.2% |
| 2 | Down moves | 21 | 0.7% |
| 3 | Mean non-zero change | $2.1077 | |
| 4 | Std non-zero change | $4.4696 |
Trade flow analysis
The aggTrades endpoint collapses consecutive fills at the same price and direction into single rows. The is_buyer_maker field (WebSocket field m) indicates aggressor side:
- `False` - buyer initiated (market buy hitting resting ask)
- `True` - seller initiated (market sell hitting resting bid)
Buy/sell imbalance over a short window is a leading indicator of short-term price direction in limit order book research.
# is_buyer_maker == True means the seller was the aggressor.
maker_mask = trades['is_buyer_maker']
buy_trades = trades[~maker_mask]
sell_trades = trades[maker_mask]

# Trade prices over time, coloured by aggressor side.
fig, ax = plt.subplots(figsize=(8, 4.5))
for side_trades, colour, side_label in ((buy_trades, FL_GREEN, 'Buy aggressor'),
                                        (sell_trades, FL_RED, 'Sell aggressor')):
    ax.scatter(
        side_trades['trade_dt'],
        side_trades['price'],
        c=colour,
        s=8,
        alpha=0.6,
        label=side_label
    )
ax.set_ylabel('Price (USDT)')
ax.set_title(f'Last {len(trades):,} aggTrades: price and direction')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
ax.yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
ax.legend(fontsize=9)
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Trade size histogram with the top 1% folded into the last bin.
clip_qty = trades['qty'].quantile(0.99)

fig, ax = plt.subplots(figsize=(8, 4.5))
ax.hist(
    trades['qty'].clip(upper=clip_qty),
    bins=40,
    color=FL_BLUE,
    alpha=0.85,
    edgecolor='none'
)
ax.set_xlabel('Trade size (BTC, clipped at 99th percentile)')
ax.set_ylabel('Frequency')
ax.set_title('Aggregated trade size distribution')
ax.tick_params(length=0)
fig.tight_layout()
plt.show()

# Total traded volume by aggressor side, with the value printed above each bar.
buy_vol = float(buy_trades['qty'].sum())
sell_vol = float(sell_trades['qty'].sum())
side_volumes = [buy_vol, sell_vol]
label_pad = max(buy_vol, sell_vol) * 0.01

fig, ax = plt.subplots(figsize=(8, 4.5))
ax.bar(
    ['Buy', 'Sell'],
    side_volumes,
    color=[FL_GREEN, FL_RED],
    alpha=0.85,
    width=0.45
)
ax.set_ylabel('Total volume (BTC)')
ax.set_title('Aggressor volume: buy vs sell')
ax.tick_params(length=0)
for i, v in enumerate(side_volumes):
    ax.text(
        i,
        v + label_pad,
        f'{v:.3f}',
        ha='center',
        fontsize=10,
        color=FL_TEXT2
    )
fig.tight_layout()
plt.show()

# Summary table of the trade sample.
qty = trades['qty']
trade_summary = pd.DataFrame(
    [
        ('Trades', f'{len(trades):,}'),
        ('Total volume', f'{qty.sum():.4f} BTC'),
        ('Total dollar value', f'${trades["dollar_value"].sum():,.0f}'),
        ('Buy aggressor trades',
         f'{(~maker_mask).sum():,} ({(~maker_mask).mean():.1%})'),
        ('Sell aggressor trades',
         f'{maker_mask.sum():,} ({maker_mask.mean():.1%})'),
        ('Median trade size', f'{qty.median():.4f} BTC'),
        ('Mean trade size', f'{qty.mean():.4f} BTC'),
    ],
    columns=['Metric', 'Value']
)

display(trade_summary)
| Metric | Value | |
|---|---|---|
| 0 | Trades | 1,000 |
| 1 | Total volume | 8.3586 BTC |
| 2 | Total dollar value | $640,941 |
| 3 | Buy aggressor trades | 801 (80.1%) |
| 4 | Sell aggressor trades | 199 (19.9%) |
| 5 | Median trade size | 0.0008 BTC |
| 6 | Mean trade size | 0.0084 BTC |
Spread distribution and polling latency
Spread distribution in ticks alongside the inter-poll gap histogram.
# Spread histogram input: integer tick counts, lowest 15 distinct values.
spread_ticks = book['spread_ticks'].dropna().round().astype(int)
spread_counts = spread_ticks.value_counts().sort_index().head(15)
poll_gaps = book['local_gap_ms'].dropna()

plt.figure(figsize=(8, 4.5))
plt.bar(
    spread_counts.index,
    spread_counts.values,
    color=FL_BLUE,
    alpha=0.85,
    width=0.6
)

# Annotate each bar with its share of all snapshots.
for x, y in zip(spread_counts.index, spread_counts.values):
    plt.text(
        x,
        y + spread_counts.max() * 0.01,
        f'{y / len(book):.0%}',
        ha='center',
        fontsize=9,
        color=FL_TEXT2
    )

plt.xlabel('Spread (ticks)')
plt.ylabel('Snapshot count')
plt.title('Bid ask spread distribution')
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

# Upper clip for the latency histogram. A fixed 300 ms cap can sit below the
# measured median, piling every gap into the last bin and pushing the median
# marker off the axis. Use the 99th percentile instead, but never less than
# 3x the target interval. The constant comes first so an empty series (NaN
# quantile) falls back to it.
target_ms = POLL_INTERVAL * 1000
gap_cap = max(3 * target_ms, float(poll_gaps.quantile(0.99)))

plt.figure(figsize=(8, 4.5))
plt.hist(
    poll_gaps.clip(0, gap_cap),
    bins=50,
    color=FL_SLATE,
    alpha=0.85,
    edgecolor='none'
)
plt.axvline(target_ms, color=FL_AMBER, linewidth=1.5, linestyle='--',
            label=f'Target {target_ms:.0f}ms')
plt.axvline(
    poll_gaps.median(),
    color=FL_BLUE,
    linewidth=1.5,
    linestyle='--',
    label=f'Actual median {poll_gaps.median():.0f}ms'
)
plt.xlabel(f'Inter-poll gap (ms, clipped at {gap_cap:.0f})')
plt.ylabel('Frequency')
plt.title('REST polling interval distribution')
plt.legend(fontsize=9)
plt.tick_params(length=0)
plt.tight_layout()
plt.show()

spread_gap_summary = pd.DataFrame([
    {
        'Metric': 'Most common spread',
        'Value': f'{spread_counts.idxmax()} tick(s)'
    },
    {
        'Metric': 'Share at most common spread',
        'Value': f'{spread_counts.max() / len(book):.1%}'
    },
    {
        'Metric': 'Median poll gap',
        'Value': f'{poll_gaps.median():.1f} ms'
    },
    {
        'Metric': 'p95 poll gap',
        'Value': f'{poll_gaps.quantile(0.95):.1f} ms'
    },
    {
        'Metric': 'Max poll gap',
        'Value': f'{poll_gaps.max():.1f} ms'
    }
])

display(spread_gap_summary)
| Metric | Value | |
|---|---|---|
| 0 | Most common spread | 10 tick(s) |
| 1 | Share at most common spread | 100.0% |
| 2 | Median poll gap | 307.2 ms |
| 3 | p95 poll gap | 327.2 ms |
| 4 | Max poll gap | 824.2 ms |