Ctrl K

Bet Against Beta Strategy

This notebook implements and tests the Bet Against Beta (BAB) factor. The empirical finding that low beta stocks tend to outperform high-beta stocks on a risk-adjusted basis. The strategy buys the lowest-beta stocks each year and holds them for the following year, rebalancing annually.

The analysis proceeds in four steps:

  1. Universe construction - read nasdaq_screener.csv, filter to common stocks, rank by liquidity, keep the top 50
  2. Price fetch - download daily adjusted prices from Tiingo for all 50 stocks + QQQ benchmark
  3. Beta bucket analysis - estimate trailing-year beta vs QQQ for each stock, sort into quintiles, compare next-year returns across buckets
  4. BAB strategy - each year go long the 10 lowest-beta stocks, hold for one year, measure equity curve

Paper: Frazzini, A. & Pedersen, L.H. (2014). Betting against beta. Journal of Financial Economics, 111(1), 1–23. doi:10.1016/j.jfineco.2013.10.005

Parameter Value
Universe nasdaq_screener.csv → top 50 by dollar volume
Benchmark QQQ (NASDAQ-100 ETF)
Beta estimation Trailing calendar year vs QQQ
BAB long portfolio 10 lowest-beta stocks each year
Hold period 1 year, annually rebalanced
Starting capital $100,000
Period Jan 2018 – present
Data sources nasdaq_screener.csv, Tiingo daily adjusted prices

Sources

  • nasdaq_screener.csv: local universe file used in this notebook
  • Nasdaq Stock Screener: source used to generate a similar stock universe file
  • Tiingo API: daily adjusted price data for stocks and QQQ
In [7]:
import os
import time
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker

from pathlib import Path
from dotenv import load_dotenv
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from IPython.display import display, HTML

FL_BLUE   = '#2563eb'
FL_SLATE  = '#64748b'
FL_AMBER  = '#f59e0b'
FL_GREEN  = '#16a34a'
FL_RED    = '#ef4444'
FL_BG     = '#ffffff'
FL_GRID   = '#e2e8f0'
FL_TEXT   = '#0f172a'
FL_TEXT2  = '#334155'
FL_BORDER = '#e2e8f0'

matplotlib.rcParams.update({
    'figure.facecolor':  FL_BG,
    'axes.facecolor':    FL_BG,
    'axes.edgecolor':    FL_BORDER,
    'axes.labelcolor':   FL_TEXT2,
    'axes.spines.top':   False,
    'axes.spines.right': False,
    'axes.grid':         True,
    'grid.color':        FL_GRID,
    'grid.linewidth':    0.7,
    'xtick.color':       FL_TEXT2,
    'ytick.color':       FL_TEXT2,
    'xtick.labelsize':   10,
    'ytick.labelsize':   10,
    'axes.labelsize':    11,
    'axes.titlesize':    12,
    'axes.titlecolor':   FL_TEXT,
    'axes.titlepad':     12,
    'legend.frameon':    False,
    'legend.fontsize':   10,
    'figure.dpi':        300,
    'savefig.bbox':      'tight',
    'font.family':       'sans-serif',
    'font.sans-serif':   ['Inter', 'Helvetica Neue', 'Arial', 'DejaVu Sans'],
})

PROJECT_ROOT = Path.cwd()
load_dotenv(PROJECT_ROOT / '.env')
TIINGO_API_KEY = os.getenv('TIINGO_API_KEY')
if not TIINGO_API_KEY:
    raise RuntimeError('TIINGO_API_KEY is missing from the project-level .env')

PRICE_THRESHOLD  = 5.0
MAX_STOCKS       = 50

BENCHMARK_TICKER = 'QQQ'
START_DATE       = '2018-01-01'
REQUEST_SLEEP    = 0.15
MIN_OBS_PER_YEAR = 100
N_BUCKETS        = 5
N_LONGS          = 10
STARTING_CAPITAL = 100_000.0
In [8]:
def make_session():
    s = requests.Session()
    s.mount('https://', HTTPAdapter(max_retries=Retry(
        total=5, backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=['GET']
    )))
    return s


def fetch_tiingo_prices(session, ticker, start_date):
    resp = session.get(
        f'https://api.tiingo.com/tiingo/daily/{ticker}/prices',
        headers={'Authorization': f'Token {TIINGO_API_KEY}', 'Content-Type': 'application/json'},
        params={'startDate': start_date, 'resampleFreq': 'daily', 'format': 'json'},
        timeout=60
    )
    resp.raise_for_status()
    raw = pd.DataFrame(resp.json())
    if raw.empty:
        raise ValueError(f'No data for {ticker}')
    df = pd.DataFrame({
        'Ticker': ticker,
        'date':   pd.to_datetime(raw['date'], utc=True).dt.tz_localize(None),
        'Open':   pd.to_numeric(raw['adjOpen'],   errors='coerce'),
        'High':   pd.to_numeric(raw['adjHigh'],   errors='coerce'),
        'Low':    pd.to_numeric(raw['adjLow'],    errors='coerce'),
        'Close':  pd.to_numeric(raw['adjClose'],  errors='coerce'),
        'Volume': pd.to_numeric(raw['adjVolume'], errors='coerce'),
    })
    return df.dropna(subset=['date','Close']).sort_values('date').reset_index(drop=True)


def calc_beta(stock_returns, benchmark_returns):
    aligned = pd.concat([stock_returns, benchmark_returns], axis=1, join='inner').dropna()
    if len(aligned) < MIN_OBS_PER_YEAR:
        return np.nan
    x = aligned.iloc[:, 1]
    y = aligned.iloc[:, 0]
    x_var = x.var(ddof=1)
    if pd.isna(x_var) or x_var == 0:
        return np.nan
    return y.cov(x) / x_var


def build_daily_equity_curve_from_returns(daily_returns_df, starting_capital):
    if daily_returns_df.empty:
        return pd.DataFrame(columns=['date','daily_return','equity','cum_pnl','daily_pnl'])
    eq = daily_returns_df.copy().sort_values('date').reset_index(drop=True)
    eq['equity']    = starting_capital * (1 + eq['daily_return']).cumprod()
    eq['cum_pnl']   = eq['equity'] - starting_capital
    eq['daily_pnl'] = eq['equity'].diff().fillna(eq['equity'] - starting_capital)
    return eq


def calc_cagr_and_sharpe(daily_equity_df, starting_capital):
    if daily_equity_df.empty:
        return 0.0, 0.0
    years  = (daily_equity_df['date'].max() - daily_equity_df['date'].min()).days / 365.25
    ending = daily_equity_df['equity'].iloc[-1]
    cagr   = (ending / starting_capital) ** (1 / years) - 1 if years > 0 and ending > 0 else 0.0
    dr     = daily_equity_df['daily_return'].dropna()
    sharpe = (dr.mean() / dr.std(ddof=1)) * (252 ** 0.5) if len(dr) > 1 and dr.std(ddof=1) > 0 else 0.0
    return round(cagr, 6), round(sharpe, 6)


def fmt_pct(v):    return '-' if pd.isna(v) else f'{v:.1%}'
def fmt_f3(v):     return '-' if pd.isna(v) else f'{v:.3f}'
def fmt_dollar(v): return f'${v:>12,.0f}'
def fmt_pct2(v):   return f'{v:.2%}'
def fmt_f2(v):     return f'{v:.2f}'

Universe construction

The NASDAQ screener CSV (nasdaq_screener.csv) is read from public/data/. ETF-like instruments, ADRs, warrants, rights, preferred shares, and units are filtered out using name and industry keyword matching. Stocks below $5 or with missing market cap / volume are excluded. The remaining stocks are ranked by dollar volume (last sale × volume) and the top 50 are selected as the working universe.

The screener CSV can be downloaded from the page above for full reproducibility.

In [9]:
ETF_KEYWORDS = [
    ' etf','etf ','exchange traded','index fund',' fund',' trust',' portfolio',
    ' ishare',' ishares',' spdr',' invesco',' vanguard',' direxion',' proshares',
    ' global x',' first trust',' yieldshares',' etn',' note'
]
NON_COMMON_KEYWORDS = [
    'adr','ads','american depositary','depositary share','depositary shares',
    'warrant',' warrants',' right',' rights',' unit',' units',' preferred',
    ' preferreds',' preference share',' preference shares',' income shares',
    'ordinary shares','ordinary share'
]

SCREENER_CSV = Path('public/data/nasdaq_screener.csv')
if not SCREENER_CSV.exists():
    # fallback: look one level up (when running from project root)
    SCREENER_CSV = Path('data/nasdaq_screener.csv')


def contains_any(series, keywords):
    mask = pd.Series(False, index=series.index)
    for kw in keywords:
        mask |= series.str.contains(kw, case=False, na=False, regex=False)
    return mask


# Expected columns: Symbol, Name, Last Sale, Net Change, % Change,
#                   Market Cap, Country, IPO Year, Volume, Sector, Industry
raw_df = pd.read_csv(SCREENER_CSV)
print(f'Screener rows loaded: {len(raw_df)}  columns: {list(raw_df.columns)}')

df = raw_df.copy()
df['Symbol']   = df['Symbol'].astype(str).str.strip()
df['Name']     = df['Name'].astype(str).str.strip()
df['Industry'] = df['Industry'].astype(str).str.strip()
df['Last Sale']= pd.to_numeric(
    df['Last Sale'].astype(str)
      .str.replace('$', '', regex=False)
      .str.replace(',', '', regex=False)
      .str.strip(),
    errors='coerce'
)
df['Market Cap'] = pd.to_numeric(df['Market Cap'], errors='coerce')
df['Volume']     = pd.to_numeric(df['Volume'],     errors='coerce')

name_lc     = df['Name'].str.lower()
industry_lc = df['Industry'].str.lower()
mask_remove = (
    contains_any(name_lc,     ETF_KEYWORDS) | contains_any(industry_lc, ETF_KEYWORDS) |
    contains_any(name_lc,     NON_COMMON_KEYWORDS) | contains_any(industry_lc, NON_COMMON_KEYWORDS)
)

universe_df = (
    df.loc[~mask_remove]
    .copy()
    .loc[lambda x: x['Last Sale'] > PRICE_THRESHOLD]
    .loc[lambda x: x['Market Cap'].notna() & x['Volume'].notna() & (x['Volume'] > 0)]
)
universe_df['dollar_volume']       = universe_df['Last Sale'] * universe_df['Volume']
universe_df['market_cap_billions'] = universe_df['Market Cap'] / 1e9

bab_universe = (
    universe_df
    .sort_values(['dollar_volume', 'Market Cap'], ascending=[False, False])
    .head(MAX_STOCKS)
    .reset_index(drop=True)
)

selected_tickers = sorted(bab_universe['Symbol'].dropna().unique().tolist())
print(f'Universe: {len(selected_tickers)} stocks selected')
print('Tickers:', ', '.join(selected_tickers))
Screener rows loaded: 1236  columns: ['Symbol', 'Name', 'Last Sale', 'Net Change', '% Change', 'Market Cap', 'Country', 'IPO Year', 'Volume', 'Sector', 'Industry']
Universe: 50 stocks selected
Tickers: AAL, AAOI, AAPL, ADBE, ADI, AMAT, AMD, AMZN, APP, ASTS, AVGO, BKNG, BKR, COIN, COST, CRWD, CRWV, CSCO, FANG, GOOG, GOOGL, HON, HOOD, INTC, INTU, ISRG, KLAC, LITE, LRCX, META, MRVL, MSFT, MSTR, MU, NFLX, NVDA, ONDS, PANW, PEP, PLTR, QCOM, RKLB, SBUX, SNDK, SOFI, TSLA, ULTA, WBD, WDC, WMT

Price data

Daily adjusted OHLCV prices are fetched from Tiingo for all universe stocks plus the QQQ benchmark, starting January 2018.

In [10]:
session = make_session()
all_frames = []
fetch_rows = []
tickers_to_fetch = selected_tickers + [BENCHMARK_TICKER]

batch_size = 10
batch_ok = 0
batch_error = 0
batch_start = 1

for i, ticker in enumerate(tickers_to_fetch, start=1):
    try:
        time.sleep(REQUEST_SLEEP)
        df_p = fetch_tiingo_prices(session, ticker, START_DATE)
        all_frames.append(df_p)

        fetch_rows.append({
            'Ticker': ticker,
            'Status': 'OK',
            'First': df_p['date'].min().strftime('%Y-%m-%d'),
            'Last': df_p['date'].max().strftime('%Y-%m-%d'),
            'Rows': len(df_p),
            'Error': ''
        })
        batch_ok += 1

    except Exception as e:
        fetch_rows.append({
            'Ticker': ticker,
            'Status': 'ERROR',
            'First': '',
            'Last': '',
            'Rows': None,
            'Error': str(e)
        })
        batch_error += 1

    if i % batch_size == 0 or i == len(tickers_to_fetch):
        batch_end = i
        print(
            f'Batch {batch_start:>2}-{batch_end:<2} | {len(tickers_to_fetch)}  '
            f'OK: {batch_ok:<2}  ERROR: {batch_error:<2}'
        )
        batch_start = i + 1
        batch_ok = 0
        batch_error = 0

fetch_summary_df = pd.DataFrame(fetch_rows).sort_values(['Status', 'Ticker']).reset_index(drop=True)

prices_df = (
    pd.concat(all_frames, ignore_index=True)
    .sort_values(['Ticker', 'date'])
    .reset_index(drop=True)
)

valid_stock_tickers = fetch_summary_df.loc[
    (fetch_summary_df['Status'] == 'OK') & (fetch_summary_df['Ticker'] != BENCHMARK_TICKER),
    'Ticker'
].tolist()

fetch_overview_df = pd.DataFrame([
    {
        'Metric': 'Valid stock tickers',
        'Value': f'{len(valid_stock_tickers)} | {len(selected_tickers)}'
    },
    {
        'Metric': 'Fetch OK',
        'Value': f'{(fetch_summary_df["Status"] == "OK").sum():,}'
    },
    {
        'Metric': 'Fetch ERROR',
        'Value': f'{(fetch_summary_df["Status"] == "ERROR").sum():,}'
    }
])

display(fetch_overview_df)
Batch  1-10 | 51  OK: 10  ERROR: 0 
Batch 11-20 | 51  OK: 10  ERROR: 0 
Batch 21-30 | 51  OK: 10  ERROR: 0 
Batch 31-40 | 51  OK: 10  ERROR: 0 
Batch 41-50 | 51  OK: 10  ERROR: 0 
Batch 51-51 | 51  OK: 1   ERROR: 0 
Metric Value
0 Valid stock tickers 50 | 50
1 Fetch OK 51
2 Fetch ERROR 0

Data coverage

Fetch status and date range for each ticker.

In [11]:
summary_view = fetch_summary_df[['Ticker', 'Status', 'First', 'Last', 'Rows']].reset_index(drop=True).copy()
display(summary_view.head())
Ticker Status First Last Rows
0 AAL OK 2018-01-02 2026-05-22 2109
1 AAOI OK 2018-01-02 2026-05-22 2109
2 AAPL OK 2018-01-02 2026-05-22 2109
3 ADBE OK 2018-01-02 2026-05-22 2109
4 ADI OK 2018-01-02 2026-05-22 2109
In [12]:
prices_wide = (
    prices_df.pivot(index='date', columns='Ticker', values='Close')
    .sort_index()
)

returns_wide = prices_wide.pct_change()

if BENCHMARK_TICKER not in prices_wide.columns:
    raise ValueError(f'{BENCHMARK_TICKER} not found in fetched prices')

available_years = sorted(prices_wide.index.year.unique().tolist())
train_test_pairs = [
    (y, y + 1) for y in available_years if y + 1 in available_years
]

train_test_pairs_df = pd.DataFrame(train_test_pairs, columns=['Train year', 'Test year'])

pair_summary_df = pd.DataFrame([
    {
        'Metric': 'Available years',
        'Value': f'{available_years[0]} to {available_years[-1]}' if available_years else '-'
    },
    {
        'Metric': 'Train-test pairs',
        'Value': f'{len(train_test_pairs):,}'
    },
    {
        'Metric': 'Candidate tickers for beta',
        'Value': f'{len(valid_stock_tickers):,}'
    }
])

display(pair_summary_df)
#display(train_test_pairs_df.tail())
Metric Value
0 Available years 2018 to 2026
1 Train-test pairs 8
2 Candidate tickers for beta 50

Beta bucket analysis

Each year, trailing-year beta is estimated for every stock against QQQ returns. Stocks are sorted into 5 quintile buckets (1 = lowest beta, 5 = highest beta). The table and chart show the average next year return for each bucket, averaged across all available train-test year pairs.

The BAB hypothesis predicts that bucket 1 (low beta) should deliver competitive or superior returns to bucket 5 (high beta), despite lower systematic risk.

In [13]:
analysis_rows = []
beta_detail_rows = []

for train_year, test_year in train_test_pairs:
    train_mask = returns_wide.index.year == train_year
    test_mask = prices_wide.index.year == test_year
    bench_ret = returns_wide.loc[train_mask, BENCHMARK_TICKER].dropna()

    beta_est = []
    for ticker in valid_stock_tickers:
        if ticker not in returns_wide.columns:
            continue

        b = calc_beta(returns_wide.loc[train_mask, ticker].dropna(), bench_ret)
        if pd.isna(b):
            continue

        test_prices = prices_wide.loc[test_mask, ticker].dropna()
        if len(test_prices) < 2:
            continue

        next_yr_ret = test_prices.iloc[-1] / test_prices.iloc[0] - 1
        beta_est.append({
            'TrainYear': train_year,
            'TestYear': test_year,
            'Ticker': ticker,
            'Beta': b,
            'NextYearReturn': next_yr_ret
        })

    if not beta_est:
        continue

    beta_year_df = pd.DataFrame(beta_est).sort_values('Beta').reset_index(drop=True)
    n_buckets = min(N_BUCKETS, beta_year_df['Beta'].nunique())
    if n_buckets < 2:
        continue

    beta_year_df['BetaBucket'] = pd.qcut(
        beta_year_df['Beta'],
        q=n_buckets,
        labels=range(1, n_buckets + 1),
        duplicates='drop'
    ).astype(int)

    bucket_summary = (
        beta_year_df.groupby('BetaBucket', as_index=False)
        .agg(
            TrainYear=('TrainYear', 'first'),
            TestYear=('TestYear', 'first'),
            StockCount=('Ticker', 'count'),
            AvgBeta=('Beta', 'mean'),
            MedianBeta=('Beta', 'median'),
            AvgNextYearReturn=('NextYearReturn', 'mean'),
            MedianNextYearReturn=('NextYearReturn', 'median')
        )
    )

    analysis_rows.append(bucket_summary)
    beta_detail_rows.append(beta_year_df)

analysis_df = pd.concat(analysis_rows, ignore_index=True)
beta_detail_df = pd.concat(beta_detail_rows, ignore_index=True)

overall_bucket_summary = (
    analysis_df.groupby('BetaBucket', as_index=False)
    .agg(
        YearPairs=('TestYear', 'count'),
        AvgBeta=('AvgBeta', 'mean'),
        MedianBeta=('MedianBeta', 'mean'),
        AvgNextYearReturn=('AvgNextYearReturn', 'mean'),
        MedianNextYearReturn=('MedianNextYearReturn', 'mean')
    )
    .sort_values('BetaBucket')
    .reset_index(drop=True)
)

overall_bucket_summary = overall_bucket_summary.rename(columns={
    'BetaBucket': 'Bucket',
    'YearPairs': 'Year Pairs',
    'AvgBeta': 'Avg Beta',
    'MedianBeta': 'Median Beta',
    'AvgNextYearReturn': 'Avg Next-Yr Return',
    'MedianNextYearReturn': 'Median Next-Yr Return',
})

overall_bucket_summary['Avg Beta'] = overall_bucket_summary['Avg Beta'].map(fmt_f3)
overall_bucket_summary['Median Beta'] = overall_bucket_summary['Median Beta'].map(fmt_f3)
overall_bucket_summary['Avg Next-Yr Return'] = overall_bucket_summary['Avg Next-Yr Return'].map(fmt_pct)
overall_bucket_summary['Median Next-Yr Return'] = overall_bucket_summary['Median Next-Yr Return'].map(fmt_pct)

display(overall_bucket_summary)
Bucket Year Pairs Avg Beta Median Beta Avg Next-Yr Return Median Next-Yr Return
0 1 8 0.482 0.517 15.8% 10.1%
1 2 8 0.895 0.919 20.8% 18.0%
2 3 8 1.133 1.133 52.5% 29.3%
3 4 8 1.393 1.397 71.0% 48.0%
4 5 8 1.755 1.718 76.9% 53.7%

Bucket returns - overall and by year

The left chart shows average and median next-year return by bucket, pooled across all train-test pairs. The right chart shows the same breakdown per test year to reveal whether the low-beta premium is consistent or episodic.

In [14]:
plt.figure(figsize=(8, 4.5))

plt.plot(
    overall_bucket_summary['Bucket'],
    overall_bucket_summary['Avg Next-Yr Return'],
    marker='o',
    color=FL_BLUE,
    linewidth=1.8,
    label='Mean'
)
plt.plot(
    overall_bucket_summary['Bucket'],
    overall_bucket_summary['Median Next-Yr Return'],
    marker='s',
    color=FL_SLATE,
    linewidth=1.4,
    linestyle='--',
    label='Median'
)
plt.axhline(0, color=FL_GRID, linewidth=0.8)
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0%}')
)
plt.xlabel('Beta bucket (1 = lowest)')
plt.ylabel('Next-year return')
plt.title('Average next-year return by beta bucket')
plt.xticks(overall_bucket_summary['Bucket'])
plt.legend()
plt.tick_params(axis='both', which='both', length=0)
plt.tight_layout()
plt.show()

plt.figure(figsize=(8, 4.5))

palette = [
    FL_BLUE, FL_AMBER, FL_GREEN, FL_RED, FL_SLATE,
    '#7c3aed', '#0891b2', '#db2777', '#65a30d', '#ea580c'
]

for idx, test_year in enumerate(sorted(analysis_df['TestYear'].unique())):
    yr_df = analysis_df.loc[analysis_df['TestYear'] == test_year].sort_values('BetaBucket')
    col = palette[idx % len(palette)]

    plt.plot(
        yr_df['BetaBucket'],
        yr_df['AvgNextYearReturn'],
        marker='o',
        color=col,
        linewidth=1.3,
        label=str(test_year)
    )

plt.axhline(0, color=FL_GRID, linewidth=0.8)
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'{y:.0%}')
)
plt.xlabel('Beta bucket (1 = lowest)')
plt.ylabel('Next-year return')
plt.title('Next-year return by beta bucket for each test year')
plt.xticks(overall_bucket_summary['Bucket'])
plt.legend(title='Test year', fontsize=9)
plt.tick_params(axis='both', which='both', length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image

BAB strategy

Each year, the 10 lowest-beta stocks from the prior year's estimation are selected and held with equal weight for the entire following calendar year. The portfolio is rebalanced annually. The equity curve below compares the BAB strategy against QQQ buy-and-hold.

In [15]:
selection_rows = []
daily_return_segments = []

for train_year, test_year in train_test_pairs:
    train_mask = returns_wide.index.year == train_year
    test_mask = returns_wide.index.year == test_year
    bench_ret = returns_wide.loc[train_mask, BENCHMARK_TICKER].dropna()

    beta_rows = []
    for ticker in valid_stock_tickers:
        if ticker not in returns_wide.columns:
            continue

        b = calc_beta(returns_wide.loc[train_mask, ticker].dropna(), bench_ret)
        if not pd.isna(b):
            beta_rows.append({
                'Ticker': ticker,
                'Beta': b
            })

    if not beta_rows:
        continue

    beta_df = pd.DataFrame(beta_rows).sort_values('Beta').reset_index(drop=True)
    selected = beta_df.head(N_LONGS)

    selection_rows.append({
        'TrainYear': train_year,
        'HoldYear': test_year,
        'SelectedCount': len(selected),
        'AvgSelectedBeta': round(selected['Beta'].mean(), 4),
        'MedianSelectedBeta': round(selected['Beta'].median(), 4),
        'Tickers': ', '.join(selected['Ticker'].tolist()),
    })

    hold = returns_wide.loc[test_mask, selected['Ticker'].tolist()].copy()
    if hold.empty:
        continue

    daily_r = hold.mean(axis=1, skipna=True).dropna().rename('daily_return')
    seg = daily_r.reset_index()
    seg['TrainYear'] = train_year
    seg['HoldYear'] = test_year
    daily_return_segments.append(seg)

selection_summary_df = pd.DataFrame(selection_rows).sort_values('HoldYear').reset_index(drop=True)

daily_returns_df = (
    pd.concat(daily_return_segments, ignore_index=True)
    .sort_values('date')
    .drop_duplicates(subset=['date'], keep='first')
    .reset_index(drop=True)
)

bab_equity_df = build_daily_equity_curve_from_returns(
    daily_returns_df[['date', 'daily_return']],
    STARTING_CAPITAL
)
bab_equity_df['drawdown'] = bab_equity_df['equity'] - bab_equity_df['equity'].cummax()

qqq_ret = (
    returns_wide[[BENCHMARK_TICKER]]
    .rename(columns={BENCHMARK_TICKER: 'daily_return'})
    .dropna()
    .reset_index()
)
qqq_equity_df = build_daily_equity_curve_from_returns(qqq_ret, STARTING_CAPITAL)

cagr, sharpe = calc_cagr_and_sharpe(
    bab_equity_df[['date', 'daily_return', 'equity']],
    STARTING_CAPITAL
)
qqq_cagr, qqq_sharpe = calc_cagr_and_sharpe(
    qqq_equity_df[['date', 'daily_return', 'equity']],
    STARTING_CAPITAL
)

selection_display_df = selection_summary_df.copy()
selection_display_df['AvgSelectedBeta'] = selection_display_df['AvgSelectedBeta'].map(fmt_f3)
selection_display_df['MedianSelectedBeta'] = selection_display_df['MedianSelectedBeta'].map(fmt_f3)

performance_summary_df = pd.DataFrame([
    {
        'Strategy': 'BAB Low Beta',
        'CAGR': fmt_pct(cagr),
        'Sharpe': fmt_f3(sharpe),
        'Max Drawdown': fmt_dollar(bab_equity_df['drawdown'].min()),
        'End Equity': fmt_dollar(bab_equity_df['equity'].iloc[-1]),
    },
    {
        'Strategy': 'QQQ Buy and Hold',
        'CAGR': fmt_pct(qqq_cagr),
        'Sharpe': fmt_f3(qqq_sharpe),
        'Max Drawdown': fmt_dollar(qqq_equity_df['equity'].sub(qqq_equity_df['equity'].cummax()).min()),
        'End Equity': fmt_dollar(qqq_equity_df['equity'].iloc[-1]),
    }
])

display(performance_summary_df)
#display(selection_display_df)
Strategy CAGR Sharpe Max Drawdown End Equity
0 BAB Low Beta 16.6% 0.828 $     -73,846 $     310,890
1 QQQ Buy and Hold 20.5% 0.905 $     -92,011 $     478,324

Yearly selection

The 10 lowest-beta stocks selected each year and their average estimated beta.

In [16]:
selection_view = (
    selection_summary_df[
        ['TrainYear', 'HoldYear', 'SelectedCount', 'AvgSelectedBeta', 'MedianSelectedBeta', 'Tickers']
    ]
    .rename(columns={
        'TrainYear': 'Train Year',
        'HoldYear': 'Hold Year',
        'SelectedCount': 'N',
        'AvgSelectedBeta': 'Avg Beta',
        'MedianSelectedBeta': 'Median Beta',
        'Tickers': 'Selected Tickers'
    })
    .copy()
)

selection_view['Avg Beta'] = selection_view['Avg Beta'].map(fmt_f3)
selection_view['Median Beta'] = selection_view['Median Beta'].map(fmt_f3)

display(selection_view)
Train Year Hold Year N Avg Beta Median Beta Selected Tickers
0 2018 2019 10 0.545 0.594 PEP, WMT, SBUX, ULTA, HON, COST, MSTR, BKR, WB...
1 2019 2020 10 0.598 0.644 WMT, PEP, MSTR, COST, SBUX, WBD, PANW, HON, UL...
2 2020 2021 10 0.594 0.662 ASTS, ONDS, WMT, COST, MSTR, WBD, HON, PEP, AA...
3 2021 2022 10 0.364 0.349 WBD, WMT, PEP, BKR, HON, AAL, CSCO, COST, SBUX...
4 2022 2023 10 0.569 0.568 WMT, PEP, BKR, FANG, HON, CSCO, COST, ULTA, SB...
5 2023 2024 10 0.406 0.417 PEP, WMT, ULTA, FANG, BKR, HON, CSCO, SBUX, CO...
6 2024 2025 10 0.340 0.360 PEP, WMT, FANG, BKR, HON, CSCO, WBD, SBUX, ULT...
7 2025 2026 10 0.548 0.656 PEP, COST, WMT, HON, ADBE, ULTA, CSCO, INTU, N...

Performance summary

BAB Low Beta strategy vs QQQ buy-and-hold, both starting from $100,000.

  • CAGR - compound annual growth rate
  • Sharpe - annualised mean daily return ÷ standard deviation × √252, no risk-free rate
  • Max drawdown - largest peak-to-trough equity loss
In [17]:
perf_df = pd.DataFrame([
    {
        'Strategy': 'BAB Low Beta (long only)',
        'Start': bab_equity_df['date'].min().strftime('%Y-%m-%d'),
        'End': bab_equity_df['date'].max().strftime('%Y-%m-%d'),
        'Ending Equity': bab_equity_df['equity'].iloc[-1],
        'Total PnL': bab_equity_df['cum_pnl'].iloc[-1],
        'Max Drawdown': bab_equity_df['drawdown'].min(),
        'CAGR': cagr,
        'Sharpe': sharpe,
    },
    {
        'Strategy': 'QQQ Buy and Hold',
        'Start': qqq_equity_df['date'].min().strftime('%Y-%m-%d'),
        'End': qqq_equity_df['date'].max().strftime('%Y-%m-%d'),
        'Ending Equity': qqq_equity_df['equity'].iloc[-1],
        'Total PnL': qqq_equity_df['cum_pnl'].iloc[-1],
        'Max Drawdown': (qqq_equity_df['equity'] - qqq_equity_df['equity'].cummax()).min(),
        'CAGR': qqq_cagr,
        'Sharpe': qqq_sharpe,
    }
])

perf_view = perf_df.copy()
perf_view['Ending Equity'] = perf_view['Ending Equity'].map(fmt_dollar)
perf_view['Total PnL'] = perf_view['Total PnL'].map(fmt_dollar)
perf_view['Max Drawdown'] = perf_view['Max Drawdown'].map(fmt_dollar)
perf_view['CAGR'] = perf_view['CAGR'].map(fmt_pct2)
perf_view['Sharpe'] = perf_view['Sharpe'].map(fmt_f2)

display(perf_view.T)
0 1
Strategy BAB Low Beta (long only) QQQ Buy and Hold
Start 2019-01-02 2018-01-03
End 2026-05-22 2026-05-22
Ending Equity $     310,890 $     478,324
Total PnL $     210,890 $     378,324
Max Drawdown $     -73,846 $     -92,011
CAGR 16.60% 20.53%
Sharpe 0.83 0.91

Equity curves and drawdown

In [18]:
plt.figure(figsize=(8, 4.5))
plt.plot(
    bab_equity_df['date'],
    bab_equity_df['equity'],
    color=FL_BLUE,
    linewidth=2.0,
    label='BAB Low Beta'
)
plt.plot(
    qqq_equity_df['date'],
    qqq_equity_df['equity'],
    color=FL_AMBER,
    linewidth=1.5,
    linestyle='--',
    label='QQQ B&H'
)
plt.ylabel('Portfolio equity ($)')
plt.title('Portfolio equity from $100,000 starting capital')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.legend()
plt.tick_params(axis='both', which='both', length=0)
plt.tight_layout()
plt.show()

qqq_dd = qqq_equity_df['equity'] - qqq_equity_df['equity'].cummax()

plt.figure(figsize=(8, 4.5))
plt.fill_between(
    bab_equity_df['date'],
    bab_equity_df['drawdown'],
    alpha=0.25,
    color=FL_BLUE
)
plt.plot(
    bab_equity_df['date'],
    bab_equity_df['drawdown'],
    color=FL_BLUE,
    linewidth=1.4,
    label='BAB Low Beta'
)
plt.plot(
    qqq_equity_df['date'],
    qqq_dd,
    color=FL_AMBER,
    linewidth=1.2,
    linestyle='--',
    label='QQQ B&H'
)
plt.axhline(0, color=FL_GRID, linewidth=0.8)
plt.ylabel('Drawdown ($)')
plt.title('Drawdown from equity peak')
plt.gca().yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda y, _: f'${y:,.0f}')
)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.legend()
plt.tick_params(axis='both', which='both', length=0)
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image