Ctrl K

LSTM Regression Model Reference

This notebook is a practical reference for LSTM sequence regression.

It shows:

  • how to create sliding window sequence data
  • what the shape samples, time steps, channels means
  • how to define a small LSTM regressor in PyTorch
  • how to scale sequence features using training data only
  • how to train with early stopping
  • how to save predictions and metrics

Install packages

In [ ]:
pip install pandas numpy scikit-learn matplotlib torch

Create folders

In [2]:
from pathlib import Path

data_dir = Path("data")
output_dir = Path("outputs")

data_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)

# print(data_dir.resolve())dp
# print(output_dir.resolve())

Build a small sequence dataset

In [3]:
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)

n = 260
date_index = pd.date_range("2020-01-01", periods=n, freq="W")

trend = np.linspace(0, 2.5, n)
seasonal = np.sin(np.arange(n) / 8)
noise = rng.normal(0, 0.15, n)

value = 10 + trend + seasonal + noise

series_df = pd.DataFrame({
    "date": date_index,
    "value": value
})

series_df.to_csv(data_dir / "lstm_sequence_sample.csv", index=False)

series_df.head()
Out[3]:
date value
0 2020-01-05 10.045708
1 2020-01-12 9.978330
2 2020-01-19 10.379277
3 2020-01-26 10.536315
4 2020-02-02 10.225380

Create sliding windows

In [4]:
def make_windows(values, lookback):
    X_rows = []
    y_rows = []

    for i in range(lookback, len(values)):
        X_rows.append(values[i - lookback:i])
        y_rows.append(values[i])

    X = np.array(X_rows)
    y = np.array(y_rows)

    return X, y

lookback = 16

X_flat, y = make_windows(series_df["value"].values, lookback)

X = X_flat.reshape(X_flat.shape[0], X_flat.shape[1], 1)

print(X.shape)
print(y.shape)
(244, 16, 1)
(244,)

Split train and validation data

In [5]:
split_idx = int(len(X) * 0.80)

X_train_full = X[:split_idx]
y_train_full = y[:split_idx]
X_valid = X[split_idx:]
y_valid = y[split_idx:]

print(X_train_full.shape)
print(X_valid.shape)
(195, 16, 1)
(49, 16, 1)

Define the LSTM regressor

In [6]:
import torch
import torch.nn as nn

class LSTMRegressor(nn.Module):
    def __init__(self, channels, hidden=32, num_layers=2, dropout=0.0):
        super().__init__()

        eff_dropout = dropout if num_layers > 1 else 0.0

        self.input_layer = nn.Linear(channels, hidden)
        self.rnn = nn.LSTM(
            hidden,
            hidden,
            num_layers=num_layers,
            batch_first=True,
            dropout=eff_dropout,
            bidirectional=True
        )
        self.head = nn.Sequential(
            nn.LayerNorm(hidden * 2),
            nn.Linear(hidden * 2, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1)
        )

    def forward(self, x):
        h = self.input_layer(x)
        output, _ = self.rnn(h)
        y = self.head(output[:, -1])
        return y

Train with scaling and early stopping

In [7]:
def fit_sequence_model(X_train_full, y_train_full, X_valid, channels, device):
    channel_mean = X_train_full.reshape(-1, channels).mean(axis=0)
    channel_std = X_train_full.reshape(-1, channels).std(axis=0) + 1e-8

    X_train_scaled = ((X_train_full.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_train_full.shape)
    X_valid_scaled = ((X_valid.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_valid.shape)

    target_mean = y_train_full.mean()
    target_std = y_train_full.std() + 1e-8
    y_train_scaled = (y_train_full - target_mean) / target_std

    valid_size = max(1, int(0.15 * len(X_train_scaled)))

    X_train = X_train_scaled[:-valid_size]
    X_stop = X_train_scaled[-valid_size:]
    y_train = y_train_scaled[:-valid_size]
    y_stop = y_train_scaled[-valid_size:]

    tx = torch.tensor(X_train, dtype=torch.float32, device=device)
    ty = torch.tensor(y_train, dtype=torch.float32, device=device).unsqueeze(1)
    vx = torch.tensor(X_stop, dtype=torch.float32, device=device)
    vy = torch.tensor(y_stop, dtype=torch.float32, device=device).unsqueeze(1)

    model = LSTMRegressor(channels).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

    best_loss = np.inf
    patience_left = 10
    best_state = None

    for epoch in range(200):
        model.train()
        train_pred = model(tx)
        loss = nn.functional.mse_loss(train_pred, ty)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        model.eval()
        with torch.no_grad():
            stop_pred = model(vx)
            stop_loss = nn.functional.mse_loss(stop_pred, vy)

        if stop_loss.item() < best_loss:
            best_loss = stop_loss.item()
            patience_left = 10
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}
        else:
            patience_left -= 1
            if patience_left == 0:
                break

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        valid_tensor = torch.tensor(X_valid_scaled, dtype=torch.float32, device=device)
        pred_scaled = model(valid_tensor).cpu().squeeze(1).numpy()

    pred = pred_scaled * target_std + target_mean

    return model, pred

Fit and evaluate the model

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

np.random.seed(42)
torch.manual_seed(42)

device = "cuda" if torch.cuda.is_available() else "cpu"

model, pred = fit_sequence_model(
    X_train_full,
    y_train_full,
    X_valid,
    channels=1,
    device=device
)

metrics = pd.DataFrame([
    {
        "model": "LSTM",
        "lookback": lookback,
        "mae": mean_absolute_error(y_valid, pred),
        "rmse": np.sqrt(mean_squared_error(y_valid, pred)),
        "r2": r2_score(y_valid, pred)
    }
])

metrics
Out[8]:
model lookback mae rmse r2
0 LSTM 16 0.24525 0.292191 0.829471

Plot predictions

In [9]:
import matplotlib.pyplot as plt

valid_dates = series_df["date"].iloc[lookback + split_idx:].reset_index(drop=True)

plot_df = pd.DataFrame({
    "date": valid_dates,
    "actual": y_valid,
    "predicted": pred
})

plt.figure(figsize=(8, 4), dpi=300)
plt.plot(plot_df["date"], plot_df["actual"], label="Actual")
plt.plot(plot_df["date"], plot_df["predicted"], label="Predicted")
plt.xlabel("Date")
plt.ylabel("Value")
plt.title("LSTM Validation Predictions")
plt.legend()
plt.tight_layout()
plt.show()

plot_df.head()
No description has been provided for this image
Out[9]:
date actual predicted
0 2024-01-21 12.784236 12.710005
1 2024-01-28 13.030284 12.653138
2 2024-02-04 12.980277 12.694336
3 2024-02-11 13.015407 12.687913
4 2024-02-18 13.211040 12.689569