LSTM Regression Model Reference

This notebook is a practical reference for LSTM sequence regression.

It shows:

how to create sliding window sequence data
what the shape samples, time steps, channels means
how to define a small LSTM regressor in PyTorch
how to scale sequence features using training data only
how to train with early stopping
how to save predictions and metrics

Install packages

In [ ]:

pip install pandas numpy scikit-learn matplotlib torch

Create folders

In [2]:

from pathlib import Path

data_dir = Path("data")
output_dir = Path("outputs")

data_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)

# print(data_dir.resolve())dp
# print(output_dir.resolve())

Build a small sequence dataset

In [3]:

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)

n = 260
date_index = pd.date_range("2020-01-01", periods=n, freq="W")

trend = np.linspace(0, 2.5, n)
seasonal = np.sin(np.arange(n) / 8)
noise = rng.normal(0, 0.15, n)

value = 10 + trend + seasonal + noise

series_df = pd.DataFrame({
    "date": date_index,
    "value": value
})

series_df.to_csv(data_dir / "lstm_sequence_sample.csv", index=False)

series_df.head()

Out[3]:

	date	value
0	2020-01-05	10.045708
1	2020-01-12	9.978330
2	2020-01-19	10.379277
3	2020-01-26	10.536315
4	2020-02-02	10.225380

Create sliding windows

In [4]:

def make_windows(values, lookback):
    X_rows = []
    y_rows = []

    for i in range(lookback, len(values)):
        X_rows.append(values[i - lookback:i])
        y_rows.append(values[i])

    X = np.array(X_rows)
    y = np.array(y_rows)

    return X, y

lookback = 16

X_flat, y = make_windows(series_df["value"].values, lookback)

X = X_flat.reshape(X_flat.shape[0], X_flat.shape[1], 1)

print(X.shape)
print(y.shape)

(244, 16, 1)
(244,)

Split train and validation data

In [5]:

split_idx = int(len(X) * 0.80)

X_train_full = X[:split_idx]
y_train_full = y[:split_idx]
X_valid = X[split_idx:]
y_valid = y[split_idx:]

print(X_train_full.shape)
print(X_valid.shape)

(195, 16, 1)
(49, 16, 1)

Define the LSTM regressor

In [6]:

import torch
import torch.nn as nn

class LSTMRegressor(nn.Module):
    def __init__(self, channels, hidden=32, num_layers=2, dropout=0.0):
        super().__init__()

        eff_dropout = dropout if num_layers > 1 else 0.0

        self.input_layer = nn.Linear(channels, hidden)
        self.rnn = nn.LSTM(
            hidden,
            hidden,
            num_layers=num_layers,
            batch_first=True,
            dropout=eff_dropout,
            bidirectional=True
        )
        self.head = nn.Sequential(
            nn.LayerNorm(hidden * 2),
            nn.Linear(hidden * 2, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1)
        )

    def forward(self, x):
        h = self.input_layer(x)
        output, _ = self.rnn(h)
        y = self.head(output[:, -1])
        return y

Train with scaling and early stopping

In [7]:

def fit_sequence_model(X_train_full, y_train_full, X_valid, channels, device):
    channel_mean = X_train_full.reshape(-1, channels).mean(axis=0)
    channel_std = X_train_full.reshape(-1, channels).std(axis=0) + 1e-8

    X_train_scaled = ((X_train_full.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_train_full.shape)
    X_valid_scaled = ((X_valid.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_valid.shape)

    target_mean = y_train_full.mean()
    target_std = y_train_full.std() + 1e-8
    y_train_scaled = (y_train_full - target_mean) / target_std

    valid_size = max(1, int(0.15 * len(X_train_scaled)))

    X_train = X_train_scaled[:-valid_size]
    X_stop = X_train_scaled[-valid_size:]
    y_train = y_train_scaled[:-valid_size]
    y_stop = y_train_scaled[-valid_size:]

    tx = torch.tensor(X_train, dtype=torch.float32, device=device)
    ty = torch.tensor(y_train, dtype=torch.float32, device=device).unsqueeze(1)
    vx = torch.tensor(X_stop, dtype=torch.float32, device=device)
    vy = torch.tensor(y_stop, dtype=torch.float32, device=device).unsqueeze(1)

    model = LSTMRegressor(channels).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

    best_loss = np.inf
    patience_left = 10
    best_state = None

    for epoch in range(200):
        model.train()
        train_pred = model(tx)
        loss = nn.functional.mse_loss(train_pred, ty)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        model.eval()
        with torch.no_grad():
            stop_pred = model(vx)
            stop_loss = nn.functional.mse_loss(stop_pred, vy)

        if stop_loss.item() < best_loss:
            best_loss = stop_loss.item()
            patience_left = 10
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}
        else:
            patience_left -= 1
            if patience_left == 0:
                break

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        valid_tensor = torch.tensor(X_valid_scaled, dtype=torch.float32, device=device)
        pred_scaled = model(valid_tensor).cpu().squeeze(1).numpy()

    pred = pred_scaled * target_std + target_mean

    return model, pred

Fit and evaluate the model

In [8]:

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

np.random.seed(42)
torch.manual_seed(42)

device = "cuda" if torch.cuda.is_available() else "cpu"

model, pred = fit_sequence_model(
    X_train_full,
    y_train_full,
    X_valid,
    channels=1,
    device=device
)

metrics = pd.DataFrame([
    {
        "model": "LSTM",
        "lookback": lookback,
        "mae": mean_absolute_error(y_valid, pred),
        "rmse": np.sqrt(mean_squared_error(y_valid, pred)),
        "r2": r2_score(y_valid, pred)
    }
])

metrics

Out[8]:

	model	lookback	mae	rmse	r2
0	LSTM	16	0.24525	0.292191	0.829471

Plot predictions

In [9]:

import matplotlib.pyplot as plt

valid_dates = series_df["date"].iloc[lookback + split_idx:].reset_index(drop=True)

plot_df = pd.DataFrame({
    "date": valid_dates,
    "actual": y_valid,
    "predicted": pred
})

plt.figure(figsize=(8, 4), dpi=300)
plt.plot(plot_df["date"], plot_df["actual"], label="Actual")
plt.plot(plot_df["date"], plot_df["predicted"], label="Predicted")
plt.xlabel("Date")
plt.ylabel("Value")
plt.title("LSTM Validation Predictions")
plt.legend()
plt.tight_layout()
plt.show()

plot_df.head()

No description has been provided for this image

Out[9]:

	date	actual	predicted
0	2024-01-21	12.784236	12.710005
1	2024-01-28	13.030284	12.653138
2	2024-02-04	12.980277	12.694336
3	2024-02-11	13.015407	12.687913
4	2024-02-18	13.211040	12.689569

LSTM Regression Model Reference

This notebook is a practical reference for LSTM sequence regression.

It shows:

how to create sliding window sequence data

what the shape samples, time steps, channels means

how to define a small LSTM regressor in PyTorch

how to scale sequence features using training data only

how to train with early stopping

how to save predictions and metrics

from pathlib import Path data_dir = Path("data") output_dir = Path("outputs") data_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True) # print(data_dir.resolve())dp # print(output_dir.resolve())

import numpy as np import pandas as pd rng = np.random.default_rng(42) n = 260 date_index = pd.date_range("2020-01-01", periods=n, freq="W") trend = np.linspace(0, 2.5, n) seasonal = np.sin(np.arange(n) / 8) noise = rng.normal(0, 0.15, n) value = 10 + trend + seasonal + noise series_df = pd.DataFrame({ "date": date_index, "value": value }) series_df.to_csv(data_dir / "lstm_sequence_sample.csv", index=False) series_df.head()

date

value

2020-01-05

10.045708

2020-01-12

9.978330

2020-01-19

10.379277

2020-01-26

10.536315

2020-02-02

10.225380

def make_windows(values, lookback): X_rows = [] y_rows = [] for i in range(lookback, len(values)): X_rows.append(values[i - lookback:i]) y_rows.append(values[i]) X = np.array(X_rows) y = np.array(y_rows) return X, y lookback = 16 X_flat, y = make_windows(series_df["value"].values, lookback) X = X_flat.reshape(X_flat.shape[0], X_flat.shape[1], 1) print(X.shape) print(y.shape)

import torch import torch.nn as nn class LSTMRegressor(nn.Module): def __init__(self, channels, hidden=32, num_layers=2, dropout=0.0): super().__init__() eff_dropout = dropout if num_layers > 1 else 0.0 self.input_layer = nn.Linear(channels, hidden) self.rnn = nn.LSTM( hidden, hidden, num_layers=num_layers, batch_first=True, dropout=eff_dropout, bidirectional=True ) self.head = nn.Sequential( nn.LayerNorm(hidden * 2), nn.Linear(hidden * 2, hidden), nn.ReLU(), nn.Linear(hidden, 1) ) def forward(self, x): h = self.input_layer(x) output, _ = self.rnn(h) y = self.head(output[:, -1]) return y

def fit_sequence_model(X_train_full, y_train_full, X_valid, channels, device): channel_mean = X_train_full.reshape(-1, channels).mean(axis=0) channel_std = X_train_full.reshape(-1, channels).std(axis=0) + 1e-8 X_train_scaled = ((X_train_full.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_train_full.shape) X_valid_scaled = ((X_valid.reshape(-1, channels) - channel_mean) / channel_std).reshape(X_valid.shape) target_mean = y_train_full.mean() target_std = y_train_full.std() + 1e-8 y_train_scaled = (y_train_full - target_mean) / target_std valid_size = max(1, int(0.15 * len(X_train_scaled))) X_train = X_train_scaled[:-valid_size] X_stop = X_train_scaled[-valid_size:] y_train = y_train_scaled[:-valid_size] y_stop = y_train_scaled[-valid_size:] tx = torch.tensor(X_train, dtype=torch.float32, device=device) ty = torch.tensor(y_train, dtype=torch.float32, device=device).unsqueeze(1) vx = torch.tensor(X_stop, dtype=torch.float32, device=device) vy = torch.tensor(y_stop, dtype=torch.float32, device=device).unsqueeze(1) model = LSTMRegressor(channels).to(device) optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4) best_loss = np.inf patience_left = 10 best_state = None for epoch in range(200): model.train() train_pred = model(tx) loss = nn.functional.mse_loss(train_pred, ty) optimizer.zero_grad() loss.backward() nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() model.eval() with torch.no_grad(): stop_pred = model(vx) stop_loss = nn.functional.mse_loss(stop_pred, vy) if stop_loss.item() < best_loss: best_loss = stop_loss.item() patience_left = 10 best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()} else: patience_left -= 1 if patience_left == 0: break model.load_state_dict(best_state) model.eval() with torch.no_grad(): valid_tensor = torch.tensor(X_valid_scaled, dtype=torch.float32, device=device) pred_scaled = model(valid_tensor).cpu().squeeze(1).numpy() pred = pred_scaled * target_std + target_mean return model, pred

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score np.random.seed(42) torch.manual_seed(42) device = "cuda" if torch.cuda.is_available() else "cpu" model, pred = fit_sequence_model( X_train_full, y_train_full, X_valid, channels=1, device=device ) metrics = pd.DataFrame([ { "model": "LSTM", "lookback": lookback, "mae": mean_absolute_error(y_valid, pred), "rmse": np.sqrt(mean_squared_error(y_valid, pred)), "r2": r2_score(y_valid, pred) } ]) metrics

model

lookback

mae

rmse

LSTM

0.24525

0.292191

0.829471

import matplotlib.pyplot as plt valid_dates = series_df["date"].iloc[lookback + split_idx:].reset_index(drop=True) plot_df = pd.DataFrame({ "date": valid_dates, "actual": y_valid, "predicted": pred }) plt.figure(figsize=(8, 4), dpi=300) plt.plot(plot_df["date"], plot_df["actual"], label="Actual") plt.plot(plot_df["date"], plot_df["predicted"], label="Predicted") plt.xlabel("Date") plt.ylabel("Value") plt.title("LSTM Validation Predictions") plt.legend() plt.tight_layout() plt.show() plot_df.head()

date

actual

predicted

2024-01-21

12.784236

12.710005

2024-01-28

13.030284

12.653138

2024-02-04

12.980277

12.694336

2024-02-11

13.015407

12.687913

2024-02-18

13.211040

12.689569