"""
Robust Walk-Forward Training Utilities
This module provides core utilities for walk-forward analysis:
1. **Rademacher Complexity (Paleologo)**
- Compute empirical Rademacher complexity from checkpoint returns
- Apply haircut to OOS performance estimates
2. **Walk-Forward Efficiency (Pardo)**
- Compute WFE = OOS performance / IS performance
- Standard metric for assessing robustness
3. **Cycle Generation**
- Generate walk-forward train/test splits
- Support for rolling and expanding windows
Key References:
- Pardo, "The Evaluation and Optimization of Trading Strategies" (2008)
- Paleologo, "The Elements of Quantitative Investing" (2024), Ch. 6
"""
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple
from datetime import datetime
# =============================================================================
# Data Classes
# =============================================================================
[docs]
@dataclass
class WalkForwardCycle:
"""Specification for a single walk-forward train/test cycle.
Defines one segment of a walk-forward analysis: a contiguous training
window followed by a contiguous test window. Date fields are set at
cycle-generation time; index fields are populated later once the price
data has been loaded and aligned.
Attributes
----------
cycle_number : int
Zero-based index of this cycle within the walk-forward sequence.
train_start_date : str
Training window start date (``"YYYY-MM-DD HH:MM:SS"``).
train_end_date : str
Training window end date (inclusive).
test_start_date : str
Test window start date, typically equal to ``train_end_date``.
test_end_date : str
Test window end date (inclusive).
train_start_idx : int
Row index into the price array for the start of training.
Default 0; populated after data loading.
train_end_idx : int
Row index for the end of training. Default 0.
test_start_idx : int
Row index for the start of testing. Default 0.
test_end_idx : int
Row index for the end of testing. Default 0.
"""
cycle_number: int
train_start_date: str
train_end_date: str
test_start_date: str
test_end_date: str
# Indices populated after data loading
train_start_idx: int = 0
train_end_idx: int = 0
test_start_idx: int = 0
test_end_idx: int = 0
# =============================================================================
# Rademacher Complexity Utilities
# =============================================================================
[docs]
def compute_empirical_rademacher(
returns_matrix: np.ndarray,
n_samples: int = 1000,
seed: int = 42,
) -> float:
"""
Compute empirical Rademacher complexity of a set of strategies.
The Rademacher complexity measures how well the strategy class can
"fit" random noise. Higher complexity = more overfitting risk.
Parameters
----------
returns_matrix : ndarray of shape (n_strategies, T)
Returns time series for each strategy (checkpoint)
n_samples : int
Number of random sign vectors to sample
seed : int
Random seed for reproducibility
Returns
-------
float
Empirical Rademacher complexity R̂
Notes
-----
R̂ = E_σ[sup_s (1/T) Σ_t σ_t r_s(t)]
where σ_t are random Rademacher variables (±1 with prob 0.5)
"""
if returns_matrix.ndim == 1:
returns_matrix = returns_matrix.reshape(1, -1)
n_strategies, T = returns_matrix.shape
if n_strategies == 0 or T == 0:
return 0.0
rng = np.random.RandomState(seed)
suprema = []
for _ in range(n_samples):
# Random Rademacher signs
sigma = rng.choice([-1, 1], size=T)
# Correlation of each strategy with random signs
correlations = returns_matrix @ sigma / T
# Supremum over strategies
sup = np.max(correlations)
suprema.append(sup)
return np.mean(suprema)
[docs]
def compute_rademacher_haircut(
observed_sharpe: float,
rademacher_complexity: float,
T: int,
delta: float = 0.05,
) -> Tuple[float, float]:
"""
Compute Rademacher-adjusted performance bound.
From Paleologo (2024):
θ_n ≥ θ̂_n - 2R̂ - estimation_error
Parameters
----------
observed_sharpe : float
Observed Sharpe ratio on test data
rademacher_complexity : float
Empirical Rademacher complexity R̂
T : int
Number of time periods in test data
delta : float
Confidence level (default 0.05 for 95% confidence)
Returns
-------
Tuple[float, float]
(adjusted_sharpe, haircut_magnitude)
"""
# Guard against T=0 which would cause division by zero
if T <= 0:
return float('nan'), float('nan')
# Estimation error term: 3√(2log(2/δ)/T)
estimation_error = 3 * np.sqrt(2 * np.log(2 / delta) / T)
# Total haircut
haircut = 2 * rademacher_complexity + estimation_error
adjusted_sharpe = observed_sharpe - haircut
return adjusted_sharpe, haircut
# =============================================================================
# Walk-Forward Efficiency (Pardo)
# =============================================================================
[docs]
def compute_walk_forward_efficiency(
is_sharpe: float,
oos_sharpe: float,
is_days: int,
oos_days: int,
) -> float:
"""
Compute Walk-Forward Efficiency (WFE) as per Pardo.
WFE = (Annualized OOS Performance) / (Annualized IS Performance)
A WFE of 0.5 or higher suggests robustness.
A WFE near 1.0 is ideal (OOS ≈ IS).
A WFE > 1.0 means OOS outperformed IS (unusual but possible).
Parameters
----------
is_sharpe : float
In-sample Sharpe ratio
oos_sharpe : float
Out-of-sample Sharpe ratio
is_days : int
Number of days in IS period
oos_days : int
Number of days in OOS period
Returns
-------
float
Walk-Forward Efficiency (returns NaN for undefined cases)
"""
# Handle edge cases where WFE is undefined
if is_sharpe <= 0:
# Can't compute meaningful ratio when IS is non-positive
# Return NaN to signal "undefined" rather than 0.0 which masks failure
# Callers should filter with np.isfinite() - see _aggregate_results
return float('nan')
return oos_sharpe / is_sharpe
# =============================================================================
# Cycle Generation Utilities
# =============================================================================
[docs]
def datetime_to_timestamp(date_string: str) -> float:
"""Convert a datetime string to a Unix timestamp.
Parameters
----------
date_string : str
Date in ``"YYYY-MM-DD HH:MM:SS"`` format.
Returns
-------
float
Seconds since the Unix epoch, interpreted in the local timezone.
"""
dt = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
return dt.timestamp()
[docs]
def timestamp_to_datetime(timestamp: float) -> str:
"""Convert a Unix timestamp to a datetime string.
Parameters
----------
timestamp : float
Seconds since the Unix epoch.
Returns
-------
str
Formatted date string in ``"YYYY-MM-DD HH:MM:SS"`` format.
"""
dt = datetime.fromtimestamp(timestamp)
return dt.strftime("%Y-%m-%d %H:%M:%S")
[docs]
def generate_walk_forward_cycles(
start_date: str,
end_date: str,
n_cycles: int,
keep_fixed_start: bool = False,
) -> List[WalkForwardCycle]:
"""
Generate walk-forward cycle specifications with equal-length test periods.
Divides [start_date, end_date] into (n_cycles + 1) equal segments.
Each cycle trains on segment i and tests on segment i+1.
Parameters
----------
start_date : str
Start date (format: "YYYY-MM-DD HH:MM:SS")
end_date : str
End date of walk-forward analysis (end of final test period)
n_cycles : int
Number of training/test cycles
keep_fixed_start : bool
If True, training always starts from start_date (expanding window).
If False (default), training window rolls forward (rolling window).
Returns
-------
List[WalkForwardCycle]
"""
start_ts = datetime_to_timestamp(start_date)
end_ts = datetime_to_timestamp(end_date)
# Create n_cycles + 1 segment boundaries
# n_cycles + 1 segments means n_cycles + 2 boundary points
n_segments = n_cycles + 1
times = np.linspace(start_ts, end_ts, n_segments + 1)
# Round to midnight
times = times - (times % (24 * 60 * 60))
cycles = []
for i in range(n_cycles):
if keep_fixed_start:
train_start = times[0]
else:
train_start = times[i]
train_end = times[i + 1]
test_start = times[i + 1]
test_end = times[i + 2]
cycles.append(WalkForwardCycle(
cycle_number=i,
train_start_date=timestamp_to_datetime(train_start),
train_end_date=timestamp_to_datetime(train_end),
test_start_date=timestamp_to_datetime(test_start),
test_end_date=timestamp_to_datetime(test_end),
))
return cycles