Add DCC-GARCH and dynamic copula correlation regime modeling for multi-asset covariance estimation
Browse files- correlation_regime.py +638 -0
correlation_regime.py
ADDED
|
@@ -0,0 +1,638 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-Asset Correlation Regime Modeling (DCC-GARCH, Dynamic Copulas)
|
| 2 |
+
|
| 3 |
+
Jane Street and Two Sigma don't assume constant correlations.
|
| 4 |
+
Correlations spike during crises (2008, 2020) — and THAT'S when you blow up.
|
| 5 |
+
|
| 6 |
+
This module implements:
|
| 7 |
+
1. DCC-GARCH: Dynamic Conditional Correlation with GARCH volatilities
|
| 8 |
+
2. Dynamic Copulas: Non-linear dependence modeling (tail dependence) — planned, not implemented in this file
|
| 9 |
+
3. Regime-switching correlations: High/low correlation regimes
|
| 10 |
+
4. Factor correlation models: PCA factor structure, plus covariance shrinkage (Ledoit-Wolf style)
|
| 11 |
+
5. Forecasting: Correlation term structure prediction
|
| 12 |
+
|
| 13 |
+
Based on:
|
| 14 |
+
- Engle (2002): "Dynamic Conditional Correlation: A Simple Class of Multivariate GARCH Models"
|
| 15 |
+
- Patton (2012): "A Review of Copula Models for Economic Time Series"
|
| 16 |
+
- Creal et al. (2013): "Generalized Autoregressive Score Models"
|
| 17 |
+
- Ledoit & Wolf (2004): "Honey, I Shrunk the Sample Covariance Matrix"
|
| 18 |
+
"""
|
| 19 |
+
import numpy as np
|
| 20 |
+
import pandas as pd
|
| 21 |
+
from typing import Dict, List, Tuple, Optional
|
| 22 |
+
from scipy import stats
|
| 23 |
+
from scipy.optimize import minimize
|
| 24 |
+
from scipy.linalg import inv, sqrtm
|
| 25 |
+
import warnings
|
| 26 |
+
warnings.filterwarnings('ignore')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class GARCHModel:
    """
    Univariate GARCH(1,1) for volatility estimation per asset.

        σ²_t = ω + α * r²_{t-1} + β * σ²_{t-1}

    Parameters are selected by a coarse grid search that maximises the
    Gaussian log-likelihood. Besides the fixed ω values, the search also
    tries a variance-targeted ω = (1 - α - β) * Var(r) for every (α, β)
    pair, so the fit is usable regardless of the scale of the returns
    (e.g. daily returns whose variance is far below the smallest fixed ω).
    """

    # Candidate values explored by the grid search in ``fit``.
    _OMEGA_GRID = (0.001, 0.005, 0.01, 0.05)
    _ALPHA_GRID = (0.05, 0.1, 0.15)
    _BETA_GRID = (0.7, 0.8, 0.85, 0.9)

    def __init__(self,
                 omega: float = 0.01,
                 alpha: float = 0.1,
                 beta: float = 0.85):
        self.omega = omega
        self.alpha = alpha
        self.beta = beta

        # Populated by fit(); empty until then.
        self.conditional_variances = []
        self.residuals = []
        self.log_likelihood = 0.0

    @staticmethod
    def _variance_path(returns: np.ndarray,
                       omega: float,
                       alpha: float,
                       beta: float) -> np.ndarray:
        """Run the GARCH(1,1) recursion, seeded with the sample variance."""
        n = len(returns)
        sigma2 = np.zeros(n)
        sigma2[0] = np.var(returns)
        for t in range(1, n):
            sigma2[t] = omega + alpha * returns[t - 1] ** 2 + beta * sigma2[t - 1]
        return sigma2

    @classmethod
    def _log_likelihood(cls,
                        returns: np.ndarray,
                        omega: float,
                        alpha: float,
                        beta: float) -> float:
        """Gaussian log-likelihood; inadmissible parameters get a large penalty."""
        if omega <= 0 or alpha < 0 or beta < 0 or alpha + beta >= 1:
            return -1e10
        sigma2 = cls._variance_path(returns, omega, alpha, beta)
        return -0.5 * np.sum(np.log(2 * np.pi * sigma2) + returns ** 2 / sigma2)

    def fit(self, returns: np.ndarray):
        """
        Estimate GARCH parameters by grid-search maximum likelihood.

        Args:
            returns: 1-D sequence of returns (anything ``np.asarray`` accepts).

        Returns:
            self, with ``omega``/``alpha``/``beta``, ``log_likelihood``,
            ``conditional_variances`` and ``residuals`` updated.

        Raises:
            ValueError: if ``returns`` is empty.
        """
        returns = np.asarray(returns, dtype=float).ravel()
        if returns.size == 0:
            raise ValueError("returns must contain at least one observation")

        sample_var = float(np.var(returns))

        # Fixed grid first (original search order), then the variance-targeted
        # candidates, so ties keep resolving in favour of the fixed grid.
        candidates = [
            (w, a, b)
            for w in self._OMEGA_GRID
            for a in self._ALPHA_GRID
            for b in self._BETA_GRID
        ]
        candidates += [
            ((1 - a - b) * sample_var, a, b)
            for a in self._ALPHA_GRID
            for b in self._BETA_GRID
        ]

        best_ll = -np.inf
        best_params = (self.omega, self.alpha, self.beta)

        for w, a, b in candidates:
            if a + b >= 1:
                continue
            ll = self._log_likelihood(returns, w, a, b)
            if ll > best_ll:
                best_ll = ll
                best_params = (w, a, b)

        self.omega, self.alpha, self.beta = best_params
        self.log_likelihood = best_ll

        # Conditional variances and standardized residuals under the fit.
        self.conditional_variances = self._variance_path(
            returns, self.omega, self.alpha, self.beta
        )
        # 1e-10 floor avoids division by zero for degenerate series.
        self.residuals = returns / np.sqrt(self.conditional_variances + 1e-10)

        return self

    def forecast(self, horizon: int = 1) -> np.ndarray:
        """
        Forecast conditional variance for steps 1..horizon.

        Returns an array of length ``horizon``; all zeros if the model has
        not been fitted.
        """
        if len(self.conditional_variances) == 0:
            return np.zeros(horizon)

        last_var = self.conditional_variances[-1]
        # Recover the last raw return by inverting the standardization done
        # in fit() (including its 1e-10 floor).
        if len(self.residuals) > 0:
            last_ret = self.residuals[-1] * np.sqrt(last_var + 1e-10)
        else:
            last_ret = 0

        forecasts = np.zeros(horizon)
        current_var = last_var

        for h in range(horizon):
            if h == 0:
                # One step ahead uses the realised last return.
                current_var = self.omega + self.alpha * last_ret ** 2 + self.beta * last_var
            else:
                # Beyond one step, E[r²] is replaced by the previous forecast.
                current_var = self.omega + (self.alpha + self.beta) * current_var

            forecasts[h] = current_var

        return forecasts

    def get_params(self) -> Dict:
        """Return the current parameters plus persistence, half-life and log-likelihood."""
        persistence = self.alpha + self.beta
        if 0 < persistence < 1:
            half_life = np.log(0.5) / np.log(persistence)
        else:
            half_life = np.inf

        return {
            'omega': self.omega,
            'alpha': self.alpha,
            'beta': self.beta,
            'persistence': persistence,
            'half_life': half_life,
            'log_likelihood': self.log_likelihood
        }
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
class DCCModel:
    """
    Dynamic Conditional Correlation (DCC) GARCH.

    Two-step estimation:
    1. Fit univariate GARCH for each asset → standardized residuals z_t
    2. Model correlation dynamics on z_t:
       Q_t = (1 - a - b) * Q_bar + a * z_{t-1} * z_{t-1}' + b * Q_{t-1}
       R_t = diag(Q_t)^{-1/2} * Q_t * diag(Q_t)^{-1/2}

    R_t = time-varying correlation matrix.

    After ``fit`` on T observations, ``Q_matrices`` holds T entries
    (starting with Q_bar) while ``correlation_matrices`` holds T - 1
    entries, one for each t = 1..T-1.
    """

    def __init__(self,
                 a: float = 0.01,  # Correlation reaction
                 b: float = 0.98,  # Correlation persistence
                 n_assets: int = 2):
        self.a = a
        self.b = b
        self.n_assets = n_assets

        self.garch_models: List["GARCHModel"] = []
        self.correlation_matrices = []
        self.Q_matrices = []
        self.Q_bar = None

        self.standardized_residuals = None

    @staticmethod
    def _to_correlation(Q: np.ndarray) -> np.ndarray:
        """Rescale Q to unit diagonal (1e-10 floor guards a zero diagonal)."""
        inv_std = 1.0 / np.sqrt(np.diag(Q) + 1e-10)
        return Q * np.outer(inv_std, inv_std)

    def _correlation_log_likelihood(self, z: np.ndarray, a: float, b: float) -> float:
        """Correlation part of the DCC log-likelihood for parameters (a, b)."""
        if a < 0 or b < 0 or a + b >= 1:
            return -1e10

        T, n = z.shape
        jitter = np.eye(n) * 1e-10
        Q = self.Q_bar.copy()
        ll = 0.0

        for t in range(1, T):
            z_prev = z[t - 1]
            Q = (1 - a - b) * self.Q_bar + a * np.outer(z_prev, z_prev) + b * Q
            R = self._to_correlation(Q)

            # slogdet instead of log(det): a singular R would otherwise give
            # log(0) = -inf and hence a +inf likelihood that wins the search.
            sign, logdet = np.linalg.slogdet(R)
            if sign <= 0:
                return -1e10

            z_t = z[t]
            ll += -0.5 * (logdet + z_t @ np.linalg.solve(R + jitter, z_t))

        return ll

    def fit(self, returns: np.ndarray):
        """
        Fit DCC-GARCH to multivariate returns.

        Args:
            returns: (T, n_assets) array of returns.

        Returns:
            self, with per-asset GARCH models, Q_bar, the selected (a, b)
            and the in-sample Q / correlation paths populated.

        Raises:
            ValueError: if ``returns`` is not two-dimensional.
        """
        # Float coercion: zeros_like on an integer array would otherwise
        # truncate the standardized residuals.
        returns = np.asarray(returns, dtype=float)
        if returns.ndim != 2:
            raise ValueError("returns must be a 2-D array of shape (T, n_assets)")

        T, n = returns.shape
        self.n_assets = n

        # Step 1: Univariate GARCH for each asset
        self.garch_models = []
        standardized = np.zeros_like(returns)

        for i in range(n):
            garch = GARCHModel()
            garch.fit(returns[:, i])
            self.garch_models.append(garch)
            standardized[:, i] = garch.residuals

        self.standardized_residuals = standardized

        # Q_bar = unconditional correlation of standardized residuals
        # (atleast_2d: corrcoef returns a scalar for a single asset).
        self.Q_bar = np.atleast_2d(np.corrcoef(standardized.T))

        # Step 2: grid search for the DCC parameters (a, b)
        best_ll = -np.inf
        best_params = (self.a, self.b)

        for a_val in [0.005, 0.01, 0.02, 0.05]:
            for b_val in [0.9, 0.93, 0.95, 0.97, 0.99]:
                if a_val + b_val < 1:
                    ll = self._correlation_log_likelihood(standardized, a_val, b_val)
                    if ll > best_ll:
                        best_ll = ll
                        best_params = (a_val, b_val)

        self.a, self.b = best_params

        # Compute full time series of correlations under the selected (a, b)
        Q = self.Q_bar.copy()
        self.correlation_matrices = []
        self.Q_matrices = [Q.copy()]

        for t in range(1, T):
            z = standardized[t - 1]
            Q = (1 - self.a - self.b) * self.Q_bar + self.a * np.outer(z, z) + self.b * Q
            R = self._to_correlation(Q)

            self.correlation_matrices.append(R.copy())
            self.Q_matrices.append(Q.copy())

        return self

    def forecast_correlation(self, horizon: int = 1) -> np.ndarray:
        """
        Forecast the correlation matrix ``horizon`` steps ahead.

        Blends the most recent in-sample correlation with Q_bar using weight
        (a + b) ** horizon on the recent matrix. Returns the identity when
        the model has not been fitted.
        """
        if not self.correlation_matrices:
            return np.eye(self.n_assets)

        R_long_run = self.Q_bar.copy()
        R_recent = self.correlation_matrices[-1]

        # Longer horizons decay towards the long-run matrix.
        weight_recent = (self.a + self.b) ** horizon
        weight_long = 1 - weight_recent

        R_forecast = weight_recent * R_recent + weight_long * R_long_run

        # Ensure positive definiteness
        eigvals = np.linalg.eigvalsh(R_forecast)
        if np.min(eigvals) < 1e-6:
            R_forecast += np.eye(self.n_assets) * (1e-6 - np.min(eigvals))
            # Renormalize so the diagonal is 1 again
            d = np.sqrt(np.diag(R_forecast))
            R_forecast = R_forecast / np.outer(d, d)

        return R_forecast

    def get_covariance_forecast(self, horizon: int = 1) -> np.ndarray:
        """
        Forecast covariance matrix: Σ = D * R * D
        where D = diagonal matrix of volatilities.

        Both the variances and the correlation are taken at step ``horizon``
        so the two components refer to the same date.

        Raises:
            ValueError: if ``horizon`` < 1 or the model has not been fitted.
        """
        if horizon < 1:
            raise ValueError("horizon must be >= 1")
        if not self.garch_models:
            raise ValueError("model must be fitted before forecasting covariance")

        # Variance forecast at the requested horizon (last element), not the
        # one-step-ahead value.
        var_forecasts = np.array([
            garch.forecast(horizon)[-1]
            for garch in self.garch_models
        ])

        D = np.diag(np.sqrt(var_forecasts))
        R = self.forecast_correlation(horizon)

        return D @ R @ D

    def get_correlation_time_series(self) -> pd.DataFrame:
        """
        Get time series of pairwise correlations.

        Returns one row per asset pair (i < j) with the full correlation
        path and its mean/std/min/max; an empty frame if not fitted.
        """
        if not self.correlation_matrices:
            return pd.DataFrame()

        pairs = []
        for i in range(self.n_assets):
            for j in range(i + 1, self.n_assets):
                corrs = [R[i, j] for R in self.correlation_matrices]
                pairs.append({
                    'pair': f'Asset_{i}_vs_{j}',
                    'correlations': corrs,
                    'mean': np.mean(corrs),
                    'std': np.std(corrs),
                    'min': np.min(corrs),
                    'max': np.max(corrs)
                })

        return pd.DataFrame(pairs)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
class CorrelationRegimeDetector:
    """
    Detect regime switches in correlation structure.

    Correlations are LOW in normal times, HIGH in crises.
    A portfolio that works in normal times fails when correlations spike.

    Each call to ``detect_regime`` classifies one correlation matrix by its
    mean absolute off-diagonal correlation and records spectrum-based
    features alongside the label.
    """

    def __init__(self,
                 low_regime_threshold: float = 0.3,
                 high_regime_threshold: float = 0.7,
                 window: int = 60):
        self.low_regime_threshold = low_regime_threshold
        self.high_regime_threshold = high_regime_threshold
        # Stored for callers; not read by the methods of this class.
        self.window = window

        self.regime_history = []
        self.correlation_features = []

    def detect_regime(self, correlation_matrix: np.ndarray) -> str:
        """
        Classify the regime of one correlation matrix.

        Returns 'high_correlation', 'low_correlation' or 'normal', and
        appends the computed features to ``correlation_features`` and
        ``regime_history``.
        """
        correlation_matrix = np.asarray(correlation_matrix, dtype=float)
        n = correlation_matrix.shape[0]

        # Absolute off-diagonal correlations
        off_diag = np.abs(correlation_matrix[~np.eye(n, dtype=bool)])
        if off_diag.size:
            mean_corr = np.mean(off_diag)
            max_corr = np.max(off_diag)
        else:
            # Single asset: there is no cross-correlation to measure.
            mean_corr = 0.0
            max_corr = 0.0

        # eigvalsh returns eigenvalues in ascending order.
        eigvals = np.linalg.eigvalsh(correlation_matrix)
        # Eigenvalue dispersion (high = concentrated risk). A non-positive
        # smallest eigenvalue means a singular matrix, i.e. maximal
        # concentration, so report infinity rather than the minimum 1.0.
        eig_dispersion = eigvals[-1] / eigvals[0] if eigvals[0] > 0 else np.inf

        features = {
            'mean_corr': mean_corr,
            'max_corr': max_corr,
            'eig_dispersion': eig_dispersion,
            'first_eigenvalue_pct': eigvals[-1] / np.sum(eigvals)
        }
        self.correlation_features.append(features)

        # Classify on the mean absolute correlation only
        if mean_corr > self.high_regime_threshold:
            regime = 'high_correlation'
        elif mean_corr < self.low_regime_threshold:
            regime = 'low_correlation'
        else:
            regime = 'normal'

        self.regime_history.append({
            'regime': regime,
            **features
        })

        return regime

    def get_regime_summary(self) -> pd.DataFrame:
        """
        Summarise how often each regime was observed.

        Returns one row per regime (in order of first appearance) with its
        count, percentage share and average mean/max correlation; an empty
        frame if nothing has been classified yet.
        """
        if not self.regime_history:
            return pd.DataFrame()

        # Group history entries by regime in a single pass; dicts preserve
        # insertion order, so rows follow first appearance.
        by_regime = {}
        for entry in self.regime_history:
            by_regime.setdefault(entry['regime'], []).append(entry)

        total = len(self.regime_history)
        rows = [
            {
                'regime': regime,
                'count': len(entries),
                'pct': len(entries) / total * 100,
                'avg_mean_corr': np.mean([e['mean_corr'] for e in entries]),
                'avg_max_corr': np.mean([e['max_corr'] for e in entries])
            }
            for regime, entries in by_regime.items()
        ]

        return pd.DataFrame(rows)
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
class LedoitWolfShrinkage:
    """
    Ledoit-Wolf style covariance shrinkage estimator.

    Sample covariance is noisy with high-dimensional data.
    Shrink towards a structured target: the constant-correlation model,
    which keeps every sample variance and replaces every pairwise
    correlation with the average sample correlation.

    NOTE: the shrinkage intensity used here is the simple heuristic
    min(n / T, 1), not the analytically optimal Ledoit-Wolf intensity.
    """

    @staticmethod
    def estimate(returns: np.ndarray) -> Tuple[np.ndarray, float]:
        """
        Estimate covariance with shrinkage towards constant correlation.

        Args:
            returns: (T, n) array of returns.

        Returns: (shrunk_covariance, shrinkage_intensity)

        Raises:
            ValueError: if fewer than two observations are supplied.
        """
        returns = np.asarray(returns, dtype=float)
        T, n = returns.shape
        if T < 2:
            raise ValueError("at least two observations are required")

        # Sample covariance (atleast_2d: np.cov returns a scalar when n == 1)
        sample_cov = np.atleast_2d(np.cov(returns, rowvar=False))

        var = np.diag(sample_cov).copy()
        std = np.sqrt(var)
        scale = np.outer(std, std)

        # Sample correlations; pairs involving a zero-variance asset are
        # treated as uncorrelated instead of producing NaN.
        with np.errstate(divide='ignore', invalid='ignore'):
            sample_corr = np.where(scale > 0, sample_cov / scale, 0.0)

        upper = np.triu_indices(n, k=1)
        avg_corr = np.mean(sample_corr[upper]) if upper[0].size else 0.0

        # Target: constant correlation model, f_ij = r_bar * s_i * s_j.
        # Using the average *covariance* here would imply correlations
        # outside [-1, 1] whenever variances differ across assets.
        target = avg_corr * scale
        np.fill_diagonal(target, var)

        # Simplified intensity: proportional to n/T, capped at 1
        shrinkage = min(n / T, 1.0)

        shrunk = (1 - shrinkage) * sample_cov + shrinkage * target

        # Ensure positive definite
        eigvals = np.linalg.eigvalsh(shrunk)
        if np.min(eigvals) < 1e-8:
            shrunk += np.eye(n) * (1e-8 - np.min(eigvals))

        return shrunk, shrinkage
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
class FactorCorrelationModel:
    """
    Factor model for correlation estimation.

    Instead of estimating n(n-1)/2 correlations, estimate:
    - k factor exposures per asset (k << n)
    - Covariance = β Σ_f β' + D

    More robust with limited data.
    """

    def __init__(self, n_factors: int = 5):
        self.n_factors = n_factors
        # Populated by fit()
        self.factor_exposures = None   # (n, k) loadings
        self.factor_covariance = None  # (k, k) factor covariance
        self.idiosyncratic_var = None  # (n,) residual variances

    def fit(self, returns: np.ndarray):
        """
        Fit factor model via PCA.

        First n_factors principal components = systematic factors.
        Residuals = idiosyncratic risk.

        Args:
            returns: (T, n) array of returns.

        Returns:
            self, with exposures, factor covariance and idiosyncratic
            variances populated.
        """
        returns = np.asarray(returns, dtype=float)

        # Demean each asset
        centered = returns - np.mean(returns, axis=0)

        # PCA via SVD: rows of Vt are the principal directions
        U, s, Vt = np.linalg.svd(centered, full_matrices=False)
        k = self.n_factors

        # Factor exposures (loadings), shape (n, k)
        self.factor_exposures = Vt[:k, :].T

        # Factor returns, shape (T, k)
        factor_returns = U[:, :k] * s[:k]

        # Factor covariance; atleast_2d keeps a (1, 1) matrix when k == 1,
        # where np.cov would otherwise return a scalar and break the
        # matrix products below.
        self.factor_covariance = np.atleast_2d(np.cov(factor_returns.T))

        # Idiosyncratic variance = variance of what the factors leave over
        residuals = centered - factor_returns @ self.factor_exposures.T
        self.idiosyncratic_var = np.var(residuals, axis=0)

        return self

    def _systematic_covariance(self) -> np.ndarray:
        """Return β Σ_f β', the covariance explained by the factors."""
        return self.factor_exposures @ self.factor_covariance @ self.factor_exposures.T

    def get_correlation(self) -> np.ndarray:
        """Reconstruct correlation matrix from factor model"""
        # Covariance = β Σ_f β' + D
        cov = self._systematic_covariance()
        cov += np.diag(self.idiosyncratic_var)

        # Convert to correlation
        d = np.sqrt(np.diag(cov))
        return cov / np.outer(d, d)

    def get_r_squared(self) -> np.ndarray:
        """
        R² for each asset (variance explained by factors).

        Per-asset systematic variance is the diagonal of β Σ_f β'; R² is its
        share of systematic plus idiosyncratic variance.
        """
        systematic_var = np.diag(self._systematic_covariance())
        total_var = systematic_var + self.idiosyncratic_var

        # 1e-10 avoids 0/0 for an asset with no variance at all.
        return systematic_var / (total_var + 1e-10)
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
if __name__ == '__main__':
    # Demo: simulate a calm regime followed by a crisis regime, then run
    # every estimator in this module on the combined sample.
    banner = "=" * 70
    print(banner)
    print(" CORRELATION REGIME MODELING")
    print(banner)

    np.random.seed(42)

    n_assets = 5
    n_obs = 1000
    half = n_obs // 2

    # Regime 1 (normal): low correlations
    calm_cov = np.eye(n_assets) * 0.0001 + 0.00005
    calm_mean = np.zeros(n_assets)
    regime1 = np.random.multivariate_normal(calm_mean, calm_cov, half)

    # Regime 2 (crisis): high correlations, negative drift, higher volatility
    crisis_corr = np.ones((n_assets, n_assets)) * 0.8
    np.fill_diagonal(crisis_corr, 1.0)
    crisis_mean = np.zeros(n_assets) - 0.001
    regime2 = np.random.multivariate_normal(crisis_mean, crisis_corr * 0.0003, half)

    returns = np.vstack([regime1, regime2])

    print(f"\nGenerated {n_obs} observations, {n_assets} assets")
    print(f" First half: normal regime (low correlations)")
    print(f" Second half: crisis regime (high correlations)")

    # ---- 1. DCC-GARCH ----
    print("\n1. DCC-GARCH ESTIMATION")
    dcc = DCCModel(n_assets=n_assets)
    dcc.fit(returns)

    pair_stats = dcc.get_correlation_time_series()
    if not pair_stats.empty:
        print(f"\n Pairwise Correlation Statistics:")
        for _, stats_row in pair_stats.iterrows():
            print(f" {stats_row['pair']}: mean={stats_row['mean']:.3f}, "
                  f"std={stats_row['std']:.3f}, range=[{stats_row['min']:.3f}, {stats_row['max']:.3f}]")

    # Five-step-ahead forecasts
    R_forecast = dcc.forecast_correlation(horizon=5)
    cov_forecast = dcc.get_covariance_forecast(horizon=5)

    print(f"\n 5-day Correlation Forecast (Asset 0 vs 1): {R_forecast[0,1]:.3f}")
    print(f" 5-day Covariance Forecast (0,1): {cov_forecast[0,1]:.6f}")

    print(f"\n GARCH Parameters:")
    for idx, asset_garch in enumerate(dcc.garch_models):
        fitted = asset_garch.get_params()
        print(f" Asset {idx}: ω={fitted['omega']:.4f}, "
              f"α={fitted['alpha']:.3f}, β={fitted['beta']:.3f}, "
              f"persist={fitted['persistence']:.3f}")

    # ---- 2. Regime detection ----
    print("\n2. CORRELATION REGIME DETECTION")
    detector = CorrelationRegimeDetector(
        low_regime_threshold=0.3,
        high_regime_threshold=0.6
    )

    # Classify every 10th in-sample correlation matrix
    for corr_matrix in dcc.correlation_matrices[::10]:
        detector.detect_regime(corr_matrix)

    summary = detector.get_regime_summary()
    print(f"\n Regime Distribution:")
    print(summary.to_string(index=False))

    # ---- 3. Ledoit-Wolf shrinkage ----
    print("\n3. LEDOIT-WOLF COVARIANCE SHRINKAGE")
    shrunk, shrinkage = LedoitWolfShrinkage.estimate(returns)

    def _as_correlation(cov):
        """Convert a covariance matrix into a correlation matrix."""
        variances = np.diag(cov)
        return cov / np.sqrt(np.outer(variances, variances))

    sample_corr = _as_correlation(np.cov(returns.T))
    shrunk_corr = _as_correlation(shrunk)

    print(f" Shrinkage intensity: {shrinkage:.3f}")
    print(f" Sample correlation (0,1): {sample_corr[0,1]:.3f}")
    print(f" Shrunk correlation (0,1): {shrunk_corr[0,1]:.3f}")

    # ---- 4. Factor model ----
    print("\n4. FACTOR CORRELATION MODEL (PCA)")
    factor_model = FactorCorrelationModel(n_factors=3)
    factor_model.fit(returns)

    factor_corr = factor_model.get_correlation()
    r_squared = factor_model.get_r_squared()

    print(f" Factor model correlation (0,1): {factor_corr[0,1]:.3f}")
    print(f" R² by asset: {r_squared.round(3)}")

    # ---- 5. Side-by-side comparison for the (0, 1) pair ----
    print("\n5. METHOD COMPARISON")
    print(f" Asset 0 vs 1 Correlation:")
    print(f" Sample: {sample_corr[0,1]:.3f}")
    print(f" DCC (last): {dcc.correlation_matrices[-1][0,1]:.3f}")
    print(f" DCC (forecast): {R_forecast[0,1]:.3f}")
    print(f" Ledoit-Wolf: {shrunk_corr[0,1]:.3f}")
    print(f" Factor Model: {factor_corr[0,1]:.3f}")

    print(f"\n KEY INSIGHTS:")
    print(f" - DCC captures time-varying correlations")
    print(f" - Correlations SPIKE in crises → portfolio risk SURGES")
    print(f" - Sample covariance is NOISY → shrinkage essential")
    print(f" - Factor models reduce dimensionality → more robust")
    print(f" - Regime detection warns when diversification FAILS")
|