Premchan369
/

alphaforge-quant-system

@@ -1,109 +1,447 @@
-"""Online Learning - Adaptive model updates with drift detection"""
 import numpy as np
 import pandas as pd
-import torch
-import torch.nn as nn
-from typing import Dict, Optional
-from scipy.stats import ks_2samp
-class DriftDetector:
-    """Detect data drift using statistical tests"""
-    def __init__(self, significance=0.05, window=252):
-        self.significance = significance
-        self.window = window
-        self.reference_stats = {}
-        self.drift_history = []
-    def set_reference(self, data: np.ndarray, name: str = 'default'):
-        self.reference_stats[name] = {'mean': data.mean(axis=0), 'std': data.std(axis=0), 'data': data}
-    def detect_ks(self, new_data: np.ndarray, name: str = 'default') -> Dict:
-        ref = self.reference_stats.get(name)
-        if ref is None:
-            return {'drift': False, 'p_value': 1.0}
-        n_features = new_data.shape[1] if new_data.ndim > 1 else 1
-        drifts = []
-        p_values = []
-        for i in range(n_features):
-            col = i if new_data.ndim > 1 else 0
-            ref_feat = ref['data'][:, col] if ref['data'].ndim > 1 else ref['data']
-            new_feat = new_data[:, col] if new_data.ndim > 1 else new_data
-            stat, p = ks_2samp(ref_feat, new_feat)
-            drifts.append(p < self.significance)
-            p_values.append(p)
-        n_drift = sum(drifts)
-        overall_drift = n_drift > n_features * 0.3
-        return {'drift': overall_drift, 'p_values': p_values, 'n_features_drifted': n_drift, 'total_features': n_features}
-    def detect_cusum(self, residuals: np.ndarray, threshold: float = 5.0, drift: float = 1.0) -> Dict:
-        pos_cusum = np.zeros(len(residuals))
-        neg_cusum = np.zeros(len(residuals))
-        for t in range(1, len(residuals)):
-            pos_cusum[t] = max(0, pos_cusum[t-1] + residuals[t] - drift)
-            neg_cusum[t] = min(0, neg_cusum[t-1] + residuals[t] + drift)
-        alert = np.any(pos_cusum > threshold) or np.any(neg_cusum < -threshold)
-        return {'alert': alert, 'pos_cusum': pos_cusum, 'neg_cusum': neg_cusum}
-class OnlineLearner:
-    """Online learning with periodic model adaptation"""
-    def __init__(self, model: nn.Module, lr: float = 1e-5,
-                 adaptation_window: int = 21, drift_threshold: float = 0.3):
-        self.model = model
-        self.lr = lr
-        self.adaptation_window = adaptation_window
         self.drift_threshold = drift_threshold
-        self.drift_detector = DriftDetector()
-        self.adaptation_count = 0
-        self.ic_history = []
-        self.performance_history = []
-    def check_and_adapt(self, X_new: np.ndarray, y_new: np.ndarray,
-                        X_ref: Optional[np.ndarray] = None) -> Dict:
-        drift_result = self.drift_detector.detect_ks(X_new)
-        if drift_result['drift']:
-            print(f"⚠️ Drift detected: {drift_result['n_features_drifted']}/{drift_result['total_features']} features shifted")
-            self._adapt(X_new, y_new)
-            self.adaptation_count += 1
-            return {'adapted': True, 'drift': drift_result}
-        return {'adapted': False, 'drift': drift_result}
-    def _adapt(self, X: np.ndarray, y: np.ndarray, epochs: int = 5):
-        self.model.train()
-        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
-        criterion = nn.MSELoss()
-        X_t = torch.FloatTensor(X)
-        y_t = torch.FloatTensor(y).unsqueeze(1)
-        for epoch in range(epochs):
-            optimizer.zero_grad()
-            pred = self.model(X_t)
-            loss = criterion(pred, y_t)
-            loss.backward()
-            optimizer.step()
-        print(f"  Adapted model with {epochs} epochs, loss={loss.item():.6f}")
-    def track_performance(self, predictions: np.ndarray, actuals: np.ndarray):
-        from scipy.stats import spearmanr
-        ic, _ = spearmanr(predictions, actuals)
-        self.ic_history.append(ic)
-        # Check if IC is degrading
-        if len(self.ic_history) > 63:
-            recent_ic = np.mean(self.ic_history[-21:])
-            long_ic = np.mean(self.ic_history[-63:])
-            degradation = (long_ic - recent_ic) / (abs(long_ic) + 1e-8)
-            if degradation > 0.3:
-                print(f"⚠️ IC degradation: recent={recent_ic:.4f}, long={long_ic:.4f}, degradation={degradation:.2%}")
-                return 'degrading'
-        return 'stable'

+"""Online Learning — Per-Symbol Adaptive Models
+Why this matters for Jane Street level:
+- Markets CHANGE. A model trained on SPY 2022 fails on SPY 2024.
+- Each asset has unique microstructure, seasonality, regime behavior.
+- Static models lose predictive power over time (model decay).
+Solution: Online / Continual Learning
+- Update models incrementally on every new observation
+- Per-symbol parameters (some assets trend, others mean-revert)
+- Meta-learning: learn HOW to adapt quickly
+- Concept drift detection: auto-detect when old model is wrong
+Based on:
+- Vapnik (1998): Online SVM
+- Cesa-Bianchi & Lugosi (2006): Prediction, Learning, Games
+- Finn et al. (2017): MAML (Model-Agnostic Meta-Learning)
+- Gama et al. (2014): A survey on concept drift adaptation
+"""
 import numpy as np
 import pandas as pd
+from typing import Dict, List, Tuple, Optional, Callable
+from collections import defaultdict
+import warnings
+warnings.filterwarnings('ignore')
+def sigmoid(x):
+    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
+class OnlineLogisticRegression:
+    """
+    Online logistic regression with adaptive learning rate.
+    Uses exponential weighting: recent data matters more.
+    Learning rate adapts to gradient variance.
+    """
+    def __init__(self,
+                 n_features: int = 10,
+                 initial_lr: float = 0.01,
+                 lr_decay: float = 0.999,
+                 l2_reg: float = 0.01,
+                 min_lr: float = 1e-6):
+        self.n_features = n_features
+        self.lr = initial_lr
+        self.initial_lr = initial_lr
+        self.lr_decay = lr_decay
+        self.l2_reg = l2_reg
+        self.min_lr = min_lr
+        self.weights = np.zeros(n_features)
+        self.bias = 0.0
+        # Adaptive state
+        self.grad_moment2 = np.zeros(n_features)
+        self.bias_moment2 = 0.0
+        self.t = 0
+        # Performance tracking
+        self.predictions = []
+        self.actuals = []
+        self.grad_norms = []
+    def predict_proba(self, x: np.ndarray) -> float:
+        """Predict probability of positive class"""
+        z = np.dot(x, self.weights) + self.bias
+        return sigmoid(z)
+    def predict(self, x: np.ndarray) -> int:
+        return 1 if self.predict_proba(x) > 0.5 else 0
+    def update(self, x: np.ndarray, y: int) -> Dict:
+        """
+        Single-step online update.
+        Args:
+            x: feature vector (n_features,)
+            y: label (0 or 1)
+        Returns:
+            Update metrics
+        """
+        self.t += 1
+        # Forward
+        z = np.dot(x, self.weights) + self.bias
+        pred = sigmoid(z)
+        # Gradient
+        error = pred - y
+        grad_w = error * x + self.l2_reg * self.weights
+        grad_b = error
+        # Adaptive learning rate (AdaGrad-like)
+        self.grad_moment2 += grad_w ** 2
+        self.bias_moment2 += grad_b ** 2
+        lr_w = self.lr / (np.sqrt(self.grad_moment2) + 1e-8)
+        lr_b = self.lr / (np.sqrt(self.bias_moment2) + 1e-8)
+        # Update
+        self.weights -= lr_w * grad_w
+        self.bias -= lr_b * grad_b
+        # Decay learning rate
+        self.lr = max(self.lr * self.lr_decay, self.min_lr)
+        # Track
+        self.predictions.append(pred)
+        self.actuals.append(y)
+        self.grad_norms.append(np.linalg.norm(grad_w))
+        return {
+            'pred': pred,
+            'error': error,
+            'grad_norm': np.linalg.norm(grad_w),
+            'lr': self.lr
+        }
+    def get_performance(self, last_n: int = 100) -> Dict:
+        """Get recent performance metrics"""
+        if len(self.actuals) < 2:
+            return {'accuracy': 0.5}
+        n = min(last_n, len(self.actuals))
+        preds = np.array(self.predictions[-n:]) > 0.5
+        actuals = np.array(self.actuals[-n:])
+        accuracy = np.mean(preds == actuals)
+        # Directional accuracy for returns
+        if len(actuals) >= 10:
+            # Use last 10 predictions as a sequence
+            pred_returns = np.diff(self.predictions[-10:])
+            actual_returns = np.diff(self.actuals[-10:])
+            directional = np.mean(np.sign(pred_returns) == np.sign(actual_returns)) if len(pred_returns) > 0 else 0.5
+        else:
+            directional = accuracy
+        return {
+            'accuracy': accuracy,
+            'directional_accuracy': directional,
+            'avg_grad_norm': np.mean(self.grad_norms[-n:]) if self.grad_norms else 0,
+            'current_lr': self.lr,
+            'n_updates': self.t
+        }
+class PerSymbolAdaptiveModel:
+    """
+    Maintain separate online models for each symbol.
+    Key insight: SPY behaves differently from TSLA.
+    Each asset needs its own:
+    - Feature weights
+    - Learning rate schedule
+    - Regime detection
+    """
+    def __init__(self,
+                 n_features: int = 10,
+                 base_lr: float = 0.01,
+                 symbols: Optional[List[str]] = None):
+        self.n_features = n_features
+        self.base_lr = base_lr
+        self.symbols = symbols or []
+        # Per-symbol models
+        self.models: Dict[str, OnlineLogisticRegression] = {}
+        # Performance tracking
+        self.symbol_performance: Dict[str, List[Dict]] = defaultdict(list)
+        # Auto-detect symbols
+        self.seen_symbols = set()
+    def _get_or_create_model(self, symbol: str) -> OnlineLogisticRegression:
+        """Get model for symbol, create if new"""
+        if symbol not in self.models:
+            # Meta-learn initial weights from similar symbols
+            init_weights = self._meta_initialize(symbol)
+            model = OnlineLogisticRegression(
+                n_features=self.n_features,
+                initial_lr=self.base_lr * np.random.uniform(0.8, 1.2)
+            )
+            if init_weights is not None:
+                model.weights = init_weights
+            self.models[symbol] = model
+            self.seen_symbols.add(symbol)
+        return self.models[symbol]
+    def _meta_initialize(self, new_symbol: str) -> Optional[np.ndarray]:
+        """
+        Meta-learning: initialize new symbol model from similar symbols.
+        Use average of best-performing similar models.
+        """
+        if len(self.models) < 3:
+            return None
+        # Get recent performance
+        perf = []
+        for sym, model in self.models.items():
+            p = model.get_performance(last_n=50)
+            perf.append((sym, p.get('accuracy', 0.5), model.weights))
+        # Use top 3 models as initialization
+        perf.sort(key=lambda x: x[1], reverse=True)
+        top_weights = [p[2] for p in perf[:3]]
+        return np.mean(top_weights, axis=0)
+    def update(self, symbol: str, x: np.ndarray, y: int) -> Dict:
+        """Update model for a specific symbol"""
+        model = self._get_or_create_model(symbol)
+        metrics = model.update(x, y)
+        # Track performance
+        perf = model.get_performance(last_n=20)
+        self.symbol_performance[symbol].append(perf)
+        metrics['symbol'] = symbol
+        return metrics
+    def predict(self, symbol: str, x: np.ndarray) -> Dict:
+        """Predict for a specific symbol"""
+        model = self._get_or_create_model(symbol)
+        prob = model.predict_proba(x)
+        return {
+            'symbol': symbol,
+            'probability': prob,
+            'prediction': 1 if prob > 0.5 else 0,
+            'confidence': abs(prob - 0.5) * 2,  # 0 = unsure, 1 = certain
+            'model_age': model.t
+        }
+    def get_symbol_ranking(self) -> pd.DataFrame:
+        """Rank symbols by recent model performance"""
+        rows = []
+        for symbol, model in self.models.items():
+            perf = model.get_performance(last_n=100)
+            rows.append({
+                'symbol': symbol,
+                'accuracy': perf['accuracy'],
+                'directional_accuracy': perf['directional_accuracy'],
+                'n_samples': model.t,
+                'current_lr': perf['current_lr'],
+                'grad_norm': perf['avg_grad_norm']
+            })
+        df = pd.DataFrame(rows)
+        if not df.empty:
+            df = df.sort_values('directional_accuracy', ascending=False)
+        return df
+    def detect_concept_drift(self, symbol: str,
+                             window_short: int = 50,
+                             window_long: int = 200) -> Dict:
+        """
+        Detect if the relationship between features and target has changed.
+        Uses accuracy comparison: recent vs older performance.
+        If recent << older → concept drift detected → need retraining/adaptation.
+        """
+        model = self.models.get(symbol)
+        if model is None or len(model.actuals) < window_long:
+            return {'drift_detected': False, 'reason': 'insufficient_data'}
+        recent = model.get_performance(last_n=window_short)['accuracy']
+        older = model.get_performance(last_n=window_long)['accuracy']
+        # Drift if recent accuracy significantly worse
+        drift_threshold = -0.15  # 15% accuracy drop
+        drift_score = recent - older
+        drift_detected = drift_score < drift_threshold
+        return {
+            'drift_detected': drift_detected,
+            'drift_score': drift_score,
+            'recent_accuracy': recent,
+            'older_accuracy': older,
+            'threshold': drift_threshold,
+            'action': 'reset_learning_rate' if drift_detected else 'continue',
+            'symbol': symbol
+        }
+    def adapt_to_drift(self, symbol: str):
+        """Adapt model when drift detected"""
+        model = self.models.get(symbol)
+        if model is None:
+            return
+        # Reset learning rate to initial (forget old, learn new)
+        model.lr = model.initial_lr * 2  # Higher LR to adapt faster
+        model.grad_moment2 = np.zeros(self.n_features)
+        model.bias_moment2 = 0.0
+        print(f"  [Drift] Reset learning rate for {symbol} to {model.lr:.4f}")
+    def get_full_state(self) -> Dict:
+        """Export full state for persistence"""
+        return {
+            'n_features': self.n_features,
+            'base_lr': self.base_lr,
+            'symbols': list(self.seen_symbols),
+            'models': {
+                sym: {
+                    'weights': model.weights.tolist(),
+                    'bias': model.bias,
+                    'n_updates': model.t,
+                    'lr': model.lr
+                }
+                for sym, model in self.models.items()
+            }
+        }
+class ConceptDriftMonitor:
+    """
+    System-wide concept drift monitoring across all symbols.
+    Automatically detects when markets have structurally changed
+    and triggers model adaptation.
+    """
+    def __init__(self,
+                 per_symbol_model: PerSymbolAdaptiveModel,
+                 check_interval: int = 100,
+                 drift_threshold: float = -0.15):
+        self.model = per_symbol_model
+        self.check_interval = check_interval
         self.drift_threshold = drift_threshold
+        self.step_count = 0
+        self.drift_history = []
+        self.adaptation_log = []
+    def check_all_symbols(self) -> List[Dict]:
+        """Check all symbols for drift and adapt if needed"""
+        self.step_count += 1
+        if self.step_count % self.check_interval != 0:
+            return []
+        results = []
+        for symbol in self.model.seen_symbols:
+            drift_result = self.model.detect_concept_drift(symbol)
+            results.append(drift_result)
+            if drift_result['drift_detected']:
+                self.model.adapt_to_drift(symbol)
+                self.drift_history.append({
+                    'step': self.step_count,
+                    'symbol': symbol,
+                    'score': drift_result['drift_score'],
+                    'recent_acc': drift_result['recent_accuracy'],
+                    'older_acc': drift_result['older_accuracy']
+                })
+        return results
+    def get_drift_summary(self) -> pd.DataFrame:
+        """Summary of all detected drifts"""
+        return pd.DataFrame(self.drift_history)
+if __name__ == '__main__':
+    print("=" * 70)
+    print("  ONLINE LEARNING — PER-SYMBOL ADAPTIVE MODELS")
+    print("=" * 70)
+    # Simulate multiple symbols with different behaviors
+    np.random.seed(42)
+    # Symbol A: Strong momentum signal
+    # Symbol B: Weak/noise
+    # Symbol C: Regime switch at step 500
+    model = PerSymbolAdaptiveModel(n_features=5, base_lr=0.05)
+    monitor = ConceptDriftMonitor(model, check_interval=100)
+    n_steps = 800
+    for step in range(n_steps):
+        # Symbol A: feature 0 predicts direction with 65% accuracy
+        x_a = np.random.randn(5)
+        true_dir_a = 1 if x_a[0] > 0 else 0
+        if np.random.rand() > 0.65:
+            true_dir_a = 1 - true_dir_a  # 35% noise
+        # Symbol B: no signal, pure noise
+        x_b = np.random.randn(5)
+        true_dir_b = np.random.randint(0, 2)
+        # Symbol C: regime switch at step 500
+        x_c = np.random.randn(5)
+        if step < 500:
+            true_dir_c = 1 if x_c[0] > 0 else 0  # feature 0 matters
+            if np.random.rand() > 0.6:
+                true_dir_c = 1 - true_dir_c
+        else:
+            # Regime switch: now feature 1 predicts (opposite!)
+            true_dir_c = 1 if x_c[1] < 0 else 0
+            if np.random.rand() > 0.6:
+                true_dir_c = 1 - true_dir_c
+        # Update models
+        model.update('AAPL', x_a, true_dir_a)
+        model.update('JUNK', x_b, true_dir_b)
+        model.update('REGIME', x_c, true_dir_c)
+        # Periodic drift check
+        if step % 100 == 0 and step > 0:
+            monitor.check_all_symbols()
+    # Results
+    print(f"\nTrained on {n_steps} steps per symbol")
+    print(f"\nPer-Symbol Performance:")
+    ranking = model.get_symbol_ranking()
+    print(ranking.to_string(index=False))
+    # Drift detection for REGIME symbol
+    drift_result = model.detect_concept_drift('REGIME', window_short=50, window_long=300)
+    print(f"\nREGIME Symbol Drift Detection:")
+    print(f"  Drift detected: {drift_result['drift_detected']}")
+    print(f"  Recent accuracy: {drift_result['recent_accuracy']:.3f}")
+    print(f"  Older accuracy:  {drift_result['older_accuracy']:.3f}")
+    print(f"  Drift score:     {drift_result['drift_score']:+.3f}")
+    print(f"\n  Key Insights:")
+    print(f"    - AAPL model should have ~60-65% accuracy (real signal)")
+    print(f"    - JUNK model should have ~50% accuracy (pure noise)")
+    print(f"    - REGIME model should detect drift at step 500")
+    print(f"    - Each symbol gets its OWN learning rate and weights")
+    print(f"    - Drift triggers adaptive LR reset")