timesfm-2.5-endpoint / handler.py
indievish
Fix: bypass from_pretrained, load weights manually to avoid proxies kwarg issue
5a5b228
"""
Custom handler for HuggingFace Inference Endpoints.
Uses TimesFM 2.5 (200M) installed from GitHub repo.
"""
import numpy as np
import torch
from typing import Any
class EndpointHandler:
def __init__(self, path: str = ""):
import timesfm
from timesfm.timesfm_2p5.timesfm_2p5_torch import (
TimesFM_2p5_200M_torch_module,
TimesFM_2p5_200M_torch,
)
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
import os
torch.set_float32_matmul_precision("high")
# Download weights manually to avoid from_pretrained kwargs issue
model_id = "google/timesfm-2.5-200m-pytorch"
weight_file = hf_hub_download(repo_id=model_id, filename="model.safetensors")
# Create model instance directly
self.tfm = TimesFM_2p5_200M_torch()
self.tfm.model = TimesFM_2p5_200M_torch_module()
self.tfm.model.load_checkpoint(weight_file, torch_compile=False)
self.tfm.compile(
timesfm.ForecastConfig(
max_context=1024,
max_horizon=128,
normalize_inputs=True,
use_continuous_quantile_head=True,
force_flip_invariance=True,
infer_is_positive=False,
fix_quantile_crossing=True,
)
)
def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
inputs = data.get("inputs", [])
parameters = data.get("parameters", {})
horizon = min(parameters.get("horizon", 24), 128)
if not inputs or not isinstance(inputs, list):
return {"error": "inputs must be a non-empty list of numbers"}
input_array = [np.array(inputs, dtype=np.float64)]
point, quantiles = self.tfm.forecast(horizon=horizon, inputs=input_array)
return {
"point_forecast": point[0].tolist(),
"quantile_forecast": quantiles[0].tolist(),
}