# eeshwar143
# Clean submission history
# e4accbb
"""Typed models for the SaaS support triage benchmark."""
from __future__ import annotations

from typing import Any, Literal, get_args

from pydantic import BaseModel, ConfigDict, Field
# Use the OpenEnv base types when they can be imported; otherwise both aliases
# fall back to plain pydantic ``BaseModel``.
try:
    from openenv.core.env_server.types import (
        Action as OpenEnvAction,
        Observation as OpenEnvObservation,
    )
except Exception:  # pragma: no cover - compatibility fallback
    OpenEnvAction = OpenEnvObservation = BaseModel
# Closed string vocabularies used as field types by the models in this module.
# Ticket priority labels (presumably P1 is the most urgent — confirm).
Priority = Literal["P1", "P2", "P3", "P4"]
# Names of the support queues.
QueueName = Literal["billing", "security", "technical", "success", "trust_safety"]
# Disposition values that can be chosen for a ticket.
Disposition = Literal["respond", "request_info", "escalate", "close"]
# Task difficulty levels.
Difficulty = Literal["easy", "medium", "hard"]
# Customer tier labels.
CustomerTier = Literal["starter", "growth", "enterprise"]
class TaskCard(BaseModel):
    """Descriptive card for a single benchmark task.

    Carries the task identifier, title, difficulty, free-text description
    and a ticket count. Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    task_id: str
    title: str
    difficulty: Difficulty  # "easy", "medium" or "hard"
    description: str
    ticket_count: int
class TicketSnapshot(BaseModel):
    """Snapshot of one support ticket's fields.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    ticket_id: str
    subject: str
    body: str
    customer_tier: CustomerTier  # "starter", "growth" or "enterprise"
    product_area: str
    sla_hours: int
    # Optional; each instance gets its own empty list when omitted.
    recent_events: list[str] = Field(default_factory=list)
class SupportQueueAction(OpenEnvAction):
    """Action carrying a triage decision for a ticket.

    Holds the chosen priority, queue and disposition together with a short
    summary, a longer response text and a confidence value. Unknown fields
    are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    priority: Priority
    queue: QueueName
    disposition: Disposition
    # Required text, 8 to 280 characters.
    summary: str = Field(
        ...,
        min_length=8,
        max_length=280,
    )
    # Required text, 16 to 1200 characters.
    response: str = Field(
        ...,
        min_length=16,
        max_length=1200,
    )
    # Optional value in the closed interval [0.0, 1.0]; defaults to 0.5.
    confidence: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
    )
class GradingBreakdown(BaseModel):
    """Per-component scores, penalty and total for one graded ticket.

    Every field is a float defaulting to ``0.0``. This model only stores the
    numbers; how ``total`` is derived is not defined here. Unknown fields are
    rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Component scores.
    priority_score: float = 0.0
    queue_score: float = 0.0
    disposition_score: float = 0.0
    summary_score: float = 0.0
    response_score: float = 0.0

    # Aggregates.
    penalty: float = 0.0
    total: float = 0.0
class TicketFeedback(BaseModel):
    """Feedback record for one ticket.

    Pairs the expected priority, queue and disposition with a
    ``GradingBreakdown`` and a feedback string. Unknown fields are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    ticket_id: str

    # Expected labels for the ticket.
    expected_priority: Priority
    expected_queue: QueueName
    expected_disposition: Disposition

    breakdown: GradingBreakdown
    feedback: str
class SupportQueueObservation(OpenEnvObservation):
    """Observation describing the current ticket and episode progress.

    Required fields identify the task (``task_id``, ``task_title``,
    ``difficulty``, ``instructions``), the position in the queue
    (``current_index``, ``total_tickets``) and the ticket itself. The
    remaining fields have defaults. Unknown fields are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    task_id: str
    task_title: str
    difficulty: Difficulty
    instructions: str
    current_index: int
    total_tickets: int
    ticket: TicketSnapshot

    # The allowed_* defaults are derived from the Literal aliases so the
    # advertised options cannot drift from the values the action model
    # validates against. Each default is a fresh list in declaration order.
    allowed_priorities: list[Priority] = Field(
        default_factory=lambda: list(get_args(Priority))
    )
    allowed_queues: list[QueueName] = Field(
        default_factory=lambda: list(get_args(QueueName))
    )
    allowed_dispositions: list[Disposition] = Field(
        default_factory=lambda: list(get_args(Disposition))
    )

    # Default weights per graded component; the five values sum to 1.0.
    scoring_weights: dict[str, float] = Field(
        default_factory=lambda: {
            "priority": 0.30,
            "queue": 0.25,
            "disposition": 0.20,
            "summary": 0.15,
            "response": 0.10,
        }
    )

    # None when no feedback is attached to this observation.
    last_feedback: TicketFeedback | None = None
    cumulative_reward: float = 0.0
    # NOTE(review): the OpenEnv Observation base may already declare
    # ``reward`` / ``done`` — confirm these overrides are intended.
    reward: float = 0.0
    done: bool = False
    info: dict[str, Any] = Field(default_factory=dict)
class SupportQueueState(BaseModel):
    """State record for one episode.

    Holds the episode id, the task card, progress counters, reward totals
    and three per-episode lists that default to empty. Unknown fields are
    rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    episode_id: str
    task: TaskCard

    # Progress through the queue.
    current_index: int
    total_tickets: int
    done: bool

    # Reward bookkeeping.
    cumulative_reward: float
    average_reward: float

    # Optional lists; each instance gets its own empty list when omitted.
    ticket_scores: list[TicketFeedback] = Field(default_factory=list)
    action_history: list[SupportQueueAction] = Field(default_factory=list)
    processed_tickets: list[str] = Field(default_factory=list)