Spaces:
Sleeping
Sleeping
| """Typed models for the SaaS support triage benchmark.""" | |
| from __future__ import annotations | |
| from typing import Any, Literal | |
| from pydantic import BaseModel, ConfigDict, Field | |
# Optional OpenEnv integration: use the OpenEnv base classes when they can be
# imported, otherwise fall back to plain pydantic models so this module still
# imports on its own.
try:
    from openenv.core.env_server.types import (
        Action as OpenEnvAction,
        Observation as OpenEnvObservation,
    )
except Exception:  # pragma: no cover - compatibility fallback
    # Deliberately broad: any failure raised while importing OpenEnv (not only
    # a missing package) selects the fallback bases.
    OpenEnvAction = OpenEnvObservation = BaseModel

# Closed vocabularies used as field types by the models in this module.
Priority = Literal["P1", "P2", "P3", "P4"]
QueueName = Literal[
    "billing",
    "security",
    "technical",
    "success",
    "trust_safety",
]
Disposition = Literal[
    "respond",
    "request_info",
    "escalate",
    "close",
]
Difficulty = Literal["easy", "medium", "hard"]
CustomerTier = Literal["starter", "growth", "enterprise"]
class TaskCard(BaseModel):
    # Descriptor of a single benchmark task.
    # extra="forbid": unknown keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    task_id: str
    title: str
    difficulty: Difficulty  # "easy", "medium" or "hard"
    description: str
    # Presumably the number of tickets in the task; no range check is applied
    # here -- TODO confirm against the task definitions.
    ticket_count: int
class TicketSnapshot(BaseModel):
    # Ticket payload embedded in an observation.
    # extra="forbid": unknown keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    ticket_id: str
    subject: str
    body: str
    customer_tier: CustomerTier  # "starter", "growth" or "enterprise"
    product_area: str  # free-form string; not restricted to a fixed set here
    # Presumably the SLA window in hours (inferred from the name); no bounds
    # are enforced -- TODO confirm.
    sla_hours: int
    # Optional; each instance gets its own fresh empty list when omitted.
    recent_events: list[str] = Field(default_factory=list)
class SupportQueueAction(OpenEnvAction):
    # Triage decision submitted for one ticket.
    # The base class is OpenEnv's Action when it imported successfully and
    # pydantic's BaseModel otherwise (see the fallback at the top of the file).
    model_config = ConfigDict(extra="forbid")

    priority: Priority
    queue: QueueName
    disposition: Disposition

    # Both text fields are required (no default) and length-bounded.
    summary: str = Field(min_length=8, max_length=280)
    response: str = Field(min_length=16, max_length=1200)

    # Optional; must lie in the closed interval [0.0, 1.0].
    confidence: float = Field(default=0.5, ge=0.0, le=1.0)
class GradingBreakdown(BaseModel):
    # Per-component scores for one graded ticket. Every field is optional and
    # defaults to 0.0; no value ranges are enforced by this model.
    # NOTE(review): how `total` is derived from the component scores and
    # `penalty` is decided by the grader, not by this model -- confirm there.
    model_config = ConfigDict(extra="forbid")

    priority_score: float = 0.0
    queue_score: float = 0.0
    disposition_score: float = 0.0
    summary_score: float = 0.0
    response_score: float = 0.0

    penalty: float = 0.0
    total: float = 0.0
class TicketFeedback(BaseModel):
    # Grading result for one ticket: the expected labels, the score breakdown
    # and a feedback string. All fields are required.
    model_config = ConfigDict(extra="forbid")

    ticket_id: str

    # Reference labels, restricted to the same vocabularies as the action.
    expected_priority: Priority
    expected_queue: QueueName
    expected_disposition: Disposition

    breakdown: GradingBreakdown
    feedback: str
class SupportQueueObservation(OpenEnvObservation):
    # Observation carrying the current ticket plus task and scoring context.
    # The base class is OpenEnv's Observation when it imported successfully and
    # pydantic's BaseModel otherwise (see the fallback at the top of the file).
    model_config = ConfigDict(extra="forbid")

    # --- task context (required) ---
    task_id: str
    task_title: str
    difficulty: Difficulty
    instructions: str
    current_index: int
    total_tickets: int
    ticket: TicketSnapshot

    # --- action vocabulary ---
    # The defaults list every value of the matching Literal alias; a new list
    # is built for each instance.
    allowed_priorities: list[Priority] = Field(
        default_factory=lambda: ["P1", "P2", "P3", "P4"],
    )
    allowed_queues: list[QueueName] = Field(
        default_factory=lambda: [
            "billing",
            "security",
            "technical",
            "success",
            "trust_safety",
        ],
    )
    allowed_dispositions: list[Disposition] = Field(
        default_factory=lambda: [
            "respond",
            "request_info",
            "escalate",
            "close",
        ],
    )

    # Default per-component weights; the five values add up to 1.0.
    scoring_weights: dict[str, float] = Field(
        default_factory=lambda: {
            "priority": 0.30,
            "queue": 0.25,
            "disposition": 0.20,
            "summary": 0.15,
            "response": 0.10,
        },
    )

    # --- step outcome (all optional) ---
    last_feedback: TicketFeedback | None = None
    cumulative_reward: float = 0.0
    reward: float = 0.0
    done: bool = False
    info: dict[str, Any] = Field(default_factory=dict)
class SupportQueueState(BaseModel):
    # Snapshot of one episode's state: progress counters, reward totals and
    # per-ticket history.
    # extra="forbid": unknown keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # --- required ---
    episode_id: str
    task: TaskCard
    current_index: int
    total_tickets: int
    done: bool
    cumulative_reward: float
    average_reward: float

    # --- history (optional) ---
    # Each list defaults to a fresh empty list per instance.
    # NOTE(review): presumably the three lists grow in lockstep, one entry per
    # processed ticket -- this model does not enforce it; confirm in the env.
    ticket_scores: list[TicketFeedback] = Field(default_factory=list)
    action_history: list[SupportQueueAction] = Field(default_factory=list)
    processed_tickets: list[str] = Field(default_factory=list)