|
17 | 17 | from __future__ import annotations |
18 | 18 |
|
19 | 19 | import time |
20 | | -from collections import namedtuple |
| 20 | +from pydantic import BaseModel |
21 | 21 | from ray.util.metrics import Counter, Gauge, Histogram |
22 | 22 | from typing import Any, Callable |
23 | 23 |
|
|
47 | 47 | # --------------------------------------------------------------------------- |
48 | 48 | # Lazy caches – populated on first call per deployment / globally |
49 | 49 | # --------------------------------------------------------------------------- |
50 | | -_task_metrics_cache: dict[str, Any] = {} |
51 | | -_resource_metrics_cache: dict[str, Any] | None = None |
52 | | -_request_metrics_cache: dict[str, Any] = {} |
| 50 | +_task_metrics_cache: dict[str, TaskMetrics] = {} |
| 51 | +_resource_metrics_cache: ResourceMetrics | None = None |
| 52 | +_request_metrics_cache: dict[str, _RequestMetrics] = {} |
53 | 53 |
|
54 | 54 | # --------------------------------------------------------------------------- |
55 | | -# Named tuples for structured metric access |
| 55 | +# Pydantic models for structured metric access |
56 | 56 | # --------------------------------------------------------------------------- |
57 | | -TaskMetrics = namedtuple('TaskMetrics', [ |
58 | | - 'queue_depth', |
59 | | - 'tasks_total', |
60 | | - 'execution_seconds', |
61 | | - 'queue_wait_seconds', |
62 | | - 'rate_limit_rejections', |
63 | | - 'rate_limiter_active_tokens', |
64 | | -]) |
65 | | - |
66 | | -ResourceMetrics = namedtuple('ResourceMetrics', [ |
67 | | - 'active_sessions', |
68 | | - 'active_models', |
69 | | - 'active_sampling_sessions', |
70 | | - 'active_futures', |
71 | | -]) |
72 | | - |
73 | | -_RequestMetrics = namedtuple('_RequestMetrics', [ |
74 | | - 'requests_total', |
75 | | - 'request_duration_seconds', |
76 | | -]) |
| 57 | + |
| 58 | + |
class TaskMetrics(BaseModel, arbitrary_types_allowed=True):
    """Task queue metrics container.

    The fields hold Ray metric objects (``Gauge``, ``Counter``,
    ``Histogram``) rather than plain data. Pydantic cannot build a
    validation schema for those classes on its own and would raise while
    this class is being defined, so ``arbitrary_types_allowed`` is enabled.
    It is passed as a class keyword so that no additional import is needed.

    Attributes:
        queue_depth: Current number of queued tasks.
        tasks_total: Total task completions.
        execution_seconds: Pure task execution time in seconds.
        queue_wait_seconds: Time from enqueue to execution start.
        rate_limit_rejections: Total rate-limit rejections.
        rate_limiter_active_tokens: Tokens tracked by rate limiter.
    """

    queue_depth: Gauge
    tasks_total: Counter
    execution_seconds: Histogram
    queue_wait_seconds: Histogram
    rate_limit_rejections: Counter
    rate_limiter_active_tokens: Gauge
| 77 | + |
| 78 | + |
class ResourceMetrics(BaseModel, arbitrary_types_allowed=True):
    """Resource gauge metrics container.

    Every field is a Ray ``Gauge`` object, a type Pydantic has no built-in
    schema for. ``arbitrary_types_allowed`` is therefore enabled (as a class
    keyword, so no additional import is required); without it the class
    definition itself would fail.

    Attributes:
        active_sessions: Current active session count.
        active_models: Current registered model count.
        active_sampling_sessions: Current sampling session count.
        active_futures: Current future/request count.
    """

    active_sessions: Gauge
    active_models: Gauge
    active_sampling_sessions: Gauge
    active_futures: Gauge
| 93 | + |
| 94 | + |
class _RequestMetrics(BaseModel, arbitrary_types_allowed=True):
    """HTTP request metrics container (internal).

    The fields are Ray metric objects, which Pydantic cannot generate a
    schema for by default; ``arbitrary_types_allowed`` (given as a class
    keyword to avoid an extra import) lets the model be defined.

    Attributes:
        requests_total: Counter metric for requests.
        request_duration_seconds: Histogram metric for request durations.
    """

    requests_total: Counter
    request_duration_seconds: Histogram
| 100 | + |
77 | 101 |
|
78 | 102 | # --------------------------------------------------------------------------- |
79 | 103 | # A. Request-level metrics (FastAPI middleware) |
@@ -146,7 +170,7 @@ async def metrics_middleware(request: Any, call_next: Callable) -> Any: |
146 | 170 | def get_task_metrics(deployment: str) -> TaskMetrics: |
147 | 171 | """Return (or create) per-deployment task-queue metrics. |
148 | 172 |
|
149 | | - Returns a :class:`TaskMetrics` namedtuple with: |
| 173 | + Returns a :class:`TaskMetrics` Pydantic model with: |
150 | 174 |
|
151 | 175 | - ``queue_depth`` – Gauge |
152 | 176 | - ``tasks_total`` – Counter |
@@ -204,7 +228,7 @@ def get_task_metrics(deployment: str) -> TaskMetrics: |
204 | 228 | def get_resource_metrics() -> ResourceMetrics: |
205 | 229 | """Return (or create) global resource gauge metrics. |
206 | 230 |
|
207 | | - Returns a :class:`ResourceMetrics` namedtuple with: |
| 231 | + Returns a :class:`ResourceMetrics` Pydantic model with: |
208 | 232 |
|
209 | 233 | - ``active_sessions`` – Gauge |
210 | 234 | - ``active_models`` – Gauge |
|
0 commit comments