Skip to content

Commit 74c6c19

Browse files
committed
update
1 parent 85cc08f commit 74c6c19

File tree

5 files changed

+55
-31
lines changed

5 files changed

+55
-31
lines changed

src/twinkle/server/gateway/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import Any
1414

1515
import twinkle_client.types as types
16+
from twinkle.server.utils.metrics import create_metrics_middleware
1617
from twinkle.server.utils.state import get_server_state
1718
from twinkle.server.utils.validation import verify_request_token
1819
from twinkle.utils.logger import get_logger
@@ -93,7 +94,6 @@ def build_server_app(deploy_options: dict[str, Any],
9394
async def verify_token(request: Request, call_next):
9495
return await verify_request_token(request=request, call_next=call_next)
9596

96-
from twinkle.server.utils.metrics import create_metrics_middleware
9797
app.middleware('http')(create_metrics_middleware('Gateway'))
9898

9999
def get_self() -> GatewayServer:

src/twinkle/server/model/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import twinkle
1616
from twinkle import DeviceGroup, DeviceMesh
1717
from twinkle.server.utils.lifecycle import AdapterManagerMixin
18+
from twinkle.server.utils.metrics import create_metrics_middleware
1819
from twinkle.server.utils.state import ServerStateProxy, get_server_state
1920
from twinkle.server.utils.task_queue import TaskQueueConfig, TaskQueueMixin
2021
from twinkle.server.utils.validation import get_token_from_request, verify_request_token
@@ -164,7 +165,6 @@ def build_model_app(model_id: str,
164165
async def verify_token(request: Request, call_next):
165166
return await verify_request_token(request=request, call_next=call_next)
166167

167-
from twinkle.server.utils.metrics import create_metrics_middleware
168168
app.middleware('http')(create_metrics_middleware('Model'))
169169

170170
def get_self() -> ModelManagement:

src/twinkle/server/processor/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import twinkle
2222
from twinkle import DeviceGroup, DeviceMesh, get_logger
2323
from twinkle.server.utils.lifecycle import ProcessorManagerMixin
24+
from twinkle.server.utils.metrics import create_metrics_middleware
2425
from twinkle.server.utils.state import ServerStateProxy, get_server_state
2526
from twinkle.server.utils.validation import verify_request_token
2627
from .twinkle_handlers import _register_processor_routes
@@ -124,7 +125,6 @@ def build_processor_app(ncpu_proc_per_node: int,
124125
async def verify_token(request: Request, call_next):
125126
return await verify_request_token(request=request, call_next=call_next)
126127

127-
from twinkle.server.utils.metrics import create_metrics_middleware
128128
app.middleware('http')(create_metrics_middleware('Processor'))
129129

130130
def get_self() -> ProcessorManagement:

src/twinkle/server/sampler/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import twinkle
1515
from twinkle import DeviceGroup, DeviceMesh
16+
from twinkle.server.utils.metrics import create_metrics_middleware
1617
from twinkle.server.utils.state import ServerStateProxy, get_server_state
1718
from twinkle.server.utils.task_queue import TaskQueueConfig, TaskQueueMixin
1819
from twinkle.server.utils.validation import get_token_from_request, verify_request_token
@@ -135,7 +136,6 @@ def build_sampler_app(model_id: str,
135136
async def verify_token(request: Request, call_next):
136137
return await verify_request_token(request=request, call_next=call_next)
137138

138-
from twinkle.server.utils.metrics import create_metrics_middleware
139139
app.middleware('http')(create_metrics_middleware('Sampler'))
140140

141141
def get_self() -> SamplerManagement:

src/twinkle/server/utils/metrics.py

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from __future__ import annotations
1818

1919
import time
20-
from collections import namedtuple
20+
from pydantic import BaseModel
2121
from ray.util.metrics import Counter, Gauge, Histogram
2222
from typing import Any, Callable
2323

@@ -47,33 +47,57 @@
4747
# ---------------------------------------------------------------------------
4848
# Lazy caches – populated on first call per deployment / globally
4949
# ---------------------------------------------------------------------------
50-
_task_metrics_cache: dict[str, Any] = {}
51-
_resource_metrics_cache: dict[str, Any] | None = None
52-
_request_metrics_cache: dict[str, Any] = {}
50+
_task_metrics_cache: dict[str, TaskMetrics] = {}
51+
_resource_metrics_cache: ResourceMetrics | None = None
52+
_request_metrics_cache: dict[str, _RequestMetrics] = {}
5353

5454
# ---------------------------------------------------------------------------
55-
# Named tuples for structured metric access
55+
# Pydantic models for structured metric access
5656
# ---------------------------------------------------------------------------
57-
TaskMetrics = namedtuple('TaskMetrics', [
58-
'queue_depth',
59-
'tasks_total',
60-
'execution_seconds',
61-
'queue_wait_seconds',
62-
'rate_limit_rejections',
63-
'rate_limiter_active_tokens',
64-
])
65-
66-
ResourceMetrics = namedtuple('ResourceMetrics', [
67-
'active_sessions',
68-
'active_models',
69-
'active_sampling_sessions',
70-
'active_futures',
71-
])
72-
73-
_RequestMetrics = namedtuple('_RequestMetrics', [
74-
'requests_total',
75-
'request_duration_seconds',
76-
])
57+
58+
59+
class TaskMetrics(BaseModel):
60+
"""Task queue metrics container.
61+
62+
Attributes:
63+
queue_depth: Current number of queued tasks.
64+
tasks_total: Total task completions.
65+
execution_seconds: Pure task execution time in seconds.
66+
queue_wait_seconds: Time from enqueue to execution start.
67+
rate_limit_rejections: Total rate-limit rejections.
68+
rate_limiter_active_tokens: Tokens tracked by rate limiter.
69+
"""
70+
71+
queue_depth: Gauge
72+
tasks_total: Counter
73+
execution_seconds: Histogram
74+
queue_wait_seconds: Histogram
75+
rate_limit_rejections: Counter
76+
rate_limiter_active_tokens: Gauge
77+
78+
79+
class ResourceMetrics(BaseModel):
80+
"""Resource gauge metrics container.
81+
82+
Attributes:
83+
active_sessions: Current active session count.
84+
active_models: Current registered model count.
85+
active_sampling_sessions: Current sampling session count.
86+
active_futures: Current future/request count.
87+
"""
88+
89+
active_sessions: Gauge
90+
active_models: Gauge
91+
active_sampling_sessions: Gauge
92+
active_futures: Gauge
93+
94+
95+
class _RequestMetrics(BaseModel):
96+
"""HTTP request metrics container (internal)."""
97+
98+
requests_total: Counter
99+
request_duration_seconds: Histogram
100+
77101

78102
# ---------------------------------------------------------------------------
79103
# A. Request-level metrics (FastAPI middleware)
@@ -146,7 +170,7 @@ async def metrics_middleware(request: Any, call_next: Callable) -> Any:
146170
def get_task_metrics(deployment: str) -> TaskMetrics:
147171
"""Return (or create) per-deployment task-queue metrics.
148172
149-
Returns a :class:`TaskMetrics` namedtuple with:
173+
Returns a :class:`TaskMetrics` Pydantic model with:
150174
151175
- ``queue_depth`` – Gauge
152176
- ``tasks_total`` – Counter
@@ -204,7 +228,7 @@ def get_task_metrics(deployment: str) -> TaskMetrics:
204228
def get_resource_metrics() -> ResourceMetrics:
205229
"""Return (or create) global resource gauge metrics.
206230
207-
Returns a :class:`ResourceMetrics` namedtuple with:
231+
Returns a :class:`ResourceMetrics` Pydantic model with:
208232
209233
- ``active_sessions`` – Gauge
210234
- ``active_models`` – Gauge

0 commit comments

Comments
 (0)