1 change: 1 addition & 0 deletions .gitignore
@@ -17,3 +17,4 @@ bruno.json
.ruff_cache/
*.csv
*.png
METRICS_IMPLEMENTATION.md
26 changes: 26 additions & 0 deletions README.md
@@ -98,6 +98,7 @@ The API provides multiple endpoints for authentication, documentation, and monit
| `/authenticate` | `POST` | Authenticates a user using their PESU credentials. |
| `/health` | `GET` | A health check endpoint to monitor the API's status. |
| `/readme` | `GET` | Redirects to the project's official GitHub repository. |
| `/metrics` | `GET` | Returns current application metrics and counters. |

### `/authenticate`

@@ -162,6 +163,31 @@ does not take any request parameters.

This endpoint redirects to the project's official GitHub repository. This endpoint does not take any request parameters.

### `/metrics`

This endpoint provides application metrics for monitoring authentication success rates, error counts, and system performance. It's useful for observability and debugging. This endpoint does not take any request parameters.

#### Response Object

| **Field** | **Type** | **Description** |
|-----------|------------|-------------------------------------------------------------------|
| `status` | `boolean` | `true` if the metrics were retrieved successfully, `false` if there was an error |
| `message` | `string` | Success message or error description |
| `timestamp` | `string` | A timezone-aware timestamp indicating when the metrics were retrieved |
| `metrics` | `object` | Dictionary containing all current metric counters |

The `metrics` object includes counters for:
- `auth_success_total` - Successful authentication attempts
- `auth_failure_total` - Failed authentication attempts
- `validation_error_total` - Request validation failures
- `pesu_academy_error_total` - PESU Academy service errors
- `unhandled_exception_total` - Unexpected application errors
- `csrf_token_error_total` - CSRF token extraction failures
- `profile_fetch_error_total` - Profile page fetch failures
- `profile_parse_error_total` - Profile parsing errors
- `csrf_token_refresh_success_total` - Successful background CSRF refreshes
- `csrf_token_refresh_failure_total` - Failed background CSRF refreshes
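
For a quick look at the counters from a script, here is a minimal sketch (assuming the API is reachable at `http://localhost:5000` and the `requests` package is installed — adjust the base URL to wherever your instance runs):

```python
import requests

# Fetch the current metric counters (the base URL is an assumption; point it at your deployment)
response = requests.get("http://localhost:5000/metrics", timeout=5)
response.raise_for_status()
data = response.json()

print(data["message"])                               # "Metrics retrieved successfully"
print(data["metrics"].get("auth_success_total", 0))  # e.g. 150
```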

### Integrating your application with the PESUAuth API

Here are some examples of how you can integrate your application with the PESUAuth API using Python and cURL.
91 changes: 87 additions & 4 deletions app/app.py
@@ -9,15 +9,17 @@

import pytz
import uvicorn
from fastapi import BackgroundTasks, FastAPI
from fastapi import BackgroundTasks, FastAPI, Response
from fastapi.exceptions import RequestValidationError
from fastapi.requests import Request
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import ValidationError

from app.docs import authenticate_docs, health_docs, readme_docs
from app.docs.metrics import metrics_docs
from app.exceptions.base import PESUAcademyError
from app.models import RequestModel, ResponseModel
from app.metrics import metrics # Global metrics instance
from app.models import MetricsResponseModel, RequestModel, ResponseModel
from app.pesu import PESUAcademy

IST = pytz.timezone("Asia/Kolkata")
@@ -29,8 +31,13 @@ async def _refresh_csrf_token_with_lock() -> None:
    """Refresh the CSRF token with a lock."""
    logging.debug("Refreshing unauthenticated CSRF token...")
    async with CSRF_TOKEN_REFRESH_LOCK:
        await pesu_academy.prefetch_client_with_csrf_token()
        logging.info("Unauthenticated CSRF token refreshed successfully.")
        try:
            await pesu_academy.prefetch_client_with_csrf_token()
            metrics.inc("csrf_token_refresh_success_total")
            logging.info("Unauthenticated CSRF token refreshed successfully.")
        except Exception:
            metrics.inc("csrf_token_refresh_failure_total")
            raise


async def _csrf_token_refresh_loop() -> None:
Expand All @@ -40,6 +47,7 @@ async def _csrf_token_refresh_loop() -> None:
logging.debug("Refreshing unauthenticated CSRF token...")
await _refresh_csrf_token_with_lock()
except Exception:
metrics.inc("csrf_token_refresh_failure_total")
logging.exception("Failed to refresh unauthenticated CSRF token in the background.")
await asyncio.sleep(CSRF_TOKEN_REFRESH_INTERVAL_SECONDS)

@@ -94,12 +102,41 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
        },
    ],
)


# --- Metrics Middleware ---
@app.middleware("http")
async def metrics_middleware(request: Request, call_next: callable) -> Response:
    """Middleware to track request metrics for every HTTP request.

    Increments counters for total requests, per-route requests, success/failure, and latency.
    """
    route = request.url.path
    metrics.inc("requests_total")
    metrics.inc(f"requests_total_route_{route}")
    try:
        response: Response = await call_next(request)
        metrics.inc("requests_latency_sum")  # For histogram/average in future
        if 200 <= response.status_code < 300:
            metrics.inc("requests_success")
        else:
            metrics.inc("requests_failed")
            metrics.inc(f"requests_failed_status_{response.status_code}")
        return response
    except Exception as e:
        metrics.inc("requests_failed")
        metrics.inc(f"requests_failed_exception_{type(e).__name__}")
        metrics.inc("requests_latency_sum")
        raise


pesu_academy = PESUAcademy()


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Handler for request validation errors."""
    metrics.inc("validation_error_total")
    logging.exception("Request data could not be validated.")
    errors = exc.errors()
    message = "; ".join([f"{'.'.join(str(loc) for loc in e['loc'])}: {e['msg']}" for e in errors])
@@ -116,6 +153,19 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
@app.exception_handler(PESUAcademyError)
async def pesu_exception_handler(request: Request, exc: PESUAcademyError) -> JSONResponse:
    """Handler for PESUAcademy specific errors."""
    metrics.inc("pesu_academy_error_total")

    # Track specific error types
    exc_type = type(exc).__name__.lower()
    if "csrf" in exc_type:
        metrics.inc("csrf_token_error_total")
    elif "profilefetch" in exc_type:
        metrics.inc("profile_fetch_error_total")
    elif "profileparse" in exc_type:
        metrics.inc("profile_parse_error_total")
    elif "authentication" in exc_type:
        metrics.inc("auth_failure_total")

Comment on lines +159 to +168
There is a much cleaner solution. Look into a middleware layer. Here is some pseudo code to get you started:

@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    metrics.inc("requests_total")
    start_time = time.time()

    try:
        response: Response = await call_next(request)
        latency = time.time() - start_time

        # Track successes vs failures
        if 200 <= response.status_code < 300:
            metrics.inc("requests_success")
        else:
            metrics.inc("requests_failed")
            metrics.inc(f"requests_failed_status_{response.status_code}")

        # Latency metrics
        metrics.inc("request_latency_sum", latency)

        # Also add route metrics: route = request.scope.get("route")

        return response

    except Exception as e:
        latency = time.time() - start_time
        metrics.inc("requests_failed")
        metrics.inc(f"requests_failed_exception_{type(e).__name__}")
        metrics.inc("request_latency_sum", latency)

        raise

Note, you will need to accordingly increment other metrics like how many with and without profile data by parsing the request.

logging.exception(f"PESUAcademyError: {exc.message}")
return JSONResponse(
status_code=exc.status_code,
Expand All @@ -130,6 +180,7 @@ async def pesu_exception_handler(request: Request, exc: PESUAcademyError) -> JSO
@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse:
"""Handler for unhandled exceptions."""
metrics.inc("unhandled_exception_total")
logging.exception("Unhandled exception occurred.")
return JSONResponse(
status_code=500,
@@ -160,6 +211,25 @@ async def health() -> JSONResponse:
    )


@app.get(
    "/metrics",
    response_model=MetricsResponseModel,
    response_class=JSONResponse,
    responses=metrics_docs.response_examples,
    tags=["Monitoring"],
)
async def get_metrics() -> MetricsResponseModel:
    """Get current application metrics."""
    logging.debug("Metrics requested.")
    current_metrics = metrics.get()
    return MetricsResponseModel(
        status=True,
        message="Metrics retrieved successfully",
        timestamp=datetime.datetime.now(IST),
        metrics=current_metrics,
    )


@app.get(
    "/readme",
    response_class=RedirectResponse,
@@ -196,9 +266,17 @@ async def authenticate(payload: RequestModel, background_tasks: BackgroundTasks)
    profile = payload.profile
    fields = payload.fields

    # Track total auth requests and profile split
    metrics.inc("auth_requests_total")
    if profile:
        metrics.inc("auth_requests_with_profile")
    else:
        metrics.inc("auth_requests_without_profile")

    # Authenticate the user
    authentication_result = {"timestamp": current_time}
    logging.info(f"Authenticating user={username} with PESU Academy...")

    authentication_result.update(
        await pesu_academy.authenticate(
            username=username,
@@ -207,6 +285,7 @@ async def authenticate(payload: RequestModel, background_tasks: BackgroundTasks)
            fields=fields,
        ),
    )

    # Prefetch a new client with an unauthenticated CSRF token for the next request
    background_tasks.add_task(_refresh_csrf_token_with_lock)

@@ -216,6 +295,10 @@ async def authenticate(payload: RequestModel, background_tasks: BackgroundTasks)
    logging.info(f"Returning auth result for user={username}: {authentication_result}")
    authentication_result = authentication_result.model_dump(exclude_none=True)
    authentication_result["timestamp"] = current_time.isoformat()

    # Track successful authentication only after validation succeeds
    metrics.inc("auth_success_total")

    return JSONResponse(
        status_code=200,
        content=authentication_result,
41 changes: 41 additions & 0 deletions app/docs/metrics.py
@@ -0,0 +1,41 @@
"""Custom docs for the /metrics PESUAuth endpoint."""

from app.docs.base import ApiDocs

metrics_docs = ApiDocs(
request_examples={}, # GET endpoint doesn't need request examples
response_examples={
200: {
"description": "Metrics retrieved successfully",
"content": {
"application/json": {
"examples": {
"metrics_response": {
"summary": "Current Metrics",
"description": (
"All current application metrics including authentication counts and error rates"
),
"value": {
"status": True,
"message": "Metrics retrieved successfully",
"timestamp": "2025-08-28T15:30:45.123456+05:30",
"metrics": {
"auth_success_total": 150,
"auth_failure_total": 12,
Comment on lines +23 to +24
We should also track how many auth requests are received, including a split for how many with and without profile data

"validation_error_total": 8,
"pesu_academy_error_total": 5,
"unhandled_exception_total": 0,
"csrf_token_error_total": 2,
"profile_fetch_error_total": 1,
"profile_parse_error_total": 0,
"csrf_token_refresh_success_total": 45,
"csrf_token_refresh_failure_total": 1,
},
},
}
}
}
},
}
},
Comment on lines +8 to +40
Create a model for this. The response model will also need an update.

)
35 changes: 35 additions & 0 deletions app/metrics.py
@@ -0,0 +1,35 @@
"""Metrics collector for tracking authentication successes, failures, and error types."""

import threading
from collections import defaultdict


class MetricsCollector:
"""Thread-safe metrics collector for tracking application performance and usage."""

def __init__(self) -> None:
"""Initialize the metrics collector with thread safety."""
self.lock = threading.Lock()
self.metrics = defaultdict(int)

def inc(self, key: str) -> None:
"""Increment a metric counter by 1.

Args:
key (str): The metric key to increment.
"""
with self.lock:
self.metrics[key] += 1

def get(self) -> dict[str, int]:
"""Get a copy of all current metrics.

Returns:
dict[str, int]: Dictionary containing all metrics and their current values.
"""
with self.lock:
return dict(self.metrics)


# Global metrics instance
metrics = MetricsCollector()
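
A minimal usage sketch of the collector — counters are created lazily on first increment, and `get()` returns a plain copy that is safe to serialize:

```python
from app.metrics import metrics

# Counters spring into existence on first increment (defaultdict(int)),
# so no registration step is needed.
metrics.inc("auth_success_total")
metrics.inc("auth_requests_total")

snapshot = metrics.get()  # detached dict copy; later increments won't mutate it
print(snapshot["auth_success_total"])  # 1
```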
1 change: 1 addition & 0 deletions app/models/__init__.py
@@ -2,4 +2,5 @@

from .profile import ProfileModel as ProfileModel
from .request import RequestModel as RequestModel
from .response import MetricsResponseModel as MetricsResponseModel
from .response import ResponseModel as ResponseModel
45 changes: 45 additions & 0 deletions app/models/response.py
@@ -38,3 +38,48 @@ class ResponseModel(BaseModel):
        title="User Profile Data",
        description="The user's profile data returned only if authentication succeeds and profile data was requested.",
    )


class MetricsResponseModel(BaseModel):
    """Model representing the response from the /metrics endpoint."""

    status: bool = Field(
        ...,
        title="Metrics Status",
        description="Indicates whether the metrics were retrieved successfully.",
        json_schema_extra={"example": True},
    )

    message: str = Field(
        ...,
        title="Metrics Message",
        description="A human-readable message providing information about the metrics retrieval.",
        json_schema_extra={"example": "Metrics retrieved successfully"},
    )

    timestamp: datetime = Field(
        ...,
        title="Metrics Timestamp",
        description="Timestamp of the metrics retrieval with timezone info.",
        json_schema_extra={"example": "2025-08-28T15:30:45.123456+05:30"},
    )

    metrics: dict = Field(
        ...,
        title="Metrics Data",
        description="Dictionary containing all current metric counters.",
        json_schema_extra={
            "example": {
                "auth_success_total": 150,
                "auth_failure_total": 12,
                "validation_error_total": 8,
                "pesu_academy_error_total": 5,
                "unhandled_exception_total": 0,
                "csrf_token_error_total": 2,
                "profile_fetch_error_total": 1,
                "profile_parse_error_total": 0,
                "csrf_token_refresh_success_total": 45,
                "csrf_token_refresh_failure_total": 1,
            }
        },
    )
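
A short sketch of how this model can be built and dumped, e.g. in a test (the field values below are illustrative):

```python
import datetime

from app.models import MetricsResponseModel

payload = MetricsResponseModel(
    status=True,
    message="Metrics retrieved successfully",
    timestamp=datetime.datetime.now(datetime.timezone.utc),  # any timezone-aware timestamp
    metrics={"auth_success_total": 1, "auth_failure_total": 0},
)
print(payload.model_dump()["metrics"])  # {'auth_success_total': 1, 'auth_failure_total': 0}
```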