diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 64bafa5..f685c3d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -27,7 +27,8 @@ } }, "forwardPorts": [ - 5000 // test-client-web port + 5000, // test-client-web port + 3000 // grafana UI port ], // Use 'postCreateCommand' to run commands after the container is created. "postCreateCommand": "bash -c .devcontainer/post-create.sh", @@ -37,4 +38,5 @@ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. // "remoteUser": "root" + "mounts": [] } diff --git a/.gitignore b/.gitignore index 92ee90d..a9d5e1a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ __pycache__/ # Distribution / packaging .Python -build/ +*/build/ develop-eggs/ dist/ downloads/ diff --git a/Makefile b/Makefile index ec08a05..ae2c850 100644 --- a/Makefile +++ b/Makefile @@ -95,3 +95,6 @@ docker-build-load-test: ## Build the AOAI Simulated API Load Test as a docker im erase-recording: ## Erase all *.recording files rm -rf "${makefile_dir}.recording" +start-telemetry: ## Start the local telemetry stack (grafana/otel-lgtm container) + -docker-compose -f ./build/telemetry-docker-compose.yaml down + docker-compose -f ./build/telemetry-docker-compose.yaml up \ No newline at end of file diff --git a/README.md b/README.md index b4a36a7..fbb804b 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ WARNING: This is a work in progress! 
- [How to Get Started with the Azure OpenAI API Simulator](#how-to-get-started-with-the-azure-openai-api-simulator) - [Running and Deploying the Azure OpenAI API Simulator](#running-and-deploying-the-azure-openai-api-simulator) - [Configuring the Azure OpenAI API Simulator](#configuring-the-azure-openai-api-simulator) + - [Monitoring the Azure OpenAI API Simulator](#monitoring-the-azure-openai-api-simulator) - [Extending the Azure OpenAI API Simulator](#extending-the-azure-openai-api-simulator) - [Contributing to the Azure OpenAI API Simulator](#contributing-to-the-azure-openai-api-simulator) - [Changelog](#changelog) @@ -91,6 +92,12 @@ The document [Running and Deploying the Azure OpenAI API Simulator](./docs/runni The behaviour of the Azure OpenAI API Simulator is controlled via a range of [Azure OpenAI API Simulator Configuration Options](./docs/config.md). +### Monitoring the Azure OpenAI API Simulator + +The Azure OpenAI API Simulator is instrumented using OpenTelemetry and supports exporting telemetry to Azure Monitor or an OTLP endpoint. + +See the [telemetry documentation](./docs/telemetry.md) on how to configure the application to export telemetry and the types of metrics captured. + ### Extending the Azure OpenAI API Simulator There are also a number of [Azure OpenAI API Simulator Extension points](./docs/extending.md) that allow you to customise the behaviour of the Azure OpenAI API Simulator. Extensions can be used to modify the request/response, add latency, or even generate responses. 
diff --git a/build/telemetry-docker-compose.yaml b/build/telemetry-docker-compose.yaml new file mode 100644 index 0000000..0649f7c --- /dev/null +++ b/build/telemetry-docker-compose.yaml @@ -0,0 +1,8 @@ +services: + grafana-all-in-one: # https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/ + image: grafana/otel-lgtm + container_name: otel-lgtm + ports: + - "3000:3000" # Grafana Web UI + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP http receiver diff --git a/docs/config.md b/docs/config.md index bd812d7..430d3e0 100644 --- a/docs/config.md +++ b/docs/config.md @@ -116,6 +116,8 @@ The simulator supports a set of basic Open Telemetry configuration options. Thes | ----------------------------- | ----------------------------------------------------------------------------------------------- | | `OTEL_SERVICE_NAME` | Sets the value of the service name reported to Open Telemetry. Defaults to `aoai-api-simulator` | | `OTEL_METRIC_EXPORT_INTERVAL` | The time interval (in milliseconds) between the start of two export attempts.. | +| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Sets the Application Insights connection string used to export telemetry to Azure Monitor. | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Sets the OpenTelemetry OTLP exporter endpoint. This can be further customised using the environment variables described in the [OTLP exporter specification](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), e.g. `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`, `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` or `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`. | ## Config API Endpoint diff --git a/docs/metrics.md b/docs/telemetry.md similarity index 53% rename from docs/metrics.md rename to docs/telemetry.md index a4c0e38..5be1f3c 100644 --- a/docs/metrics.md +++ b/docs/telemetry.md @@ -1,16 +1,24 @@ -# Azure OpenAI API Simulator Metrics +# Azure OpenAI API Simulator Telemetry + +This solution is instrumented using OpenTelemetry. 
The [Azure OpenTelemetry distribution](https://learn.microsoft.com/en-us/python/api/overview/azure/monitor-opentelemetry-readme?view=azure-python) library is used to instrument and export telemetry to an Azure Monitor instance defined in the `APPLICATIONINSIGHTS_CONNECTION_STRING` environment variable. + +In addition, the solution supports exporting to an [OTLP receiver](https://github.com/open-telemetry/opentelemetry-collector/blob/main/receiver/otlpreceiver/README.md) (e.g. an OpenTelemetry Collector) using the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable. + +## Metrics To help you understand how the API Simulator is performing, we provide a number of metrics that you can use to monitor the simulator. -- [Azure OpenAI API Simulator Metrics](#azure-openai-api-simulator-metrics) - - [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase) - - [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull) - - [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused) - - [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested) - - [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit) - - [aoai-api-simulator.limits](#aoai-api-simulatorlimits) +- [Azure OpenAI API Simulator Telemetry](#azure-openai-api-simulator-telemetry) + - [Metrics](#metrics) + - [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase) + - [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull) + - [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused) + - [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested) + - [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit) + - [aoai-api-simulator.limits](#aoai-api-simulatorlimits) + - [Running Locally](#running-locally) -## aoai-api-simulator.latency.base +### aoai-api-simulator.latency.base Units: `seconds` @@ -21,7 +29,7 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. 
- `status_code`: The HTTP status code of the response. -## aoai-api-simulator.latency.full +### aoai-api-simulator.latency.full Units: `seconds` @@ -32,7 +40,7 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. - `status_code`: The HTTP status code of the response. -## aoai-api-simulator.tokens.used +### aoai-api-simulator.tokens.used Units: `tokens` @@ -43,7 +51,7 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. - `token_type`: The type of token, e.g. `prompt` or `completion`. -## aoai-api-simulator.tokens.requested +### aoai-api-simulator.tokens.requested Units: `tokens` @@ -54,7 +62,7 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. - `token_type`: The type of token, e.g. `prompt` or `completion`. -## aoai-api-simulator.tokens.rate-limit +### aoai-api-simulator.tokens.rate-limit Units: `tokens` @@ -64,7 +72,7 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. -## aoai-api-simulator.limits +### aoai-api-simulator.limits Units: `requests` @@ -74,3 +82,9 @@ Dimensions: - `deployment`: The name of the deployment the metric relates to. - `limit_type`: The type of limit that was hit, e.g. `requests` or `tokens`. + +## Running Locally + +The `make start-telemetry` command starts the `grafana/otel-lgtm` container. This is an [all-in-one container](https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/) that captures traces, metrics and logs. + +It exposes the Grafana UI on port `3000` and OTLP receivers on ports `4317` (gRPC) and `4318` (HTTP). Set `OTEL_EXPORTER_OTLP_ENDPOINT` (for example `http://localhost:4317`) to send the simulator's telemetry to it. 
diff --git a/infra/bicep/main.bicep b/infra/bicep/main.bicep index 537c8d1..dfab58f 100644 --- a/infra/bicep/main.bicep +++ b/infra/bicep/main.bicep @@ -170,7 +170,7 @@ resource azureOpenAIKeySecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = { } resource appInsightsConnectionStringSecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = { parent: vault - name: 'app-insights-connection-string' + name: 'applicationinsights-connection-string' properties: { value: appInsights.properties.ConnectionString } @@ -208,8 +208,8 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = { identity: managedIdentity.id } { - name: 'app-insights-connection-string' - keyVaultUrl: '${keyVaultUri}secrets/app-insights-connection-string' + name: 'applicationinsights-connection-string' + keyVaultUrl: '${keyVaultUri}secrets/applicationinsights-connection-string' identity: managedIdentity.id } { @@ -243,7 +243,7 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = { { name: 'AZURE_OPENAI_KEY', secretRef: 'azure-openai-key' } { name: 'OPENAI_DEPLOYMENT_CONFIG_PATH', value: '/mnt/deployment-config/simulator_deployment_config.json' } { name: 'LOG_LEVEL', value: logLevel } - { name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'app-insights-connection-string' } + { name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'applicationinsights-connection-string' } // Ensure cloudRoleName is set in telemetry // https://opentelemetry-python.readthedocs.io/en/latest/sdk/environment_variables.html#opentelemetry.sdk.environment_variables.OTEL_SERVICE_NAME { name: 'OTEL_SERVICE_NAME', value: apiSimulatorName } diff --git a/loadtest/common/config.py b/loadtest/common/config.py index fa765bf..5973add 100644 --- a/loadtest/common/config.py +++ b/loadtest/common/config.py @@ -1,7 +1,8 @@ import os api_key = os.getenv("API_KEY", os.getenv("SIMULATOR_API_KEY")) -app_insights_connection_string = os.getenv("APP_INSIGHTS_CONNECTION_STRING") +opentelemetry_exporter_otlp_endpoint = 
os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") +applicationinsights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") log_analytics_workspace_id = os.getenv("LOG_ANALYTICS_WORKSPACE_ID") log_analytics_workspace_name = os.getenv("LOG_ANALYTICS_WORKSPACE_NAME") tenant_id = os.getenv("TENANT_ID") diff --git a/loadtest/common/locust_app_insights.py b/loadtest/common/locust_app_insights.py index fee407d..2572912 100644 --- a/loadtest/common/locust_app_insights.py +++ b/loadtest/common/locust_app_insights.py @@ -1,18 +1,18 @@ import logging -from opentelemetry import metrics + from azure.monitor.opentelemetry import configure_azure_monitor +from opentelemetry import metrics from .config import ( - app_insights_connection_string, + applicationinsights_connection_string, ) - histogram_request_latency: metrics.Histogram -if app_insights_connection_string: +if applicationinsights_connection_string: # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage logging.getLogger("azure").setLevel(logging.WARNING) - configure_azure_monitor(connection_string=app_insights_connection_string) + configure_azure_monitor(connection_string=applicationinsights_connection_string) histogram_request_latency = metrics.get_meter(__name__).create_histogram( "locust.request_latency", "Request latency", "s" ) diff --git a/loadtest/loadtest_chat_completions_1s_latency.py b/loadtest/loadtest_chat_completions_1s_latency.py index 6fd6992..fbadfa2 100644 --- a/loadtest/loadtest_chat_completions_1s_latency.py +++ b/loadtest/loadtest_chat_completions_1s_latency.py @@ -2,7 +2,7 @@ import os import requests -from common.config import api_key, app_insights_connection_string +from common.config import api_key, applicationinsights_connection_string from common.latency import set_simulator_chat_completions_latency from common.locust_app_insights import ( report_request_metric, @@ -22,7 +22,7 @@ def on_locust_init(environment: Environment, 
**_): """ Configure test """ - if app_insights_connection_string: + if applicationinsights_connection_string: logging.info("App Insights connection string found - enabling request metrics") environment.events.request.add_listener(report_request_metric) else: diff --git a/loadtest/loadtest_chat_completions_no_added_latency.py b/loadtest/loadtest_chat_completions_no_added_latency.py index 5adb480..bf92194 100644 --- a/loadtest/loadtest_chat_completions_no_added_latency.py +++ b/loadtest/loadtest_chat_completions_no_added_latency.py @@ -2,7 +2,7 @@ import os import requests -from common.config import api_key, app_insights_connection_string +from common.config import api_key, applicationinsights_connection_string from common.latency import set_simulator_chat_completions_latency from common.locust_app_insights import ( report_request_metric, @@ -26,7 +26,7 @@ def on_locust_init(environment: Environment, **_): """ Configure test """ - if app_insights_connection_string: + if applicationinsights_connection_string: logging.info("App Insights connection string found - enabling request metrics") environment.events.request.add_listener(report_request_metric) else: diff --git a/sample.env b/sample.env index e40a6d4..c6ca92c 100644 --- a/sample.env +++ b/sample.env @@ -29,6 +29,9 @@ AZURE_FORM_RECOGNIZER_KEY= # Open Telemetry Config (used within the simulator) OTEL_SERVICE_NAME=aoai-api-simulator-local-dev OTEL_METRIC_EXPORT_INTERVAL=10000 +# OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4317 ## if running in docker outside of docker +OTEL_EXPORTER_OTLP_ENDPOINT= +APPLICATIONINSIGHTS_CONNECTION_STRING= # Test Client Config (used to direct the tests and test clients) diff --git a/scripts/_run-load-test-aca.sh b/scripts/_run-load-test-aca.sh index c13c67f..058f529 100755 --- a/scripts/_run-load-test-aca.sh +++ b/scripts/_run-load-test-aca.sh @@ -86,8 +86,8 @@ if [[ -z "${key_vault_name}" ]]; then echo "Key Vault Name not found in output.json" exit 1 fi 
-app_insights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name app-insights-connection-string --query value --output tsv) -if [[ -z "${app_insights_connection_string}" ]]; then +applicationinsights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name applicationinsights-connection-string --query value --output tsv) +if [[ -z "${applicationinsights_connection_string}" ]]; then echo "App Insights Connection String not found in Key Vault" exit 1 fi @@ -133,7 +133,7 @@ az containerapp job create \ --cpu "1" \ --memory "2Gi" \ --command "locust" \ - --env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APP_INSIGHTS_CONNECTION_STRING=${app_insights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2 + --env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APPLICATIONINSIGHTS_CONNECTION_STRING=${applicationinsights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2 start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ") diff --git a/src/aoai-api-simulator/requirements.txt b/src/aoai-api-simulator/requirements.txt index 5ca2113..9e4288b 100644 --- a/src/aoai-api-simulator/requirements.txt +++ b/src/aoai-api-simulator/requirements.txt @@ -2,10 +2,14 @@ fastapi==0.109.2 uvicorn[standard]==0.27.0.post1 gunicorn==22.0.0 requests==2.32.0 +opentelemetry-instrumentation-requests==0.48b0 PyYAML==6.0.1 tiktoken==0.6.0 nanoid==2.0.0 limits==3.8.0 
+opentelemetry-api==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-exporter-otlp==1.27.0 azure-monitor-opentelemetry==1.3.0 pydantic-settings==2.2.1 python-multipart==0.0.9 diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py b/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py index 10d9284..fea3194 100644 --- a/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py +++ b/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py @@ -11,10 +11,11 @@ from aoai_api_simulator.record_replay.handler import get_default_forwarders -def get_config_from_env_vars(logger: logging.Logger) -> Config: +def get_config_from_env_vars() -> Config: """ Load configuration from environment variables """ + logger = logging.getLogger() config = Config(generators=get_default_generators()) config.recording.forwarders = get_default_forwarders() config.openai_deployments = _load_openai_deployments(logger) diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/latency.py b/src/aoai-api-simulator/src/aoai_api_simulator/latency.py index c93360a..988f8c2 100644 --- a/src/aoai-api-simulator/src/aoai_api_simulator/latency.py +++ b/src/aoai-api-simulator/src/aoai_api_simulator/latency.py @@ -2,7 +2,7 @@ import time from aoai_api_simulator import constants -from aoai_api_simulator.metrics import simulator_metrics +from aoai_api_simulator.telemetry import simulator_metrics from aoai_api_simulator.models import RequestContext from fastapi import Response diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py b/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py index b084757..f508772 100644 --- a/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py +++ b/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py @@ -7,12 +7,12 @@ from typing import Awaitable, Callable from aoai_api_simulator import constants -from aoai_api_simulator.metrics import simulator_metrics from aoai_api_simulator.models import ( Config, 
OpenAIDeployment, RequestContext, ) +from aoai_api_simulator.telemetry import simulator_metrics from fastapi import Response logger = logging.getLogger(__name__) diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/main.py b/src/aoai-api-simulator/src/aoai_api_simulator/main.py index 05a8e75..ef527a0 100644 --- a/src/aoai-api-simulator/src/aoai_api_simulator/main.py +++ b/src/aoai-api-simulator/src/aoai_api_simulator/main.py @@ -1,33 +1,15 @@ -import logging -import os - -from azure.monitor.opentelemetry import configure_azure_monitor +from aoai_api_simulator.app_builder import app as builder_app +from aoai_api_simulator.app_builder import apply_config # from opentelemetry import trace - from aoai_api_simulator.config_loader import get_config_from_env_vars, set_config -from aoai_api_simulator.app_builder import app as builder_app, apply_config - -log_level = os.getenv("LOG_LEVEL") or "INFO" - -logger = logging.getLogger(__name__) -logging.basicConfig(level=log_level) -logging.getLogger("azure").setLevel(logging.WARNING) - -application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") -if application_insights_connection_string: - logger.info("🚀 Configuring Azure Monitor telemetry") +from aoai_api_simulator.telemetry import setup_auto_instrumentation, setup_telemetry - # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage - configure_azure_monitor(connection_string=application_insights_connection_string) -else: - logger.info("🚀 Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)") +using_azure_monitor: bool = setup_telemetry() -# tracer = trace.get_tracer(__name__) - -config = get_config_from_env_vars(logger) +config = get_config_from_env_vars() set_config(config) - - apply_config() + app = builder_app # expose to gunicorn +setup_auto_instrumentation(app, using_azure_monitor) diff --git 
a/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py b/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py deleted file mode 100644 index a010db2..0000000 --- a/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py +++ /dev/null @@ -1,57 +0,0 @@ -from dataclasses import dataclass -from opentelemetry import metrics - - -@dataclass -class SimulatorMetrics: - histogram_latency_base: metrics.Histogram - histogram_latency_full: metrics.Histogram - histogram_tokens_used: metrics.Histogram - histogram_tokens_requested: metrics.Histogram - histogram_tokens_rate_limit: metrics.Histogram - histogram_rate_limit: metrics.Histogram - - -def _get_simulator_metrics() -> SimulatorMetrics: - meter = metrics.get_meter(__name__) - return SimulatorMetrics( - # dimensions: deployment, status_code - histogram_latency_base=meter.create_histogram( - name="aoai-api-simulator.latency.base", - description="Latency of handling the request (before adding simulated latency)", - unit="seconds", - ), - # dimensions: deployment, status_code - histogram_latency_full=meter.create_histogram( - name="aoai-api-simulator.latency.full", - description="Full latency of handling the request (including simulated latency)", - unit="seconds", - ), - # dimensions: deployment, token_type - histogram_tokens_used=meter.create_histogram( - name="aoai-api-simulator.tokens.used", - description="Number of tokens used per request", - unit="tokens", - ), - # dimensions: deployment, token_type - histogram_tokens_requested=meter.create_histogram( - name="aoai-api-simulator.tokens.requested", - description="Number of tokens across all requests (success or not)", - unit="tokens", - ), - # dimensions: deployment - histogram_tokens_rate_limit=meter.create_histogram( - name="aoai-api-simulator.tokens.rate-limit", - description="Number of tokens that were counted for rate-limiting", - unit="tokens", - ), - # dimensions: deployment, reason - histogram_rate_limit=meter.create_histogram( - 
name="aoai-api-simulator.limits", - description="Number of requests that were rate-limited", - unit="requests", - ), - ) - - -simulator_metrics = _get_simulator_metrics() diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py b/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py new file mode 100644 index 0000000..287e9ee --- /dev/null +++ b/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py @@ -0,0 +1,148 @@ +import logging +import os +from dataclasses import dataclass + +# from opentelemetry import trace +from azure.monitor.opentelemetry import configure_azure_monitor +from fastapi import FastAPI +from opentelemetry import metrics, trace +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.instrumentation.requests import RequestsInstrumentor +from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +log_level = os.getenv("LOG_LEVEL") or "INFO" + +logger = logging.getLogger(__name__) +logging.basicConfig(level=log_level) +logging.getLogger("azure").setLevel(logging.WARNING) + +opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") +application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") + + +@dataclass +class SimulatorMetrics: + histogram_latency_base: metrics.Histogram + histogram_latency_full: metrics.Histogram + 
histogram_tokens_used: metrics.Histogram + histogram_tokens_requested: metrics.Histogram + histogram_tokens_rate_limit: metrics.Histogram + histogram_rate_limit: metrics.Histogram + + +def _get_simulator_metrics() -> SimulatorMetrics: + meter = metrics.get_meter(__name__) + return SimulatorMetrics( + # dimensions: deployment, status_code + histogram_latency_base=meter.create_histogram( + name="aoai-api-simulator.latency.base", + description="Latency of handling the request (before adding simulated latency)", + unit="seconds", + ), + # dimensions: deployment, status_code + histogram_latency_full=meter.create_histogram( + name="aoai-api-simulator.latency.full", + description="Full latency of handling the request (including simulated latency)", + unit="seconds", + ), + # dimensions: deployment, token_type + histogram_tokens_used=meter.create_histogram( + name="aoai-api-simulator.tokens.used", + description="Number of tokens used per request", + unit="tokens", + ), + # dimensions: deployment, token_type + histogram_tokens_requested=meter.create_histogram( + name="aoai-api-simulator.tokens.requested", + description="Number of tokens across all requests (success or not)", + unit="tokens", + ), + # dimensions: deployment + histogram_tokens_rate_limit=meter.create_histogram( + name="aoai-api-simulator.tokens.rate-limit", + description="Number of tokens that were counted for rate-limiting", + unit="tokens", + ), + # dimensions: deployment, reason + histogram_rate_limit=meter.create_histogram( + name="aoai-api-simulator.limits", + description="Number of requests that were rate-limited", + unit="requests", + ), + ) + + +simulator_metrics = _get_simulator_metrics() + + +def setup_telemetry() -> bool: + using_azure_monitor: bool + + if application_insights_connection_string: + logger.info("🚀 Configuring Azure Monitor telemetry") + + # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage + 
configure_azure_monitor(connection_string=application_insights_connection_string) + using_azure_monitor = True + else: + using_azure_monitor = False + logger.info("Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)") + + if opentelemetry_exporter_otlp_endpoint: + logger.info("🚀 Configuring OTLP telemetry") + + # setup the instrumentors + resource = Resource(attributes={"service.name": os.getenv("OTEL_SERVICE_NAME", "aoai-api-simulator")}) + + # tracing + if not using_azure_monitor: + trace.set_tracer_provider(TracerProvider(resource=resource)) + + span_processor = BatchSpanProcessor(OTLPSpanExporter()) + trace.get_tracer_provider().add_span_processor(span_processor) + + # metrics + metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) + + if not using_azure_monitor: + meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + else: + meter_provider = metrics.get_meter_provider() + # meter_provider.add_metric_reader() is not implemented in python sdk yet. 
+ # adding it manually + meter_provider._all_metric_readers.add(metric_reader) + metric_reader._set_collect_callback(meter_provider._measurement_consumer.collect) + + # logging + logger_provider = LoggerProvider( + resource=resource, + ) + + batch_log_record_processor = BatchLogRecordProcessor(OTLPLogExporter()) + logger_provider.add_log_record_processor(batch_log_record_processor) + + handler = LoggingHandler(level=os.getenv("OTEL_LOG_LEVEL", "INFO").upper(), logger_provider=logger_provider) + # Attach OTLP handler to root logger (level upper-cased: logging rejects lower-case names such as "info") + logging.getLogger().addHandler(handler) + else: + logger.info("🚀 OTLP telemetry exporter not configured (set OTEL_EXPORTER_OTLP_ENDPOINT)") + + return using_azure_monitor + + +def setup_auto_instrumentation(app: FastAPI, using_azure_monitor: bool): + if not using_azure_monitor: + RequestsInstrumentor().instrument() + FastAPIInstrumentor.instrument_app(app) + else: + logger.info("Skipping instrumenting libraries as they are done by the Azure OTEL Distro already.")