diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 64bafa5..f685c3d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -27,7 +27,8 @@
 		}
 	},
 	"forwardPorts": [
-		5000 // test-client-web port
+		5000, // test-client-web port
+		3000 // grafana UI port
 	],
 	// Use 'postCreateCommand' to run commands after the container is created.
 	"postCreateCommand": "bash -c .devcontainer/post-create.sh",
@@ -37,4 +38,5 @@
 
 	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
 	// "remoteUser": "root"
+	"mounts": []
 }
diff --git a/.gitignore b/.gitignore
index 92ee90d..a9d5e1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@ __pycache__/
 
 # Distribution / packaging
 .Python
-build/
+*/build/
 develop-eggs/
 dist/
 downloads/
diff --git a/Makefile b/Makefile
index ec08a05..ae2c850 100644
--- a/Makefile
+++ b/Makefile
@@ -95,3 +95,6 @@ docker-build-load-test: ## Build the AOAI Simulated API Load Test as a docker im
 erase-recording: ## Erase all *.recording files
 	rm -rf "${makefile_dir}.recording"
 
+start-telemetry: ## Start the local grafana/otel-lgtm telemetry stack (removes any previous instance first)
+	-docker-compose -f ./build/telemetry-docker-compose.yaml down
+	docker-compose -f ./build/telemetry-docker-compose.yaml up
\ No newline at end of file
diff --git a/README.md b/README.md
index b4a36a7..fbb804b 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ WARNING: This is a work in progress!
   - [How to Get Started with the Azure OpenAI API Simulator](#how-to-get-started-with-the-azure-openai-api-simulator)
     - [Running and Deploying the Azure OpenAI API Simulator](#running-and-deploying-the-azure-openai-api-simulator)
     - [Configuring the Azure OpenAI API Simulator](#configuring-the-azure-openai-api-simulator)
+    - [Monitoring the Azure OpenAI API Simulator](#monitoring-the-azure-openai-api-simulator)
     - [Extending the Azure OpenAI API Simulator](#extending-the-azure-openai-api-simulator)
     - [Contributing to the Azure OpenAI API Simulator](#contributing-to-the-azure-openai-api-simulator)
   - [Changelog](#changelog)
@@ -91,6 +92,12 @@ The document [Running and Deploying the Azure OpenAI API Simulator](./docs/runni
 
 The behaviour of the Azure OpenAI API Simulator is controlled via a range of [Azure OpenAI API Simulator Configuration Options](./docs/config.md).
 
+### Monitoring the Azure OpenAI API Simulator
+
+The Azure OpenAI API Simulator is instrumented using OpenTelemetry and supports exporting telemetry to Azure Monitor or an OTLP endpoint.
+
+See the [telemetry documentation](./docs/telemetry.md) for how to configure the application to export telemetry and for details of the metrics that are captured.
+
 ### Extending the Azure OpenAI API Simulator
 
 There are also a number of [Azure OpenAI API Simulator Extension points](./docs/extending.md) that allow you to customise the behaviour of the Azure OpenAI API Simulator. Extensions can be used to modify the request/response, add latency, or even generate responses.
diff --git a/build/telemetry-docker-compose.yaml b/build/telemetry-docker-compose.yaml
new file mode 100644
index 0000000..0649f7c
--- /dev/null
+++ b/build/telemetry-docker-compose.yaml
@@ -0,0 +1,8 @@
+services:
+  grafana-all-in-one: # https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/
+    image: grafana/otel-lgtm
+    container_name: otel-lgtm
+    ports:
+      - "3000:3000"   # Grafana Web UI
+      - "4317:4317"   # OTLP gRPC receiver
+      - "4318:4318"   # OTLP http receiver
diff --git a/docs/config.md b/docs/config.md
index bd812d7..430d3e0 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -116,6 +116,8 @@ The simulator supports a set of basic Open Telemetry configuration options. Thes
 | ----------------------------- | ----------------------------------------------------------------------------------------------- |
 | `OTEL_SERVICE_NAME`           | Sets the value of the service name reported to Open Telemetry. Defaults to `aoai-api-simulator` |
 | `OTEL_METRIC_EXPORT_INTERVAL` | The time interval (in milliseconds) between the start of two export attempts..                  |
+| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Sets the Application Insights connection string used to export telemetry to Azure Monitor. |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | Sets the OpenTelemetry OTLP exporter endpoint. This can be further customised using the environment variables described in the [OTLP exporter specification](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), e.g. `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`, `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` or `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`. |
 
 ## Config API Endpoint
 
diff --git a/docs/metrics.md b/docs/telemetry.md
similarity index 53%
rename from docs/metrics.md
rename to docs/telemetry.md
index a4c0e38..5be1f3c 100644
--- a/docs/metrics.md
+++ b/docs/telemetry.md
@@ -1,16 +1,24 @@
-# Azure OpenAI API Simulator Metrics
+# Azure OpenAI API Simulator Telemetry
+
+This solution is instrumented using OpenTelemetry. The [Azure OpenTelemetry distribution](https://learn.microsoft.com/en-us/python/api/overview/azure/monitor-opentelemetry-readme?view=azure-python) library is used to instrument and export telemetry to an Azure Monitor instance defined in the `APPLICATIONINSIGHTS_CONNECTION_STRING` environment variable.
+
+In addition to this, the solution also supports exporting to an [OTLP receiver](https://github.com/open-telemetry/opentelemetry-collector/blob/main/receiver/otlpreceiver/README.md) (e.g. the OpenTelemetry Collector) using the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable.
+
+## Metrics
 
 To help you understand how the API Simulator is performing, we provide a number of metrics that you can use to monitor the simulator.
 
-- [Azure OpenAI API Simulator Metrics](#azure-openai-api-simulator-metrics)
-  - [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase)
-  - [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull)
-  - [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused)
-  - [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested)
-  - [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit)
-  - [aoai-api-simulator.limits](#aoai-api-simulatorlimits)
+- [Azure OpenAI API Simulator Telemetry](#azure-openai-api-simulator-telemetry)
+  - [Metrics](#metrics)
+    - [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase)
+    - [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull)
+    - [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused)
+    - [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested)
+    - [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit)
+    - [aoai-api-simulator.limits](#aoai-api-simulatorlimits)
+  - [Running Locally](#running-locally)
 
-## aoai-api-simulator.latency.base
+### aoai-api-simulator.latency.base
 
 Units: `seconds`
 
@@ -21,7 +29,7 @@ Dimensions:
 - `deployment`: The name of the deployment the metric relates to.
 - `status_code`: The HTTP status code of the response.
 
-## aoai-api-simulator.latency.full
+### aoai-api-simulator.latency.full
 
 Units: `seconds`
 
@@ -32,7 +40,7 @@ Dimensions:
 - `deployment`: The name of the deployment the metric relates to.
 - `status_code`: The HTTP status code of the response.
 
-## aoai-api-simulator.tokens.used
+### aoai-api-simulator.tokens.used
 
 Units: `tokens`
 
@@ -43,7 +51,7 @@ Dimensions:
 - `deployment`: The name of the deployment the metric relates to.
 - `token_type`: The type of token, e.g. `prompt` or `completion`.
 
-## aoai-api-simulator.tokens.requested
+### aoai-api-simulator.tokens.requested
 
 Units: `tokens`
 
@@ -54,7 +62,7 @@ Dimensions:
 - `deployment`: The name of the deployment the metric relates to.
 - `token_type`: The type of token, e.g. `prompt` or `completion`.
 
-## aoai-api-simulator.tokens.rate-limit
+### aoai-api-simulator.tokens.rate-limit
 
 Units: `tokens`
 
@@ -64,7 +72,7 @@ Dimensions:
 
 - `deployment`: The name of the deployment the metric relates to.
 
-## aoai-api-simulator.limits
+### aoai-api-simulator.limits
 
 Units: `requests`
 
@@ -74,3 +82,9 @@ Dimensions:
 
 - `deployment`: The name of the deployment the metric relates to.
 - `limit_type`: The type of limit that was hit, e.g. `requests` or `tokens`.
+
+## Running Locally
+
+The `make start-telemetry` command starts the `grafana/otel-lgtm` container. This is an [all-in-one container](https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/) to capture traces, metrics and logs.
+
+It exposes the Grafana UI on port `3000` and OTLP receivers on ports `4317` (gRPC) and `4318` (HTTP).
diff --git a/infra/bicep/main.bicep b/infra/bicep/main.bicep
index 537c8d1..dfab58f 100644
--- a/infra/bicep/main.bicep
+++ b/infra/bicep/main.bicep
@@ -170,7 +170,7 @@ resource azureOpenAIKeySecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
 }
 resource appInsightsConnectionStringSecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
   parent: vault
-  name: 'app-insights-connection-string'
+  name: 'applicationinsights-connection-string'
   properties: {
     value: appInsights.properties.ConnectionString
   }
@@ -208,8 +208,8 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = {
           identity: managedIdentity.id
         }
         {
-          name: 'app-insights-connection-string'
-          keyVaultUrl: '${keyVaultUri}secrets/app-insights-connection-string'
+          name: 'applicationinsights-connection-string'
+          keyVaultUrl: '${keyVaultUri}secrets/applicationinsights-connection-string'
           identity: managedIdentity.id
         }
         {
@@ -243,7 +243,7 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = {
             { name: 'AZURE_OPENAI_KEY', secretRef: 'azure-openai-key' }
             { name: 'OPENAI_DEPLOYMENT_CONFIG_PATH', value: '/mnt/deployment-config/simulator_deployment_config.json' }
             { name: 'LOG_LEVEL', value: logLevel }
-            { name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'app-insights-connection-string' }
+            { name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'applicationinsights-connection-string' }
             // Ensure cloudRoleName is set in telemetry
             // https://opentelemetry-python.readthedocs.io/en/latest/sdk/environment_variables.html#opentelemetry.sdk.environment_variables.OTEL_SERVICE_NAME
             { name: 'OTEL_SERVICE_NAME', value: apiSimulatorName }
diff --git a/loadtest/common/config.py b/loadtest/common/config.py
index fa765bf..5973add 100644
--- a/loadtest/common/config.py
+++ b/loadtest/common/config.py
@@ -1,7 +1,8 @@
 import os
 
 api_key = os.getenv("API_KEY", os.getenv("SIMULATOR_API_KEY"))
-app_insights_connection_string = os.getenv("APP_INSIGHTS_CONNECTION_STRING")
+opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+applicationinsights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
 log_analytics_workspace_id = os.getenv("LOG_ANALYTICS_WORKSPACE_ID")
 log_analytics_workspace_name = os.getenv("LOG_ANALYTICS_WORKSPACE_NAME")
 tenant_id = os.getenv("TENANT_ID")
diff --git a/loadtest/common/locust_app_insights.py b/loadtest/common/locust_app_insights.py
index fee407d..2572912 100644
--- a/loadtest/common/locust_app_insights.py
+++ b/loadtest/common/locust_app_insights.py
@@ -1,18 +1,18 @@
 import logging
-from opentelemetry import metrics
+
 from azure.monitor.opentelemetry import configure_azure_monitor
+from opentelemetry import metrics
 
 from .config import (
-    app_insights_connection_string,
+    applicationinsights_connection_string,
 )
 
-
 histogram_request_latency: metrics.Histogram
 
-if app_insights_connection_string:
+if applicationinsights_connection_string:
     # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
     logging.getLogger("azure").setLevel(logging.WARNING)
-    configure_azure_monitor(connection_string=app_insights_connection_string)
+    configure_azure_monitor(connection_string=applicationinsights_connection_string)
     histogram_request_latency = metrics.get_meter(__name__).create_histogram(
         "locust.request_latency", "Request latency", "s"
     )
diff --git a/loadtest/loadtest_chat_completions_1s_latency.py b/loadtest/loadtest_chat_completions_1s_latency.py
index 6fd6992..fbadfa2 100644
--- a/loadtest/loadtest_chat_completions_1s_latency.py
+++ b/loadtest/loadtest_chat_completions_1s_latency.py
@@ -2,7 +2,7 @@
 import os
 
 import requests
-from common.config import api_key, app_insights_connection_string
+from common.config import api_key, applicationinsights_connection_string
 from common.latency import set_simulator_chat_completions_latency
 from common.locust_app_insights import (
     report_request_metric,
@@ -22,7 +22,7 @@ def on_locust_init(environment: Environment, **_):
     """
     Configure test
     """
-    if app_insights_connection_string:
+    if applicationinsights_connection_string:
         logging.info("App Insights connection string found - enabling request metrics")
         environment.events.request.add_listener(report_request_metric)
     else:
diff --git a/loadtest/loadtest_chat_completions_no_added_latency.py b/loadtest/loadtest_chat_completions_no_added_latency.py
index 5adb480..bf92194 100644
--- a/loadtest/loadtest_chat_completions_no_added_latency.py
+++ b/loadtest/loadtest_chat_completions_no_added_latency.py
@@ -2,7 +2,7 @@
 import os
 
 import requests
-from common.config import api_key, app_insights_connection_string
+from common.config import api_key, applicationinsights_connection_string
 from common.latency import set_simulator_chat_completions_latency
 from common.locust_app_insights import (
     report_request_metric,
@@ -26,7 +26,7 @@ def on_locust_init(environment: Environment, **_):
     """
     Configure test
     """
-    if app_insights_connection_string:
+    if applicationinsights_connection_string:
         logging.info("App Insights connection string found - enabling request metrics")
         environment.events.request.add_listener(report_request_metric)
     else:
diff --git a/sample.env b/sample.env
index e40a6d4..c6ca92c 100644
--- a/sample.env
+++ b/sample.env
@@ -29,6 +29,9 @@ AZURE_FORM_RECOGNIZER_KEY=
 #  Open Telemetry Config (used within the simulator)
 OTEL_SERVICE_NAME=aoai-api-simulator-local-dev
 OTEL_METRIC_EXPORT_INTERVAL=10000
+# OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4317 ## if running in docker outside of docker
+OTEL_EXPORTER_OTLP_ENDPOINT=
+APPLICATIONINSIGHTS_CONNECTION_STRING=
 
 
 # Test Client Config (used to direct the tests and test clients)
diff --git a/scripts/_run-load-test-aca.sh b/scripts/_run-load-test-aca.sh
index c13c67f..058f529 100755
--- a/scripts/_run-load-test-aca.sh
+++ b/scripts/_run-load-test-aca.sh
@@ -86,8 +86,8 @@ if [[ -z "${key_vault_name}" ]]; then
 	echo "Key Vault Name not found in output.json"
 	exit 1
 fi
-app_insights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name app-insights-connection-string --query value --output tsv)
-if [[ -z "${app_insights_connection_string}" ]]; then
+applicationinsights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name applicationinsights-connection-string --query value --output tsv)
+if [[ -z "${applicationinsights_connection_string}" ]]; then
 	echo "App Insights Connection String not found in Key Vault"
 	exit 1
 fi
@@ -133,7 +133,7 @@ az containerapp job create \
   --cpu "1" \
   --memory "2Gi" \
   --command "locust" \
-  --env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APP_INSIGHTS_CONNECTION_STRING=${app_insights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2
+  --env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APPLICATIONINSIGHTS_CONNECTION_STRING=${applicationinsights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2
 
 
 start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
diff --git a/src/aoai-api-simulator/requirements.txt b/src/aoai-api-simulator/requirements.txt
index 5ca2113..9e4288b 100644
--- a/src/aoai-api-simulator/requirements.txt
+++ b/src/aoai-api-simulator/requirements.txt
@@ -2,10 +2,14 @@ fastapi==0.109.2
 uvicorn[standard]==0.27.0.post1
 gunicorn==22.0.0
 requests==2.32.0
+opentelemetry-instrumentation-requests==0.48b0
 PyYAML==6.0.1
 tiktoken==0.6.0
 nanoid==2.0.0
 limits==3.8.0
+opentelemetry-api==1.27.0
+opentelemetry-sdk==1.27.0
+opentelemetry-exporter-otlp==1.27.0
 azure-monitor-opentelemetry==1.3.0
 pydantic-settings==2.2.1
 python-multipart==0.0.9
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py b/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py
index 10d9284..fea3194 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py
@@ -11,10 +11,11 @@
 from aoai_api_simulator.record_replay.handler import get_default_forwarders
 
 
-def get_config_from_env_vars(logger: logging.Logger) -> Config:
+def get_config_from_env_vars() -> Config:
     """
     Load configuration from environment variables
     """
+    logger = logging.getLogger()
     config = Config(generators=get_default_generators())
     config.recording.forwarders = get_default_forwarders()
     config.openai_deployments = _load_openai_deployments(logger)
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/latency.py b/src/aoai-api-simulator/src/aoai_api_simulator/latency.py
index c93360a..988f8c2 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/latency.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/latency.py
@@ -2,7 +2,7 @@
 import time
 
 from aoai_api_simulator import constants
-from aoai_api_simulator.metrics import simulator_metrics
+from aoai_api_simulator.telemetry import simulator_metrics
 from aoai_api_simulator.models import RequestContext
 from fastapi import Response
 
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py b/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
index b084757..f508772 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
@@ -7,12 +7,12 @@
 from typing import Awaitable, Callable
 
 from aoai_api_simulator import constants
-from aoai_api_simulator.metrics import simulator_metrics
 from aoai_api_simulator.models import (
     Config,
     OpenAIDeployment,
     RequestContext,
 )
+from aoai_api_simulator.telemetry import simulator_metrics
 from fastapi import Response
 
 logger = logging.getLogger(__name__)
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/main.py b/src/aoai-api-simulator/src/aoai_api_simulator/main.py
index 05a8e75..ef527a0 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/main.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/main.py
@@ -1,33 +1,15 @@
-import logging
-import os
-
-from azure.monitor.opentelemetry import configure_azure_monitor
+from aoai_api_simulator.app_builder import app as builder_app
+from aoai_api_simulator.app_builder import apply_config
 
 # from opentelemetry import trace
-
 from aoai_api_simulator.config_loader import get_config_from_env_vars, set_config
-from aoai_api_simulator.app_builder import app as builder_app, apply_config
-
-log_level = os.getenv("LOG_LEVEL") or "INFO"
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=log_level)
-logging.getLogger("azure").setLevel(logging.WARNING)
-
-application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
-if application_insights_connection_string:
-    logger.info("🚀 Configuring Azure Monitor telemetry")
+from aoai_api_simulator.telemetry import setup_auto_instrumentation, setup_telemetry
 
-    # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
-    configure_azure_monitor(connection_string=application_insights_connection_string)
-else:
-    logger.info("🚀 Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)")
+using_azure_monitor: bool = setup_telemetry()
 
-# tracer = trace.get_tracer(__name__)
-
-config = get_config_from_env_vars(logger)
+config = get_config_from_env_vars()
 set_config(config)
-
-
 apply_config()
+
 app = builder_app  # expose to gunicorn
+setup_auto_instrumentation(app, using_azure_monitor)
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py b/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py
deleted file mode 100644
index a010db2..0000000
--- a/src/aoai-api-simulator/src/aoai_api_simulator/metrics.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from dataclasses import dataclass
-from opentelemetry import metrics
-
-
-@dataclass
-class SimulatorMetrics:
-    histogram_latency_base: metrics.Histogram
-    histogram_latency_full: metrics.Histogram
-    histogram_tokens_used: metrics.Histogram
-    histogram_tokens_requested: metrics.Histogram
-    histogram_tokens_rate_limit: metrics.Histogram
-    histogram_rate_limit: metrics.Histogram
-
-
-def _get_simulator_metrics() -> SimulatorMetrics:
-    meter = metrics.get_meter(__name__)
-    return SimulatorMetrics(
-        # dimensions: deployment, status_code
-        histogram_latency_base=meter.create_histogram(
-            name="aoai-api-simulator.latency.base",
-            description="Latency of handling the request (before adding simulated latency)",
-            unit="seconds",
-        ),
-        # dimensions: deployment, status_code
-        histogram_latency_full=meter.create_histogram(
-            name="aoai-api-simulator.latency.full",
-            description="Full latency of handling the request (including simulated latency)",
-            unit="seconds",
-        ),
-        # dimensions: deployment, token_type
-        histogram_tokens_used=meter.create_histogram(
-            name="aoai-api-simulator.tokens.used",
-            description="Number of tokens used per request",
-            unit="tokens",
-        ),
-        # dimensions: deployment, token_type
-        histogram_tokens_requested=meter.create_histogram(
-            name="aoai-api-simulator.tokens.requested",
-            description="Number of tokens across all requests (success or not)",
-            unit="tokens",
-        ),
-        # dimensions: deployment
-        histogram_tokens_rate_limit=meter.create_histogram(
-            name="aoai-api-simulator.tokens.rate-limit",
-            description="Number of tokens that were counted for rate-limiting",
-            unit="tokens",
-        ),
-        # dimensions: deployment, reason
-        histogram_rate_limit=meter.create_histogram(
-            name="aoai-api-simulator.limits",
-            description="Number of requests that were rate-limited",
-            unit="requests",
-        ),
-    )
-
-
-simulator_metrics = _get_simulator_metrics()
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py b/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py
new file mode 100644
index 0000000..287e9ee
--- /dev/null
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/telemetry.py
@@ -0,0 +1,148 @@
+import logging
+import os
+from dataclasses import dataclass
+
+# from opentelemetry import trace
+from azure.monitor.opentelemetry import configure_azure_monitor
+from fastapi import FastAPI
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from opentelemetry.instrumentation.requests import RequestsInstrumentor
+from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+log_level = os.getenv("LOG_LEVEL") or "INFO"  # an unset or empty LOG_LEVEL falls back to INFO
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=log_level)
+logging.getLogger("azure").setLevel(logging.WARNING)  # only WARNING and above from the "azure" loggers
+
+opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")  # enables OTLP export when set
+application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")  # enables Azure Monitor
+
+
+@dataclass
+class SimulatorMetrics:  # the six histogram instruments the simulator records to
+    histogram_latency_base: metrics.Histogram  # request latency before simulated latency is added
+    histogram_latency_full: metrics.Histogram  # request latency including simulated latency
+    histogram_tokens_used: metrics.Histogram  # tokens used per request
+    histogram_tokens_requested: metrics.Histogram  # tokens across all requests (success or not)
+    histogram_tokens_rate_limit: metrics.Histogram  # tokens counted for rate-limiting
+    histogram_rate_limit: metrics.Histogram  # requests that were rate-limited
+
+
+def _get_simulator_metrics() -> SimulatorMetrics:  # creates every simulator histogram on this module's meter
+    meter = metrics.get_meter(__name__)
+    return SimulatorMetrics(
+        # dimensions: deployment, status_code
+        histogram_latency_base=meter.create_histogram(
+            name="aoai-api-simulator.latency.base",
+            description="Latency of handling the request (before adding simulated latency)",
+            unit="seconds",
+        ),
+        # dimensions: deployment, status_code
+        histogram_latency_full=meter.create_histogram(
+            name="aoai-api-simulator.latency.full",
+            description="Full latency of handling the request (including simulated latency)",
+            unit="seconds",
+        ),
+        # dimensions: deployment, token_type
+        histogram_tokens_used=meter.create_histogram(
+            name="aoai-api-simulator.tokens.used",
+            description="Number of tokens used per request",
+            unit="tokens",
+        ),
+        # dimensions: deployment, token_type
+        histogram_tokens_requested=meter.create_histogram(
+            name="aoai-api-simulator.tokens.requested",
+            description="Number of tokens across all requests (success or not)",
+            unit="tokens",
+        ),
+        # dimensions: deployment
+        histogram_tokens_rate_limit=meter.create_histogram(
+            name="aoai-api-simulator.tokens.rate-limit",
+            description="Number of tokens that were counted for rate-limiting",
+            unit="tokens",
+        ),
+        # dimensions: deployment, limit_type (docs/telemetry.md lists limit_type, not "reason" - TODO confirm in limiters.py)
+        histogram_rate_limit=meter.create_histogram(
+            name="aoai-api-simulator.limits",
+            description="Number of requests that were rate-limited",
+            unit="requests",
+        ),
+    )
+
+
+simulator_metrics = _get_simulator_metrics()  # built at import time; imported by latency.py and limiters.py
+
+
+def setup_telemetry() -> bool:
+    """Configure the Azure Monitor and/or OTLP exporters from environment variables.
+
+    Returns True when Azure Monitor was configured, False otherwise.
+    """
+    using_azure_monitor = bool(application_insights_connection_string)
+    if using_azure_monitor:
+        logger.info("🚀 Configuring Azure Monitor telemetry")
+
+        # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
+        configure_azure_monitor(connection_string=application_insights_connection_string)
+    else:
+        logger.info("Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)")
+
+    if opentelemetry_exporter_otlp_endpoint:
+        logger.info("🚀 Configuring OTLP telemetry")
+
+        # Resource.create() also merges the SDK default attributes and OTEL_RESOURCE_ATTRIBUTES
+        resource = Resource.create({"service.name": os.getenv("OTEL_SERVICE_NAME", "aoai-api-simulator")})
+
+        # tracing: only register a provider here when Azure Monitor was not configured above
+        if not using_azure_monitor:
+            trace.set_tracer_provider(TracerProvider(resource=resource))
+
+        span_processor = BatchSpanProcessor(OTLPSpanExporter())
+        trace.get_tracer_provider().add_span_processor(span_processor)
+
+        # metrics
+        metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
+
+        if not using_azure_monitor:
+            meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
+            metrics.set_meter_provider(meter_provider)
+        else:
+            meter_provider = metrics.get_meter_provider()
+            # meter_provider.add_metric_reader() is not implemented in python sdk yet, so the reader is
+            # attached via private attributes. NOTE(review): fragile across SDK upgrades - verify export works.
+            meter_provider._all_metric_readers.add(metric_reader)
+            metric_reader._set_collect_callback(meter_provider._measurement_consumer.collect)
+
+        # logging
+        logger_provider = LoggerProvider(resource=resource)
+
+        batch_log_record_processor = BatchLogRecordProcessor(OTLPLogExporter())
+        logger_provider.add_log_record_processor(batch_log_record_processor)
+
+        # logging only accepts upper-case level names; a lowercase OTEL_LOG_LEVEL such as "info" would raise ValueError
+        handler = LoggingHandler(level=os.getenv("OTEL_LOG_LEVEL", "INFO").upper(), logger_provider=logger_provider)
+        # Attach OTLP handler to root logger
+        logging.getLogger().addHandler(handler)
+    else:
+        logger.info("🚀 OTLP telemetry exporter not configured (set OTEL_EXPORTER_OTLP_ENDPOINT)")
+
+    return using_azure_monitor
+
+
+def setup_auto_instrumentation(app: FastAPI, using_azure_monitor: bool):  # no-op when Azure Monitor is in use
+    if using_azure_monitor:
+        logger.info("Skipping instrumenting libraries as they are done by the Azure OTEL Distro already.")
+    else:
+        RequestsInstrumentor().instrument()
+        FastAPIInstrumentor.instrument_app(app)