From 555fbd4b779fb11a09e24c49575d7ac4ad674351 Mon Sep 17 00:00:00 2001
From: Ben Corlett
Date: Mon, 16 Jun 2025 09:30:43 +0100
Subject: [PATCH] Add optional otel and jaeger components.

Add opt-in OpenTelemetry tooling to the local docker-compose stack.

Makefile:
- `otel-collector` enables the otel-collector and jaeger profiles and
  points the apps at otel-collector:4317 with the otlp export type.
- `otel-loadbalancer` additionally enables the otel-loadbalancer
  profile, points the apps at otel-loadbalancer:4317 and sets
  OTEL_COLLECTOR_REPLICAS=2.
- `otel-console` only sets OTEL_EXPORT_TYPE=console.

docker-compose.yml:
- The notify, document-download, template-preview and antivirus
  services get NOTIFY_APP_NAME, OTEL_EXPORT_TYPE and
  OTEL_COLLECTOR_ENDPOINT. The two OTEL_* values fall back to `none`
  when the variable is unset or empty.
- New profile-gated services: otel-loadbalancer, otel-collector and
  jaeger. Jaeger's ports are published on 127.0.0.1 only, matching the
  otel-collector port mapping.

otel_mapping.yml (mounted into otel-collector):
- Receives OTLP over gRPC on 4317, forwards traces to jaeger:4317 and
  feeds the spanmetrics connector, whose metrics are exposed by the
  prometheus exporter on 9090.
- The prometheus receiver and the signaltometrics connector are
  declared but not referenced by any pipeline.

otel_mapping_loadbalancer.yml (mounted into otel-loadbalancer):
- Receives OTLP over gRPC on 4317 and forwards traces through the
  loadbalancing exporter (routing key traceID) to the hosts that DNS
  returns for otel-collector.

---
 Makefile                      | 20 ++++++++
 docker-compose.yml            | 89 ++++++++++++++++++++++++++++++++
 otel_mapping.yml              | 96 +++++++++++++++++++++++++++++++++++
 otel_mapping_loadbalancer.yml | 30 +++++++++++
 4 files changed, 235 insertions(+)
 create mode 100644 otel_mapping.yml
 create mode 100644 otel_mapping_loadbalancer.yml

diff --git a/Makefile b/Makefile
index 602375d..0169348 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,26 @@ beat:
 	$(eval export DC_PROFILES=${DC_PROFILES} --profile beat)
 	@true
 
+.PHONY: otel-collector
+otel-collector:
+	$(eval export DC_PROFILES=${DC_PROFILES} --profile otel-collector --profile jaeger)
+	$(eval export OTEL_EXPORT_TYPE=otlp)
+	$(eval export OTEL_COLLECTOR_ENDPOINT=otel-collector:4317)
+	@true
+
+.PHONY: otel-loadbalancer
+otel-loadbalancer:
+	$(eval export DC_PROFILES=${DC_PROFILES} --profile otel-loadbalancer --profile otel-collector --profile jaeger)
+	$(eval export OTEL_EXPORT_TYPE=otlp)
+	$(eval export OTEL_COLLECTOR_ENDPOINT=otel-loadbalancer:4317)
+	$(eval export OTEL_COLLECTOR_REPLICAS=2)
+	@true
+
+.PHONY: otel-console
+otel-console:
+	$(eval export OTEL_EXPORT_TYPE=console)
+	@true
+
 .PHONY: antivirus
 antivirus:
 	$(eval export DC_ANTIVIRUS=ANTIVIRUS_ENABLED=1)
diff --git a/docker-compose.yml b/docker-compose.yml
index 1ba4fa2..d27b26c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -46,7 +46,10 @@ services:
       - private/notify-api.env
     environment:
       - ANTIVIRUS_ENABLED
+      - NOTIFY_APP_NAME=api-web
       - FLASK_RUN_EXTRA_FILES=/opt/venv/
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     depends_on:
       db:
         condition: service_healthy
@@ -78,6 +81,7 @@ services:
     networks:
       db:
 
+
   notify-api-celery:
     image: notifications-api
     container_name: notify-api-celery
@@ -92,6 +96,9 @@ services:
       - ANTIVIRUS_ENABLED
       - MMG_URL
       - FIRETEXT_URL
+      - NOTIFY_APP_NAME=api-worker
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     depends_on:
       db:
         condition: service_healthy
@@ -116,7 +123,10 @@ services:
       - private/local-aws-creds.env
       - private/notify-api.env
     environment:
+      - NOTIFY_APP_NAME=api-celery-beat
       - ANTIVIRUS_ENABLED
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     depends_on:
       db:
         condition: service_healthy
@@ -128,6 +138,7 @@ services:
       db:
       apps:
 
+
   notify-admin:
     image: notifications-admin
     container_name: notify-admin
@@ -151,6 +162,9 @@ services:
     environment:
       - ANTIVIRUS_ENABLED
       - FLASK_RUN_EXTRA_FILES=/opt/venv/
+      - NOTIFY_APP_NAME=admin
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     depends_on:
       notify-api:
         condition: service_started
@@ -182,6 +196,9 @@ services:
       - private/document-download-api.env
     environment:
       - ANTIVIRUS_ENABLED
+      - NOTIFY_APP_NAME=document-download-api
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     networks:
       apps:
         aliases:
@@ -205,6 +222,10 @@ services:
     env_file:
       - private/local-aws-creds.env
       - private/document-download-frontend.env
+    environment:
+      - NOTIFY_APP_NAME=document-download-frontend
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     networks:
       apps:
         aliases:
@@ -229,6 +250,9 @@ services:
     tty: true
     environment:
       - FLASK_RUN_EXTRA_FILES=/opt/venv/
+      - NOTIFY_APP_NAME=template-preview-api
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     env_file:
       - private/local-aws-creds.env
       - private/template-preview-api.env
@@ -247,10 +271,15 @@ services:
     env_file:
       - private/local-aws-creds.env
       - private/template-preview-api.env
+    environment:
+      - NOTIFY_APP_NAME=template-preview-worker
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     restart: always
     networks:
       apps:
 
+
   antivirus-api:
     image: antivirus-api
     container_name: antivirus-api
@@ -272,6 +301,9 @@ services:
     tty: true
     environment:
       - FLASK_RUN_EXTRA_FILES=/opt/venv/
+      - NOTIFY_APP_NAME=antivirus-api
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     env_file:
       - private/local-aws-creds.env
       - private/antivirus-api.env
@@ -298,6 +330,9 @@ services:
     tty: true
     environment:
       - FLASK_RUN_EXTRA_FILES=/opt/venv/
+      - NOTIFY_APP_NAME=antivirus-worker
+      - OTEL_EXPORT_TYPE=${OTEL_EXPORT_TYPE:-none}
+      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT:-none}
     env_file:
       - private/local-aws-creds.env
       - private/antivirus-api.env
@@ -305,6 +340,7 @@ services:
     networks:
       apps:
 
+
   sms-provider-stub:
     image: sms-provider-stub
     container_name: sms-provider-stub
@@ -325,6 +361,59 @@ services:
         aliases:
           - notify.localhost
 
+  otel-loadbalancer:
+    image: otel/opentelemetry-collector-contrib:latest
+    deploy:
+      mode: replicated
+      replicas: 2
+    profiles:
+      - otel-loadbalancer
+    volumes:
+      - ./otel_mapping_loadbalancer.yml:/etc/otel/config.yaml
+    stdin_open: true
+    tty: true
+    command: [ "--config", "/etc/otel/config.yaml" ]
+    networks:
+      apps:
+        aliases:
+          - otel-loadbalancer.localhost
+
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:latest
+    deploy:
+      mode: replicated
+      replicas: ${OTEL_COLLECTOR_REPLICAS:-1}
+    profiles:
+      - otel-collector
+    volumes:
+      - ./otel_mapping.yml:/etc/otel/config.yaml
+    ports:
+      - "127.0.0.1:9090-9091:9090"  # Prometheus exporter
+    stdin_open: true
+    tty: true
+    command: [ "--config", "/etc/otel/config.yaml" ]
+    networks:
+      apps:
+        aliases:
+          - otel-collector.localhost
+
+  jaeger:
+    image: jaegertracing/jaeger:latest
+    container_name: jaeger
+    profiles:
+      - jaeger
+    ports:
+      - "127.0.0.1:16686:16686"  # Jaeger UI
+      - "127.0.0.1:4317:4317"  # OTLP gRPC
+      - "127.0.0.1:4318:4318"  # OTLP HTTP
+      - "127.0.0.1:5778:5778"  # remote sampling config
+      - "127.0.0.1:9411:9411"  # Zipkin-compatible endpoint
+    restart: on-failure
+    networks:
+      apps:
+        aliases:
+          - jaeger.localhost
+
 networks:
   db:
   redis:
diff --git a/otel_mapping.yml b/otel_mapping.yml
new file mode 100644
index 0000000..ee95eae
--- /dev/null
+++ b/otel_mapping.yml
@@ -0,0 +1,96 @@
+receivers:
+  prometheus:
+    config:
+      scrape_configs:
+        - job_name: notifications-api
+          scrape_interval: 10s
+          static_configs:
+            - targets: ['notify-api.localhost:6011']
+              labels:
+                service: notifications-api
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+
+exporters:
+  prometheus:
+    # Adjust the endpoint as needed for your Prometheus server to scrape
+    endpoint: "0.0.0.0:9090"
+  otlp/jaeger:
+    endpoint: jaeger:4317
+    tls:
+      insecure: true
+
+processors:
+  batch:
+  filter/simple:
+    error_mode: ignore
+    spans:
+      exclude:
+        match_type: strict
+        libraries:
+          - name: opentelemetry.instrumentation.wsgi
+
+connectors:
+  spanmetrics:
+    histogram:
+      explicit:
+        buckets: [100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms, 1s, 10s, 25s]
+      unit: "s"
+    dimensions:
+      - name: http.method  # flask
+      - name: http.status_code  # flask
+      - name: net.peer.name  # db/redis
+      - name: celery.task_name  # celery
+      - name: messaging.destination  # celery
+    namespace: "spans"
+    exemplars:
+      enabled: true
+    dimensions_cache_size: 1000
+    aggregation_temporality: "AGGREGATION_TEMPORALITY_CUMULATIVE"
+    metrics_flush_interval: 15s
+    metrics_expiration: 5m
+    events:
+      enabled: true
+      dimensions:
+        - name: exception.type
+        - name: exception.message
+    resource_metrics_key_attributes:
+      - service.name
+      - telemetry.sdk.language
+      - telemetry.sdk.name
+
+  signaltometrics:
+    spans:
+      - name: span_duration
+        description: Span duration as exponential histogram
+        unit: us
+        #attributes: # categorize by attributes
+        #  - key: attribute.foo
+        exponential_histogram:
+          count: Int(AdjustedCount())  # Adjusted count here is calculated as a custom OTTL converter
+          value: Microseconds(end_time - start_time)
+    logs:
+      - description: Count of log records
+        name: logrecord_count
+        sum:
+          value: "1"
+
+service:
+  telemetry:
+    logs:
+      level: DEBUG
+  pipelines:
+    traces/metrics:
+      receivers: [otlp]
+      processors: [filter/simple, batch]
+      exporters: [spanmetrics]
+      #exporters: [signaltometrics]
+    traces/jaeger:
+      receivers: [otlp]
+      exporters: [otlp/jaeger]
+    metrics:
+      receivers: [otlp,spanmetrics]
+      #receivers: [signaltometrics]
+      exporters: [prometheus]
diff --git a/otel_mapping_loadbalancer.yml b/otel_mapping_loadbalancer.yml
new file mode 100644
index 0000000..1fb6f2f
--- /dev/null
+++ b/otel_mapping_loadbalancer.yml
@@ -0,0 +1,30 @@
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+
+processors:
+
+exporters:
+  loadbalancing:
+    routing_key: "traceID"
+    protocol:
+      otlp:
+        timeout: 2s
+        tls:
+          insecure: true
+    resolver:
+      dns:
+        hostname: otel-collector
+        port: "4317"
+
+
+service:
+  pipelines:
+    traces:
+      receivers:
+        - otlp
+      processors: []
+      exporters:
+        - loadbalancing