diff --git a/UPGRADE_GUIDE.md b/UPGRADE_GUIDE.md new file mode 100644 index 0000000..b959021 --- /dev/null +++ b/UPGRADE_GUIDE.md @@ -0,0 +1,51 @@ +# Upgrading the IC Observability Stack + +This guide details the procedure for updating your existing deployment of **IC Observability Stack** to the latest version. Since the stack is provisioned using Docker, the upgrade process is generally safe, as all of the resources coming from this repository are codified and don't conflict with each other. + +**Disclaimer**: If you have made your own changes, either in Grafana using the UI or in the code, there may be conflicts. More about resolving conflicts [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line). + +## Prerequisites + +Before starting the upgrade, ensure your system is ready: +1. *Clean Working Directory*: Ensure you have committed or stashed any local changes to the repository before pulling updates. To see if that is needed, consult the panel showing the [State of the directory](http://localhost:3000/d/robv9sv/observability-stack-github-status?orgId=1&from=now-6h&to=now&timezone=browser&refresh=10s&viewPanel=panel-6) or run the command: +```bash +git status +# If there are any changes, you can choose to commit or stash them. +``` +2. Turn off the stack: +```bash +docker compose down +``` + +## Step 1: Pull the latest repository changes + +Navigate to the root directory (where this file is located) and fetch the latest code from the upstream branch: +```bash +git pull origin master +``` +This command will fetch and merge any new Dockerfiles, configuration files, or any other dependency updates into your local working copy. + +## Step 2: Update local dependencies (setup) + +After every upgrade, run `setup.sh` to ensure that your environment is properly configured to work with the latest version of the stack. 
Navigate to the root directory (where this file is located) and run setup like the following: +```bash +./setup.sh +``` + +## Step 3: Spin the services back up + +Now that you have the newest version locally you can run the following command to bring the services back up: +```bash +# This will create the docker containers. +docker compose up -d +``` + +## Step 4: Verify the services + +After the services are back up you should: +* Check that victoria metrics [can scrape targets](http://localhost:9090/targets?search=) +* Check that you [can access grafana](http://localhost:3000) +* Check if there are any services that aren't running: +```bash +docker compose ps --format json | jq 'select(.State != "running") | {name: .Name, status: .Status, exitCode: .ExitCode}' +``` diff --git a/config/grafana/grafana.ini b/config/grafana/grafana.ini index 64a4103..25cd860 100644 --- a/config/grafana/grafana.ini +++ b/config/grafana/grafana.ini @@ -10,4 +10,5 @@ admin_password = password admin_email = admin@localhost secret_key = strongPassword@!!123 - +[dashboards] +default_home_dashboard_path = /etc/grafana/provisioning/dashboards/samples/observability_stack_status.json diff --git a/config/grafana/provisioning/dashboards/samples/observability_stack_status.json b/config/grafana/provisioning/dashboards/samples/observability_stack_status.json new file mode 100644 index 0000000..655c3ac --- /dev/null +++ b/config/grafana/provisioning/dashboards/samples/observability_stack_status.json @@ -0,0 +1,572 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "This dashboard shows the status of your observability stack and if there are any updates done that can be updated.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 5, + "links": 
[], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "panels": [], + "title": "Installed and Remote versions", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The version of observability stack that is currently running.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "color-text" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.width", + "value": 197 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 11, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "cellHeight": "lg", + "showHeader": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "git_installed_commit", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Installed commit", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false, + "Value": true, + "__name__": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "description": "This panel contains the explanation for the installed commit.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 13, + "x": 11, + "y": 1 + }, + "id": 2, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Installed and Remote commit(s)\n\nThese panels 
show the **exact Git commit** (version) of the **IC Observability Stack**. \n\nThere are two versions:\n* **Installed commit** - version of the software that’s currently running.\n* **Remote commit** - most recent version of the software that you can get.\n\nEach commit represents a specific point in the codebase’s history — like a unique “fingerprint” for that version of the application.\n\nKnowing this value is important because it lets us:\n* **Identify which version is deployed** — helpful when debugging or confirming updates.\n* **Report issues accurately** — when you share the commit hash, developers can see the exact code you’re running.\n* **Track deployments** — so we can see when new code goes live and compare environments.\n\nIt is in your best interest to keep these two versions **equal**.\n\nIf you need to report a problem or behavior difference, include this commit value so the engineering team can reproduce and investigate it precisely.\n", + "mode": "markdown" + }, + "pluginVersion": "12.2.1", + "title": "", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "This panel shows the most recent remote commit, which is the latest version of the IC Observability Stack.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "color-text" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.width", + "value": 195 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 11, + "x": 0, + "y": 5 + }, + "id": 3, + "options": { + "cellHeight": "lg", + "showHeader": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": 
{ + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "git_remote_commit", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Remote commit", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true, + "__name__": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 5, + "panels": [], + "title": "Difference", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "This panel represents the local state of the git directory. It shows if something needs to be commited or the upgrade can be performed cleanly.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "yellow", + "mode": "shades" + }, + "mappings": [ + { + "options": { + "Clean": { + "color": "green", + "index": 1 + }, + "Dirty": { + "color": "yellow", + "index": 0 + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": "" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^state$/", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "git_local_state", + "format": "table", + "instant": true, + "legendFormat": 
"__auto", + "range": false, + "refId": "A" + } + ], + "title": "State of the directory", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true + }, + "includeByName": {}, + "indexByName": {}, + "orderByMode": "manual", + "renameByName": {} + } + } + ], + "type": "stat" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 16, + "x": 8, + "y": 15 + }, + "id": 9, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# State and difference\n\nThese set of graphs show you the difference between your \nlocal copy of the **IC Observability Stack** and the newest version.\n\n* **Commits ahead** - shows the number of versions *your* local copy \n is in front of the newest one.\n* **Commits behind** - shows the number of versions *your* local copy\n is behind of the newest one.\n* **State** - shows whether there are some changes that need to be \n saved (commited), before the upgrade can happen.\n\nIt is in your best interest to keep the **commits behind** as low as \npossible. 
To do that, read more in the `UPGRADE_GUIDE.md`.\n\nSometimes, it may not be possible to easily perform the upgrade\ndue to your local state (if it is `Dirty`) or if you have some\ncommits (**commits ahead**) which are not compatible with the changes\ncoming from remote.", + "mode": "markdown" + }, + "pluginVersion": "12.2.1", + "title": "", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "If there are any commits ahead there may be issues with updating to the newest version as there may be conflicts with your changes and the ones coming in.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "green", + "value": 0 + }, + { + "color": "blue", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 20 + }, + "id": 7, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "last_over_time(git_commits_ahead[180d])", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Commits ahead", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "How many changes are there to be fetched. 
It is in your best interest to keep this number as low as possible.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 20 + }, + "id": 8, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "last_over_time(git_commits_behind[180d])", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Commits behind", + "type": "stat" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Observability stack github status", + "uid": "3", + "version": 20 +} diff --git a/docker-compose.yaml b/docker-compose.yaml index e6cf3b3..0719427 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -59,3 +59,18 @@ services: user: "${UID}:${GID}" depends_on: - victoriametrics + + obs-github-ingester: + build: + context: . 
+ dockerfile: ./tools/python.Dockerfile + network_mode: host + environment: + VICTORIA_METRICS_URL: http://localhost:9090 + volumes: + - ./:/work_dir + working_dir: /work_dir + user: "${UID}:${GID}" + command: ./tools/obs-stack-github-ingester/obs_stack_github_ingester.py + depends_on: + - victoriametrics diff --git a/tools/obs-stack-github-ingester/obs_stack_github_ingester.py b/tools/obs-stack-github-ingester/obs_stack_github_ingester.py new file mode 100644 index 0000000..8d1bcee --- /dev/null +++ b/tools/obs-stack-github-ingester/obs_stack_github_ingester.py @@ -0,0 +1,206 @@ +import logging +import os +import subprocess +import time +from urllib.parse import urljoin + +import requests + +logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +REMOTE_URL = "https://github.com/dfinity/ic-observability-stack.git" +VICTORIA_METRICS_URL = os.getenv("VICTORIA_METRICS_URL", "http://localhost:9090") + + +def wait_for_victoria_metrics(victoria_url): + """Wait for VictoriaMetrics to be ready""" + + logging.info("Waiting for VictoriaMetrics to be ready...") + + while True: + try: + response = requests.get(f"{victoria_url}/-/ready", timeout=5) + if response.status_code == 200: + logging.info("✅ VictoriaMetrics is ready") + return + except requests.exceptions.RequestException: + continue + + logging.info(f" Waiting for VictoriaMetrics at {victoria_url}...") + time.sleep(2) + + +def _run_git_command(args): + """ + Run a git command and return its stripped output. + """ + try: + result = subprocess.run( + ["git"] + args, capture_output=True, text=True, check=True + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + logging.error("Git command failed: %s", e.stderr.strip()) + return None + + +def get_current_commit(): + """ + Get the current commit hash. 
+ """ + commit = _run_git_command(["rev-parse", "HEAD"]) + if commit: + logging.debug("Installed commit: %s", commit) + return commit + + +def get_local_state(): + """ + Clean or Dirty. + + If there are any uncommited changes. + """ + status = _run_git_command(["status", "--porcelain"]) + state = "Clean" if not status else "Dirty" + logging.debug("Local repository state: %s", state) + + return state + + +def get_remote_commit_hash(): + """ + Get the latest commit from remote origin. + """ + output = _run_git_command(["ls-remote", REMOTE_URL, "refs/heads/master"]) + if not output: + return "Unknown" + + commit = output.split()[0] + logging.debug("Remote head commit: %s", commit) + + return commit + + +def get_commits_difference(local_commit, remote_commit): + """ + Get the difference from installed and remote commit. + + Note: it is possible that the current branch has + a commit which isn't present on remote which should + be accounted for. + """ + local_origin_commit = _run_git_command( + ["merge-base", "HEAD", "refs/remotes/origin/master"] + ) + + if not all([local_commit, local_origin_commit, remote_commit]): + logging.warning( + "Cannot determine commits (missing refs): local_commit = %s, remote_commit = %s and local_origin_commit = %s", + local_commit, + remote_commit, + local_origin_commit, + ) + return {"ahead": "NaN", "behind": "NaN"} + + ahead_count = _run_git_command( + ["rev-list", "--count", f"{local_origin_commit}..{local_commit}"] + ) + + api_url = f"https://api.github.com/repos/dfinity/ic-observability-stack/compare/{remote_commit}...{local_origin_commit}" + response = requests.get( + api_url, + headers={ + "User-Agent": "python", + }, + timeout=10, + ) + try: + response.raise_for_status() + except Exception: + logging.error("Failed to fetch diff from remote: %s", response.text) + return {"ahead": ahead_count, "behind": "NaN"} + + behind_count = response.json()["behind_by"] + + return {"ahead": ahead_count, "behind": behind_count} + + +def 
make_line(metric_name, value, ts, **kwargs): + """ + Make a prometheus metric line + """ + + return f"{metric_name}{{ {', '.join([f'{key}="{value}"' for key, value in kwargs.items()])} }} {value} {ts}" + + +def send_to_victoria(metrics, victoria_url): + import_url = urljoin(victoria_url.rstrip("/") + "/", "api/v1/import/prometheus") + + response = requests.post( + import_url, + data=metrics.encode("utf-8"), + headers={"Content-Type": "text/plain"}, + timeout=30, + ) + + try: + response.raise_for_status() + except Exception: + logging.error("Failed to send metrics. Response: %s", response.text) + return + + logging.info("Successfully sent metrics to victoria") + + +def ingest_metrics(installed_commit, victoria_url): + timestamp_ms = int(time.time() * 1000) + state = get_local_state() + + remote_commit = get_remote_commit_hash() + + # Update the difference from the current commit + # because state can change during running of the + # stack. + difference = get_commits_difference(get_current_commit(), remote_commit) + + metrics = ( + "\n".join( + [ + make_line( + "git_installed_commit", 1, timestamp_ms, commit=installed_commit + ), + make_line("git_local_state", 1, timestamp_ms, state=state), + make_line("git_remote_commit", 1, timestamp_ms, commit=remote_commit), + make_line("git_commits_ahead", difference["ahead"], timestamp_ms), + make_line("git_commits_behind", difference["behind"], timestamp_ms), + ] + ) + + "\n" + ) + + send_to_victoria(metrics, victoria_url) + + +def main(): + logging.info("Running obs stack github ingester") + + # Only fetch installed commit on startup + installed_commit = get_current_commit() + + while True: + try: + ingest_metrics(installed_commit, VICTORIA_METRICS_URL) + except Exception as e: + logging.error("Something went wrong during last execution: %s", e) + + logging.info("Sleeping for 5 minutes") + + time.sleep(5 * 60) + + +if __name__ == "__main__": + main() diff --git a/tools/python.Dockerfile b/tools/python.Dockerfile index 
292ee89..8cc8e5d 100644 --- a/tools/python.Dockerfile +++ b/tools/python.Dockerfile @@ -5,4 +5,7 @@ RUN pip install --no-cache-dir \ requests \ ic-py +RUN apt-get update && \ + apt-get install -y --no-install-recommends git + ENTRYPOINT ["python3"]