From 92942c8bfe5bbcfaff24e10646379aead6a1679e Mon Sep 17 00:00:00 2001 From: Alvaro Estrella Date: Fri, 16 May 2025 13:54:03 -0700 Subject: [PATCH 1/3] feat(health-endpoint): Added health check endpoint to UWAPI. Added the endpoint to the README. --- README.md | 48 ++++++++++++++++++++++++++++++++++++++ server.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/README.md b/README.md index 9a280f3..bfd438a 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,54 @@ This file sets up the server and runs it from the given port in main. ] ``` +- `/health/` : + - Returns the current health and status of the UWAPI middleware and its dependant backend services. + + UWAPI health fields: + - `status`: (String) Always `"ok"`. Indicates that UWAPI is online. + - `http_code`: (Integer) Always HTTP `200`. Indicates that UWAPI is online. + - `timestamp`: (Integer) ISO 8601 UTC timestamp of when the health response was made (e.g., `"2025-05-16T20:25:55Z"`). + - `uptime`: (String) Time UWAPI has been running in `Xd Yh Zm Ws` format. + - `cpu_usage`: (String) The percentage of CPU currently being used by the UWAPI (e.g., `"21.2%"`). + - `memory_usage`: (String) The percentage of total system memory currently used by UWAPI (e.g., `"10.1%"`). + - `process_count`: (String) Number of processes currently running on the system. + - `backend_services`: (Object) An object describing the status of each backend service UWAPI depends on. + + For each available backend service the following health fields are appended `backend_services`: + - `status`: (String) `"ok"` if the service responded successfully; `"fail"` if it did not respond or returned an error. + - `http_code`: (Integer) Service health check response HTTP code. + - `latency`: (Integer) Present only if the health check succeeded. Round-trip latency in milliseconds to reach the service. + - `error`: (String)Present only if the health check failed. 
Contains a short error message. + + Below is an example response from `health/`: + + ```json + { + "backend_services": { + "gamesmanclassic": { + "http_code": 200, + "latency_ms": 109, + "status": "ok" + }, + "gamesmanone": { + "error": "HTTPConnectionPool(host='nyc.cs.berkeley.edu', port=8084): Max retries exceeded with url: /health (Caused by ConnectTimeoutError(\u003Curllib3.connection.HTTPConnection object at 0x106b46c10\u003E, 'Connection to nyc.cs.berkeley.edu timed out. (connect timeout=1.5)'))", + "status": "fail" + }, + "gamesmanpuzzles": { + "http_code": 200, + "latency_ms": 40, + "status": "ok" + } + }, + "cpu_usage": "6.1%", + "memory_usage": "58.5%", + "process_count": 887, + "status": "ok", + "timestamp": "2025-05-16T20:47:47Z", + "uptime": "0d 0h 0m 1s" + } + ``` + - `/<game_id>/` : - Returns general information about the game specified by `game_id`. This is used, for example, by GamesmanUni when a game is clicked in order to see which variants are available in order to render the list of available variants. 
diff --git a/server.py b/server.py index 1bf6a3a..b47851a 100644 --- a/server.py +++ b/server.py @@ -6,9 +6,21 @@ from games.models import Value, Remoteness from md_api import md_instr +from games.gamesman_classic import GamesmanClassic +from games.gamesman_one import GamesmanOne +from games.gamesman_puzzles import GamesmanPuzzles +import requests + +import time +import psutil +from datetime import datetime, timezone + app = Flask(__name__) CORS(app) +start_time = time.time() +BACKEND_SERVICES = [GamesmanClassic, GamesmanPuzzles, GamesmanOne] + # Helper Functions def error(a): @@ -165,6 +177,13 @@ def wrangle_move_objects_2Player(position_data): move_objs.sort(key=key_move_obj_by_move_value_then_delta_remoteness) +def format_time(seconds: float) -> str: + seconds = int(seconds) + days = seconds // 86400 + hours = (seconds % 86400) // 3600 + minutes = (seconds % 3600) // 60 + secs = seconds % 60 + return f"{days}d {hours}h {minutes}m {secs}s" # Routes @app.route("/") @@ -178,6 +197,56 @@ def get_games() -> list[dict[str, str]]: all_games.sort(key=lambda g: g['name']) return jsonify(all_games) +@app.route("/health") +def get_health(): + uptime_seconds = time.time() - start_time + uptime = format_time(uptime_seconds) + cpu_usage = psutil.cpu_percent(interval=0.1) + memory = psutil.virtual_memory() + memory_usage = f"{memory.percent}%" + process_count = len(psutil.pids()) + timestamp = datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace('+00:00', 'Z') + + services_status = {} + + for service_cls in BACKEND_SERVICES: + service_name = service_cls.__name__.lower() + try: + start = time.time() + health_url = f"{service_cls.url}health" + res = requests.get(health_url, timeout=1.5) + latency_ms = round((time.time() - start) * 1000) + if res.status_code == 200: + services_status[service_name] = { + "status": "ok", + "http_code": res.status_code, + "latency_ms": latency_ms + } + else: + services_status[service_name] = { + "status": "fail", + "http_code": 
res.status_code, + "latency_ms": latency_ms, + "error": f"Status code {res.status_code}" + } + except Exception as e: + services_status[service_name] = { + "status": "fail", + "error": str(e) + } + + payload = { + "status": "ok", + "uptime": uptime, + "cpu_usage": f"{cpu_usage}%", + "memory_usage": memory_usage, + "process_count": process_count, + "timestamp": timestamp, + "backend_services": services_status + } + + return jsonify(payload), 200 + @app.route("//") def get_game(game_id: str): if game_id in games: From 33e2e2c68854ad66371b88d8a27c5760b2a1eec4 Mon Sep 17 00:00:00 2001 From: Alvaro Estrella Date: Fri, 16 May 2025 14:26:30 -0700 Subject: [PATCH 2/3] fix(health-endpoint): Removed the error field. --- README.md | 8 +++----- server.py | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bfd438a..aabf9e2 100644 --- a/README.md +++ b/README.md @@ -57,11 +57,10 @@ This file sets up the server and runs it from the given port in main. For each available backend service the following health fields are appended `backend_services`: - `status`: (String) `"ok"` if the service responded successfully; `"fail"` if it did not respond or returned an error. - - `http_code`: (Integer) Service health check response HTTP code. - - `latency`: (Integer) Present only if the health check succeeded. Round-trip latency in milliseconds to reach the service. - - `error`: (String)Present only if the health check failed. Contains a short error message. + - `http_code`: (Integer) Service healthcheck response HTTP code. + - `latency`: (Integer) Present only if the healthcheck succeeded. Round-trip latency in milliseconds to reach the service. - Below is an example response from `health/`: + Below is an example response from `/health/`: ```json { @@ -72,7 +71,6 @@ This file sets up the server and runs it from the given port in main. 
"status": "ok" }, "gamesmanone": { - "error": "HTTPConnectionPool(host='nyc.cs.berkeley.edu', port=8084): Max retries exceeded with url: /health (Caused by ConnectTimeoutError(\u003Curllib3.connection.HTTPConnection object at 0x106b46c10\u003E, 'Connection to nyc.cs.berkeley.edu timed out. (connect timeout=1.5)'))", "status": "fail" }, "gamesmanpuzzles": { diff --git a/server.py b/server.py index b47851a..81e8352 100644 --- a/server.py +++ b/server.py @@ -232,7 +232,6 @@ def get_health(): except Exception as e: services_status[service_name] = { "status": "fail", - "error": str(e) } payload = { From 7f2e342940accad50d9362fde681e428ce9d509c Mon Sep 17 00:00:00 2001 From: Alvaro Estrella Date: Fri, 16 May 2025 14:31:54 -0700 Subject: [PATCH 3/3] fix(README.md): Typo fixes. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index aabf9e2..a30cb72 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ This file sets up the server and runs it from the given port in main. ] ``` -- `/health/` : +- `/health` : - Returns the current health and status of the UWAPI middleware and its dependant backend services. UWAPI health fields: @@ -50,8 +50,8 @@ This file sets up the server and runs it from the given port in main. - `http_code`: (Integer) Always HTTP `200`. Indicates that UWAPI is online. - `timestamp`: (Integer) ISO 8601 UTC timestamp of when the health response was made (e.g., `"2025-05-16T20:25:55Z"`). - `uptime`: (String) Time UWAPI has been running in `Xd Yh Zm Ws` format. - - `cpu_usage`: (String) The percentage of CPU currently being used by the UWAPI (e.g., `"21.2%"`). - - `memory_usage`: (String) The percentage of total system memory currently used by UWAPI (e.g., `"10.1%"`). + - `cpu_usage`: (String) The percentage of CPU currently used (e.g., `"21.2%"`). + - `memory_usage`: (String) The percentage of total system memory currently used (e.g., `"10.1%"`). 
- `process_count`: (String) Number of processes currently running on the system. - `backend_services`: (Object) An object describing the status of each backend service UWAPI depends on. @@ -60,7 +60,7 @@ This file sets up the server and runs it from the given port in main. - `http_code`: (Integer) Service healthcheck response HTTP code. - `latency`: (Integer) Present only if the healthcheck succeeded. Round-trip latency in milliseconds to reach the service. - Below is an example response from `/health/`: + Below is an example response from `/health`: ```json {