From 9f18380a16805daa347ff6939dfe65837dcb8c98 Mon Sep 17 00:00:00 2001
From: Bimantoro Maesa
Date: Fri, 6 Mar 2026 23:36:55 +0700
Subject: [PATCH] feat(api): Add root endpoint and docs(readme): examples
 README for runnable scripts

---
 README.md          |  12 +++
 examples/README.md | 257 +++++++++++++++++++++++++++++++++++++++++++++
 mataserver/main.py |   5 +
 3 files changed, 274 insertions(+)
 create mode 100644 examples/README.md

diff --git a/README.md b/README.md
index ce4d1a8..27e32b1 100644
--- a/README.md
+++ b/README.md
@@ -249,6 +249,18 @@ asyncio.run(stream())
 
 See [docs/streaming.md](docs/streaming.md) for the full binary frame protocol specification and a complete async client example.
 
+### Runnable example scripts
+
+The [`examples/`](examples/) directory contains ready-to-run Python clients:
+
+| Script                                                     | Description                                        |
+| ---------------------------------------------------------- | -------------------------------------------------- |
+| [`examples/rest_infer.py`](examples/rest_infer.py)         | REST inference — detect, classify, segment         |
+| [`examples/rest_vlm.py`](examples/rest_vlm.py)             | REST inference — visual language model (VLM)       |
+| [`examples/ws_video_infer.py`](examples/ws_video_infer.py) | WebSocket video streaming — frame-by-frame results |
+
+See [`examples/README.md`](examples/README.md) for full usage, argument reference, and sample output for each script.
+
 ---
 
 ## Development Setup

diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..3c84d33
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,257 @@
+# Examples
+
+This directory contains runnable client examples for MATA-SERVER.
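The REST scripts below all boil down to one request shape. As a rough sketch, assuming the JSON payload uses `task`, `model`, and base64 `image` fields (these field names are illustrative; see [`../docs/api.md`](../docs/api.md) for the authoritative request schema):

```python
# Minimal sketch of a single-shot REST inference call.
# NOTE: the payload field names ("task", "model", "image") are assumptions
# for illustration; docs/api.md defines the real schema.
import base64

import requests


def build_payload(image_path: str, task: str, model: str) -> dict:
    """Read an image file and build the (assumed) JSON body for POST /v1/infer."""
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("ascii")
    return {"task": task, "model": model, "image": image_b64}


def infer(payload: dict, host: str = "127.0.0.1", port: int = 8110) -> dict:
    """Send the request and return the decoded JSON response."""
    resp = requests.post(f"http://{host}:{port}/v1/infer", json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()


# Example (requires a running server):
#   result = infer(build_payload("examples/images/coco_cat_remote.jpg",
#                                "detect", "PekingU/rtdetr_r18vd"))
```

For anything beyond a quick smoke test, prefer the full scripts, which also handle prompts, task defaults, and output formatting.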
+ +| Script | Transport | Tasks covered | +| ---------------------------------------- | -------------------------------- | -------------------------------------- | +| [`rest_infer.py`](#rest_inferpy) | REST (`POST /v1/infer`) | `detect`, `classify`, `segment` | +| [`rest_vlm.py`](#rest_vlmpy) | REST (`POST /v1/infer`) | `vlm` | +| [`ws_video_infer.py`](#ws_video_inferpy) | WebSocket (`WS /v1/stream/{id}`) | `detect`, `segment`, `classify`, `vlm` | + +> **Prerequisites** — a MATA-SERVER instance must be reachable before running any example. +> Start one locally with: +> +> ```bash +> MATA_SERVER_AUTH_MODE=none mataserver serve +> ``` +> +> All examples default to `127.0.0.1:8110`. Pass `--host` / `--port` to override. + +--- + +## rest_infer.py + +Runs single-shot inference against the REST API using a **base64-encoded image** payload. +Covers the three classic vision tasks — object detection, image classification, and instance segmentation. + +### Requirements + +```bash +pip install requests +``` + +### Usage + +```bash +# Run all three tasks with the default model set +python examples/rest_infer.py --image examples/images/coco_cat_remote.jpg + +# Run a single task +python examples/rest_infer.py \ + --image examples/images/coco_cat_remote.jpg \ + --task detect \ + --model PekingU/rtdetr_r18vd + +# Zero-shot open-vocabulary detection with text prompts +python examples/rest_infer.py \ + --image examples/images/coco_cat_remote.jpg \ + --task detect \ + --model google/owlv2-base-patch16-ensemble \ + --prompts "cat,dog,remote control" +``` + +### Arguments + +| Argument | Default | Description | +| ----------- | ------------ | -------------------------------------------------------------------- | +| `--image` | _(required)_ | Path to an image file | +| `--task` | all three | One of `detect`, `classify`, `segment` | +| `--model` | task default | HuggingFace model repo ID; defaults to the bundled task-to-model map | +| `--prompts` | — | Comma-separated text prompts for 
zero-shot / open-vocabulary models | +| `--host` | `127.0.0.1` | Server hostname | +| `--port` | `8110` | Server port | + +### Default models + +| Task | Default model | +| ---------- | ---------------------------------------------- | +| `detect` | `PekingU/rtdetr_r18vd` | +| `classify` | `google/vit-base-patch16-224` | +| `segment` | `facebook/mask2former-swin-tiny-coco-instance` | + +### Sample output + +``` +--- Task: DETECT | Model: PekingU/rtdetr_r18vd --- + 3 detection(s) + [0.94] cat bbox=[42, 10, 380, 470] + [0.81] remote bbox=[200, 300, 260, 420] + [0.57] couch bbox=[0, 250, 480, 480] + Full response keys: ['schema_version', 'task', 'model', 'timestamp', 'detections'] + +--- Task: CLASSIFY | Model: google/vit-base-patch16-224 --- + 5 class(es) + [0.84] tabby cat + [0.07] Egyptian cat + ... + +--- Task: SEGMENT | Model: facebook/mask2former-swin-tiny-coco-instance --- + 4 segment(s) + [0.91] cat bbox=[42, 10, 380, 470] + ... +``` + +--- + +## rest_vlm.py + +Sends an image and a natural-language prompt to a **Visual Language Model** (VLM) via the REST API and prints the generated response text. + +### Requirements + +```bash +pip install requests +``` + +### Usage + +```bash +# Basic question about an image +python examples/rest_vlm.py \ + --image examples/images/coco_cat_remote.jpg \ + --prompt "What do you see in this image?" + +# Control generation parameters +python examples/rest_vlm.py \ + --image examples/images/coco_cat_remote.jpg \ + --prompt "List every object you can identify." \ + --max-tokens 256 \ + --temperature 0.3 + +# Use a different VLM +python examples/rest_vlm.py \ + --image examples/images/coco_cat_remote.jpg \ + --prompt "Describe the scene in one sentence." 
\ + --model Qwen/Qwen2.5-VL-7B-Instruct +``` + +### Arguments + +| Argument | Default | Description | +| --------------- | ----------------------------- | ------------------------------------------------------------- | +| `--image` | _(required)_ | Path to an image file | +| `--prompt` | `"Describe this image."` | Natural-language question or instruction | +| `--model` | `Qwen/Qwen2.5-VL-3B-Instruct` | VLM model repo ID | +| `--max-tokens` | — | Maximum number of tokens to generate | +| `--temperature` | — | Sampling temperature (`0.0` = greedy, higher = more creative) | +| `--host` | `127.0.0.1` | Server hostname | +| `--port` | `8110` | Server port | + +### Sample output + +``` +--- VLM Inference --- + Model : Qwen/Qwen2.5-VL-3B-Instruct + Prompt : 'What do you see in this image?' + + Response: +The image shows a cat sitting on a couch next to a remote control. The cat appears to be relaxed and is looking towards the camera. +``` + +--- + +## ws_video_infer.py + +Streams a local video file to MATA-SERVER over a **WebSocket connection** and prints inference results frame-by-frame as they arrive. +Implements the full session lifecycle: + +1. `POST /v1/sessions` — create a streaming session and receive a `session_id` +2. `WS /v1/stream/{session_id}` — connect and stream binary-encoded frames +3. `DELETE /v1/sessions/{session_id}` — clean up after streaming ends + +Frames are encoded using the MATA binary wire format: a 13-byte header (`frame_id` uint32 BE + `timestamp` float64 BE + `encoding` uint8) followed by JPEG bytes. 
+ +### Requirements + +```bash +pip install aiohttp opencv-python +``` + +### Usage + +```bash +# Object detection on a video +python examples/ws_video_infer.py \ + --video examples/videos/cup.mp4 \ + --task detect + +# Limit to first 60 frames and cap the send rate +python examples/ws_video_infer.py \ + --video examples/videos/cup.mp4 \ + --task detect \ + --max-frames 60 \ + --fps-limit 15 + +# Use the "latest" frame policy — server always processes the newest frame +# (drops intermediate frames when inference is slower than send rate) +python examples/ws_video_infer.py \ + --video examples/videos/cup.mp4 \ + --model PekingU/rtdetr_r18vd \ + --task detect \ + --frame-policy latest + +# Authenticated server +python examples/ws_video_infer.py \ + --video examples/videos/cup.mp4 \ + --task detect \ + --api-key my-secret-key +``` + +### Arguments + +| Argument | Default | Description | +| ---------------- | ---------------------- | ---------------------------------------------------------------------- | +| `--video` | _(required)_ | Path to a video file (`mp4`, `avi`, etc.) | +| `--task` | _(required)_ | Inference task: `detect`, `segment`, `classify`, `vlm`, etc. | +| `--model` | `PekingU/rtdetr_r18vd` | HuggingFace model repo ID | +| `--max-frames` | `0` (all) | Maximum frames to send; `0` = entire video | +| `--fps-limit` | `0` (native) | Cap send rate in frames per second; `0` = no limit | +| `--frame-policy` | `queue` | `queue` (process every frame in order) or `latest` (skip stale frames) | +| `--api-key` | — | Bearer token for authenticated servers | +| `--host` | `127.0.0.1` | Server hostname | +| `--port` | `8110` | Server port | + +### Frame policies + +| Policy | Behaviour | Best for | +| -------- | ------------------------------------------------------------------------------------------ | ----------------------------------------- | +| `queue` | Every frame is queued and processed in order. No frames are dropped. 
| Offline analysis, accuracy-critical tasks |
| `latest` | When the server is busy, older queued frames are dropped and only the most recent is kept. | Real-time / live-stream scenarios         |

### Sample output

```
[1/3] Creating session model='PekingU/rtdetr_r18vd' task='detect' frame_policy='queue'
      session_id : sess_a1b2c3d4e5f6

[2/3] Streaming 120/120 frames @ 30.0 fps
  [frame 0] 2 detections
  [frame 1] 2 detections
  [frame 2] 3 detections
  ...
  Sent 120 frames. Waiting for results…

  Sent    : 120 frames in 4.01s (29.9 fps)
  Received: 118 results | 0 dropped | 0 errors

[3/3] Deleting session sess_a1b2c3d4e5f6
  Session deleted (204)
```

---

## Sample assets

| File                         | Description                                                                             |
| ---------------------------- | --------------------------------------------------------------------------------------- |
| `images/coco_cat_remote.jpg` | COCO-style photo with a cat and a TV remote — used by `rest_infer.py` and `rest_vlm.py` |
| `videos/cup.mp4`             | Short clip of a cup — used by `ws_video_infer.py`                                       |

---

## Further reading

- [API reference](../docs/api.md) — full endpoint specs and request/response schemas
- [Streaming protocol](../docs/streaming.md) — binary frame format and WebSocket lifecycle
- [Deployment guide](../docs/deployment.md) — Docker, GPU, and production configuration
- [Root README](../README.md) — project overview, quick-start, and CLI reference

diff --git a/mataserver/main.py b/mataserver/main.py
index fa4e9ea..63fb245 100644
--- a/mataserver/main.py
+++ b/mataserver/main.py
@@ -107,6 +107,11 @@ def create_app() -> FastAPI:
 
     app.include_router(api_router, prefix="/v1")
 
+    @app.get("/", tags=["root"], include_in_schema=False)
+    async def root() -> dict[str, str]:
+        """Minimal liveness check — intentionally lightweight, no auth required."""
+        return {"status": "running", "message": "mataserver is running"}
+
     return app