From 199ad0ee98d7cb702418ae71fff5dbb8ccc4369b Mon Sep 17 00:00:00 2001 From: Bimantoro Maesa Date: Fri, 6 Mar 2026 21:26:31 +0700 Subject: [PATCH] Update server port from 8080 to 8110 across configuration files and documentation --- .env.example | 2 +- Dockerfile | 17 ++++++++++------- README.md | 24 ++++++++++++------------ configs/default.yaml | 2 +- docker-compose.yml | 8 +++++++- docs/api.md | 24 ++++++++++++------------ docs/deployment.md | 22 +++++++++++----------- docs/streaming.md | 6 +++--- examples/ws_video_infer.py | 3 ++- mataserver/config.py | 2 +- mataserver/models/registry.py | 5 +++++ tests/conftest.py | 2 +- tests/test_api/test_auth.py | 2 +- tests/test_api/test_health.py | 2 +- tests/test_api/test_infer.py | 2 +- tests/test_api/test_models.py | 2 +- tests/test_api/test_sessions.py | 2 +- 17 files changed, 71 insertions(+), 56 deletions(-) diff --git a/.env.example b/.env.example index 3dece3e..200cafc 100644 --- a/.env.example +++ b/.env.example @@ -9,7 +9,7 @@ MATA_SERVER_HOST=0.0.0.0 # TCP port the HTTP/WebSocket server listens on -MATA_SERVER_PORT=8080 +MATA_SERVER_PORT=8110 # Log level: debug | info | warning | error | critical MATA_SERVER_LOG_LEVEL=info diff --git a/Dockerfile b/Dockerfile index 46f6779..8638d6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,9 +42,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libgomp1 \ && rm -rf /var/lib/apt/lists/* -# Make python3.11 the default python / python3 +# Make python3.11 the default python / python3, and symlink into /usr/local/bin +# so the entry-point shebang written by pip (#!/usr/local/bin/python3.11) resolves RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \ + && ln -sf /usr/bin/python3.11 /usr/local/bin/python3.11 # Copy installed packages from builder stage COPY --from=builder /install /usr/local @@ -57,16 +59,17 @@ WORKDIR /app # Pre-create data sub-directories so the volume mount point exists RUN mkdir -p /var/lib/mataserver/models \ - /var/lib/mataserver/cache \ - /var/lib/mataserver/blobs + /var/lib/mataserver/cache \ + /var/lib/mataserver/blobs # Persistent data volume (model weights, cache, blobs) VOLUME /var/lib/mataserver -EXPOSE 8080 +EXPOSE 8110 ENV MATA_SERVER_HOST=0.0.0.0 -ENV MATA_SERVER_PORT=8080 +ENV MATA_SERVER_PORT=8110 ENV MATA_SERVER_DATA_DIR=/var/lib/mataserver +ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages -ENTRYPOINT ["mataserver"] +ENTRYPOINT ["mataserver", "serve"] diff --git a/README.md b/README.md index 812b871..ce4d1a8 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ cp .env.example .env mataserver serve ``` -The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` for the interactive API explorer. +The server starts on `http://0.0.0.0:8110`. Visit `http://localhost:8110/docs` for the interactive API explorer. ### Docker @@ -60,13 +60,13 @@ The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` f docker pull ghcr.io/datamata-io/mataserver:latest # Run (CPU-only) -docker run -p 8080:8080 \ +docker run -p 8110:8110 \ -e MATA_SERVER_AUTH_MODE=none \ -v mataserver-data:/var/lib/mataserver \ ghcr.io/datamata-io/mataserver:latest # Run with GPU (requires NVIDIA Container Toolkit) -docker run --gpus all -p 8080:8080 \ +docker run --gpus all -p 8110:8110 \ -e MATA_SERVER_AUTH_MODE=none \ -v mataserver-data:/var/lib/mataserver \ ghcr.io/datamata-io/mataserver:latest @@ -83,7 +83,7 @@ docker compose up -d Verify the server is running: ```bash -curl http://localhost:8080/v1/health +curl http://localhost:8110/v1/health # {"status":"ok","version":"0.1.0","gpu_available":false} ``` @@ -115,7 +115,7 @@ All settings use the `MATA_SERVER_` environment variable prefix and can also be | Variable | Default | Description | | ----------------------------- | --------------------- | -------------------------------------------------------------------------- | | `MATA_SERVER_HOST` | `0.0.0.0` | Bind address | -| `MATA_SERVER_PORT` | `8080` | Bind port | +| `MATA_SERVER_PORT` | `8110` | Bind port | | `MATA_SERVER_LOG_LEVEL` | `info` | Logging level (`debug`, `info`, `warning`, `error`) | | `MATA_SERVER_AUTH_MODE` | `api_key` | Auth mode: `api_key` (enforce bearer tokens) or `none` (open, dev only) | | `MATA_SERVER_API_KEYS` | _(empty)_ | Comma-separated list of valid API keys (required when `auth_mode=api_key`) | @@ -134,7 +134,7 @@ See `.env.example` for a fully annotated template with production-recommended va ## API Endpoints -Interactive docs are served at `http://localhost:8080/docs` (Swagger UI) and `http://localhost:8080/redoc`. +Interactive docs are served at `http://localhost:8110/docs` (Swagger UI) and `http://localhost:8110/redoc`. | Method | Path | Auth | Description | | ----------- | --------------------------- | -------- | ------------------------------------------------- | @@ -160,7 +160,7 @@ For full request/response schemas, per-endpoint error codes, and additional `cur ### Health check ```bash -curl http://localhost:8080/v1/health +curl http://localhost:8110/v1/health ``` ```json @@ -170,7 +170,7 @@ curl http://localhost:8080/v1/health ### Pull a model from HuggingFace ```bash -curl -X POST http://localhost:8080/v1/models/pull \ +curl -X POST http://localhost:8110/v1/models/pull \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{"source": "hf://datamata/rtdetr-l"}' @@ -185,7 +185,7 @@ curl -X POST http://localhost:8080/v1/models/pull \ ```bash IMAGE_B64=$(base64 -w0 /path/to/image.jpg) -curl -X POST http://localhost:8080/v1/infer \ +curl -X POST http://localhost:8110/v1/infer \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"${IMAGE_B64}\", \"confidence\": 0.5}" @@ -206,7 +206,7 @@ curl -X POST http://localhost:8080/v1/infer \ ### Single-shot inference (file upload) ```bash -curl -X POST http://localhost:8080/v1/infer/upload \ +curl -X POST http://localhost:8110/v1/infer/upload \ -H "Authorization: Bearer your-api-key" \ -F "model=datamata/rtdetr-l" \ -F "confidence=0.5" \ @@ -217,7 +217,7 @@ curl -X POST http://localhost:8080/v1/infer/upload \ ```bash # 1. Create the session -SESSION=$(curl -s -X POST http://localhost:8080/v1/sessions \ +SESSION=$(curl -s -X POST http://localhost:8110/v1/sessions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{"model": "datamata/rtdetr-l", "task": "detect"}' \ @@ -234,7 +234,7 @@ SESSION_ID = "sess_xxxxxxxxxxxx" # replace with session_id from above API_KEY = "your-api-key" async def stream(): - uri = f"ws://localhost:8080/v1/stream/{SESSION_ID}?token={API_KEY}" + uri = f"ws://localhost:8110/v1/stream/{SESSION_ID}?token={API_KEY}" async with websockets.connect(uri) as ws: with open("/path/to/image.jpg", "rb") as f: image = f.read() diff --git a/configs/default.yaml b/configs/default.yaml index 751de41..4bf5479 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -1,5 +1,5 @@ host: "0.0.0.0" -port: 8080 +port: 8110 log_level: "info" auth_mode: "none" keep_alive: 600 diff --git a/docker-compose.yml b/docker-compose.yml index 3a44730..ae6b5c4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,9 +2,15 @@ services: mataserver: build: . ports: - - "8080:8080" + - "8110:8110" volumes: - mataserver-data:/var/lib/mataserver + environment: + # Explicitly set the Linux data path so a local .env containing a + # Windows MATA_SERVER_DATA_DIR value cannot override it inside the + # container. Docker Compose gives `environment` higher priority than + # `env_file`, so this always wins. + - MATA_SERVER_DATA_DIR=/var/lib/mataserver env_file: - .env deploy: diff --git a/docs/api.md b/docs/api.md index bb20d6c..404f4bf 100644 --- a/docs/api.md +++ b/docs/api.md @@ -220,7 +220,7 @@ mataserver load [--url URL] [--api-key KEY] mataserver load facebook/detr-resnet-50 # Explicit server address and API key -mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8080 --api-key my-secret +mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8110 --api-key my-secret # Using the alias mataserver warmup facebook/detr-resnet-50 @@ -361,7 +361,7 @@ Returns server status. **No authentication required.** **Example**: ```bash -curl http://localhost:8080/v1/health +curl http://localhost:8110/v1/health ``` --- @@ -414,7 +414,7 @@ Model `state` values: **Example**: ```bash -curl -H "Authorization: Bearer $KEY" http://localhost:8080/v1/models +curl -H "Authorization: Bearer $KEY" http://localhost:8110/v1/models ``` --- @@ -437,7 +437,7 @@ Retrieve full details for a specific model. ```bash curl -H "Authorization: Bearer $KEY" \ - http://localhost:8080/v1/models/PekingU/rtdetr_v2_r101vd + http://localhost:8110/v1/models/PekingU/rtdetr_v2_r101vd ``` --- @@ -477,7 +477,7 @@ Download a model from HuggingFace into the default HuggingFace cache (`~/.cache/ **Example**: ```bash -curl -X POST http://localhost:8080/v1/models/pull \ +curl -X POST http://localhost:8110/v1/models/pull \ -H "Authorization: Bearer $KEY" \ -H "Content-Type: application/json" \ -d '{"model": "PekingU/rtdetr_v2_r101vd", "task": "detect"}' @@ -512,7 +512,7 @@ Pre-load a model into memory without running inference. Useful for eliminating c **Example**: ```bash -curl -X POST http://localhost:8080/v1/models/warmup \ +curl -X POST http://localhost:8110/v1/models/warmup \ -H "Authorization: Bearer $KEY" \ -H "Content-Type: application/json" \ -d '{"model": "datamata/rtdetr-l"}' @@ -735,7 +735,7 @@ All fields are optional. Only send the parameters relevant to your model's task. ```bash IMAGE_B64=$(base64 -w0 photo.jpg) -curl -X POST http://localhost:8080/v1/infer \ +curl -X POST http://localhost:8110/v1/infer \ -H "Authorization: Bearer $KEY" \ -H "Content-Type: application/json" \ -d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"$IMAGE_B64\", \"params\": {\"confidence\": 0.4}}" @@ -780,7 +780,7 @@ Run single-shot inference with a multipart form upload. Convenient when sending **Example**: ```bash -curl -X POST http://localhost:8080/v1/infer/upload \ +curl -X POST http://localhost:8110/v1/infer/upload \ -H "Authorization: Bearer $KEY" \ -F "model=datamata/rtdetr-l" \ -F "confidence=0.4" \ @@ -836,7 +836,7 @@ Create a new streaming session. The server ensures the requested model is loaded ```json { "session_id": "sess_3f8a1b2c9d4e", - "ws_url": "ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e" + "ws_url": "ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e" } ``` @@ -853,7 +853,7 @@ The `ws_url` uses `wss://` when the server is reached over HTTPS. **Example**: ```bash -curl -X POST http://localhost:8080/v1/sessions \ +curl -X POST http://localhost:8110/v1/sessions \ -H "Authorization: Bearer $KEY" \ -H "Content-Type: application/json" \ -d '{"model": "datamata/rtdetr-l", "task": "detect", "params": {"confidence": 0.5}}' @@ -876,7 +876,7 @@ Close and clean up a streaming session. If the WebSocket is still connected it w **Example**: ```bash -curl -X DELETE http://localhost:8080/v1/sessions/sess_3f8a1b2c9d4e \ +curl -X DELETE http://localhost:8110/v1/sessions/sess_3f8a1b2c9d4e \ -H "Authorization: Bearer $KEY" ``` @@ -907,5 +907,5 @@ WebSocket endpoint for real-time frame-by-frame inference. Clients send binary f **Example connection URL**: ``` -ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key +ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key ``` diff --git a/docs/deployment.md b/docs/deployment.md index 9e20d85..0d50712 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -35,7 +35,7 @@ The Dockerfile uses a two-stage build: a `python:3.11-slim` builder stage compil ### Run (CPU-Only) ```bash -docker run --rm -p 8080:8080 \ +docker run --rm -p 8110:8110 \ -v mataserver-data:/var/lib/mataserver \ -e MATA_SERVER_AUTH_MODE=api_key \ -e MATA_SERVER_API_KEYS=your-secret-key \ @@ -45,7 +45,7 @@ docker run --rm -p 8080:8080 \ ### Run (GPU) ```bash -docker run --rm --gpus all -p 8080:8080 \ +docker run --rm --gpus all -p 8110:8110 \ -v mataserver-data:/var/lib/mataserver \ -e MATA_SERVER_AUTH_MODE=api_key \ -e MATA_SERVER_API_KEYS=your-secret-key \ @@ -57,7 +57,7 @@ Pass `--gpus device=0` to restrict to a specific GPU. ### Verify Startup ```bash -curl http://localhost:8080/v1/health +curl http://localhost:8110/v1/health # {"status":"ok","version":"...","gpu_available":true} ``` @@ -102,7 +102,7 @@ services: mataserver: build: . ports: - - "8080:8080" + - "8110:8110" volumes: - mataserver-data:/var/lib/mataserver env_file: @@ -137,7 +137,7 @@ services: ## 3. Reverse Proxy Setup -A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8080 directly in production. +A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8110 directly in production. ### Caddy @@ -147,7 +147,7 @@ Caddy automatically obtains and renews TLS certificates via Let's Encrypt. ```caddy vision.example.com { - reverse_proxy localhost:8080 { + reverse_proxy localhost:8110 { # Forward the client IP so logs are accurate header_up X-Real-IP {remote_host} header_up X-Forwarded-For {remote_host} @@ -170,7 +170,7 @@ caddy run --config Caddyfile ```nginx upstream mataserver { - server 127.0.0.1:8080; + server 127.0.0.1:8110; keepalive 64; } @@ -312,7 +312,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v | Variable | Default | Description | | ----------------------------- | --------------------- | ---------------------------------------------------------------------- | | `MATA_SERVER_HOST` | `0.0.0.0` | Network interface to bind | -| `MATA_SERVER_PORT` | `8080` | TCP port for HTTP/WebSocket | +| `MATA_SERVER_PORT` | `8110` | TCP port for HTTP/WebSocket | | `MATA_SERVER_LOG_LEVEL` | `info` | Log verbosity: `debug` \| `info` \| `warning` \| `error` \| `critical` | | `MATA_SERVER_AUTH_MODE` | `api_key` | Authentication mode: `api_key` \| `none` | | `MATA_SERVER_API_KEYS` | _(empty)_ | Comma-separated list of valid API keys | @@ -330,7 +330,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v # Bind to all interfaces (reverse proxy handles external access) MATA_SERVER_HOST=0.0.0.0 -MATA_SERVER_PORT=8080 +MATA_SERVER_PORT=8110 MATA_SERVER_LOG_LEVEL=info # Always use api_key in production @@ -394,7 +394,7 @@ services: "CMD", "python", "-c", - "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/health')", + "import urllib.request; urllib.request.urlopen('http://localhost:8110/v1/health')", ] interval: 30s timeout: 5s @@ -465,7 +465,7 @@ docker volume inspect mataserver-data Use a bind mount when you need direct host access to model files: ```bash -docker run --rm --gpus all -p 8080:8080 \ +docker run --rm --gpus all -p 8110:8110 \ -v /data/mataserver:/var/lib/mataserver \ -e MATA_SERVER_API_KEYS=your-key \ mataserver diff --git a/docs/streaming.md b/docs/streaming.md index ca8a73e..b38f419 100644 --- a/docs/streaming.md +++ b/docs/streaming.md @@ -49,7 +49,7 @@ Client Server ```json { "session_id": "sess_a1b2c3d4e5f6", - "ws_url": "ws://localhost:8080/v1/stream/sess_a1b2c3d4e5f6", + "ws_url": "ws://localhost:8110/v1/stream/sess_a1b2c3d4e5f6", "frame_policy": "latest" } ``` @@ -255,7 +255,7 @@ global concurrency limit enforced by the server's internal scheduler. import asyncio, struct, time import websockets -SERVER = "ws://localhost:8080" +SERVER = "ws://localhost:8110" API_KEY = "my-api-key" MODEL = "datamata/rtdetr-l" @@ -271,7 +271,7 @@ async def stream_inference(image_path: str): # 1. Create session async with httpx.AsyncClient() as http: resp = await http.post( - f"http://localhost:8080/v1/sessions", + f"http://localhost:8110/v1/sessions", headers={"Authorization": f"Bearer {API_KEY}"}, json={"model": MODEL, "task": "detect", "params": {"confidence": 0.4}}, ) diff --git a/examples/ws_video_infer.py b/examples/ws_video_infer.py index 130876f..77970d6 100644 --- a/examples/ws_video_infer.py +++ b/examples/ws_video_infer.py @@ -143,6 +143,8 @@ async def receive_loop() -> None: if sleep_for > 0: await asyncio.sleep(sleep_for) + elapsed = time.perf_counter() - t0 + # Wait for in-flight results print(f" Sent {sent} frames. Waiting for results…") await asyncio.sleep(max(2.0, sent * 0.05)) @@ -153,7 +155,6 @@ async def receive_loop() -> None: except asyncio.CancelledError: pass - elapsed = time.perf_counter() - t0 print(f"\n Sent : {sent} frames in {elapsed:.2f}s ({sent/elapsed:.1f} fps)") print(f" Received: {received} results | {drops} dropped | {errors} errors") finally: diff --git a/mataserver/config.py b/mataserver/config.py index 040683e..585c8e5 100644 --- a/mataserver/config.py +++ b/mataserver/config.py @@ -19,7 +19,7 @@ class Settings(BaseSettings): # Server host: str = "0.0.0.0" - port: int = 8080 + port: int = 8110 log_level: str = "info" # Auth diff --git a/mataserver/models/registry.py b/mataserver/models/registry.py index 000b874..e248c3e 100644 --- a/mataserver/models/registry.py +++ b/mataserver/models/registry.py @@ -79,8 +79,13 @@ async def list_models(self) -> list[dict[str, Any]]: Each entry is a dict with at minimum ``model`` and ``task``. When the model is present in the local HF cache the dict also includes ``size_mb`` and ``last_accessed``. + + The registry is re-read from disk on every call so that models added + via the CLI (``mataserver pull``) are visible without restarting the + server. """ async with self._lock: + self._models = self._load_from_disk() models_copy = dict(self._models) cache_map = self._get_hf_cache_map() diff --git a/tests/conftest.py b/tests/conftest.py index c29ad6c..5dc3bfd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ def test_settings(tmp_path): auth_mode="none", data_dir=tmp_path / "data", host="127.0.0.1", - port=8080, + port=8110, log_level="debug", ) diff --git a/tests/test_api/test_auth.py b/tests/test_api/test_auth.py index aee543c..2190c69 100644 --- a/tests/test_api/test_auth.py +++ b/tests/test_api/test_auth.py @@ -159,7 +159,7 @@ def test_real_health_endpoint_via_app_factory(self) -> None: settings = Settings.model_construct( host="0.0.0.0", - port=8080, + port=8110, log_level="info", auth_mode="api_key", api_keys=[_VALID_KEY], diff --git a/tests/test_api/test_health.py b/tests/test_api/test_health.py index 650ee5f..9bdd1e4 100644 --- a/tests/test_api/test_health.py +++ b/tests/test_api/test_health.py @@ -16,7 +16,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) - auth_mode=auth_mode, api_keys=api_keys or [], host="127.0.0.1", - port=8080, + port=8110, log_level="debug", keep_alive=600, max_vram_util=0.85, diff --git a/tests/test_api/test_infer.py b/tests/test_api/test_infer.py index b1e6430..d10189d 100644 --- a/tests/test_api/test_infer.py +++ b/tests/test_api/test_infer.py @@ -24,7 +24,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) - auth_mode=auth_mode, api_keys=api_keys or [], host="127.0.0.1", - port=8080, + port=8110, log_level="debug", keep_alive=600, max_vram_util=0.85, diff --git a/tests/test_api/test_models.py b/tests/test_api/test_models.py index d133b3b..6721403 100644 --- a/tests/test_api/test_models.py +++ b/tests/test_api/test_models.py @@ -21,7 +21,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) - auth_mode=auth_mode, api_keys=api_keys or [], host="127.0.0.1", - port=8080, + port=8110, log_level="debug", keep_alive=600, max_vram_util=0.85, diff --git a/tests/test_api/test_sessions.py b/tests/test_api/test_sessions.py index 2844955..0182564 100644 --- a/tests/test_api/test_sessions.py +++ b/tests/test_api/test_sessions.py @@ -22,7 +22,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) - auth_mode=auth_mode, api_keys=api_keys or [], host="127.0.0.1", - port=8080, + port=8110, log_level="debug", keep_alive=600, max_vram_util=0.85,