Merged
2 changes: 1 addition & 1 deletion .env.example
@@ -9,7 +9,7 @@
MATA_SERVER_HOST=0.0.0.0

# TCP port the HTTP/WebSocket server listens on
MATA_SERVER_PORT=8080
MATA_SERVER_PORT=8110

# Log level: debug | info | warning | error | critical
MATA_SERVER_LOG_LEVEL=info
17 changes: 10 additions & 7 deletions Dockerfile
@@ -42,9 +42,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*

# Make python3.11 the default python / python3
# Make python3.11 the default python / python3, and symlink into /usr/local/bin
# so the entry-point shebang written by pip (#!/usr/local/bin/python3.11) resolves
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python3.11
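The symlink matters because pip bakes an absolute shebang into console scripts at install time: the entry point built in the builder stage references `/usr/local/bin/python3.11`, which a stock apt install of python3.11 does not provide. A hypothetical build-time check (not part of this PR; the `/usr/local/bin/mataserver` path is an assumption) could fail the image build early instead of failing at container start:

```dockerfile
# Hypothetical sanity check: confirm the shebang target exists and that the
# console script actually references it, so a broken interpreter path breaks
# the build rather than the first `docker run`.
RUN test -x /usr/local/bin/python3.11 \
    && head -1 /usr/local/bin/mataserver | grep -q '/usr/local/bin/python3.11'
```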

# Copy installed packages from builder stage
COPY --from=builder /install /usr/local
@@ -57,16 +59,17 @@ WORKDIR /app

# Pre-create data sub-directories so the volume mount point exists
RUN mkdir -p /var/lib/mataserver/models \
/var/lib/mataserver/cache \
/var/lib/mataserver/blobs
/var/lib/mataserver/cache \
/var/lib/mataserver/blobs

# Persistent data volume (model weights, cache, blobs)
VOLUME /var/lib/mataserver

EXPOSE 8080
EXPOSE 8110

ENV MATA_SERVER_HOST=0.0.0.0
ENV MATA_SERVER_PORT=8080
ENV MATA_SERVER_PORT=8110
ENV MATA_SERVER_DATA_DIR=/var/lib/mataserver
ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages

ENTRYPOINT ["mataserver"]
ENTRYPOINT ["mataserver", "serve"]
24 changes: 12 additions & 12 deletions README.md
@@ -51,7 +51,7 @@ cp .env.example .env
mataserver serve
```

The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` for the interactive API explorer.
The server starts on `http://0.0.0.0:8110`. Visit `http://localhost:8110/docs` for the interactive API explorer.

### Docker

@@ -60,13 +60,13 @@ The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` f
docker pull ghcr.io/datamata-io/mataserver:latest

# Run (CPU-only)
docker run -p 8080:8080 \
docker run -p 8110:8110 \
-e MATA_SERVER_AUTH_MODE=none \
-v mataserver-data:/var/lib/mataserver \
ghcr.io/datamata-io/mataserver:latest

# Run with GPU (requires NVIDIA Container Toolkit)
docker run --gpus all -p 8080:8080 \
docker run --gpus all -p 8110:8110 \
-e MATA_SERVER_AUTH_MODE=none \
-v mataserver-data:/var/lib/mataserver \
ghcr.io/datamata-io/mataserver:latest
@@ -83,7 +83,7 @@ docker compose up -d
Verify the server is running:

```bash
curl http://localhost:8080/v1/health
curl http://localhost:8110/v1/health
# {"status":"ok","version":"0.1.0","gpu_available":false}
```

@@ -115,7 +115,7 @@ All settings use the `MATA_SERVER_` environment variable prefix and can also be
| Variable | Default | Description |
| ----------------------------- | --------------------- | -------------------------------------------------------------------------- |
| `MATA_SERVER_HOST` | `0.0.0.0` | Bind address |
| `MATA_SERVER_PORT` | `8080` | Bind port |
| `MATA_SERVER_PORT` | `8110` | Bind port |
| `MATA_SERVER_LOG_LEVEL` | `info` | Logging level (`debug`, `info`, `warning`, `error`) |
| `MATA_SERVER_AUTH_MODE` | `api_key` | Auth mode: `api_key` (enforce bearer tokens) or `none` (open, dev only) |
| `MATA_SERVER_API_KEYS` | _(empty)_ | Comma-separated list of valid API keys (required when `auth_mode=api_key`) |
@@ -134,7 +134,7 @@ See `.env.example` for a fully annotated template with production-recommended va

## API Endpoints

Interactive docs are served at `http://localhost:8080/docs` (Swagger UI) and `http://localhost:8080/redoc`.
Interactive docs are served at `http://localhost:8110/docs` (Swagger UI) and `http://localhost:8110/redoc`.

| Method | Path | Auth | Description |
| ----------- | --------------------------- | -------- | ------------------------------------------------- |
@@ -160,7 +160,7 @@ For full request/response schemas, per-endpoint error codes, and additional `cur
### Health check

```bash
curl http://localhost:8080/v1/health
curl http://localhost:8110/v1/health
```

```json
@@ -170,7 +170,7 @@
### Pull a model from HuggingFace

```bash
curl -X POST http://localhost:8080/v1/models/pull \
curl -X POST http://localhost:8110/v1/models/pull \
-H "Authorization: Bearer your-api-key" \
-H "Content-Type: application/json" \
-d '{"source": "hf://datamata/rtdetr-l"}'
@@ -185,7 +185,7 @@
```bash
IMAGE_B64=$(base64 -w0 /path/to/image.jpg)

curl -X POST http://localhost:8080/v1/infer \
curl -X POST http://localhost:8110/v1/infer \
-H "Authorization: Bearer your-api-key" \
-H "Content-Type: application/json" \
-d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"${IMAGE_B64}\", \"confidence\": 0.5}"
@@ -206,7 +206,7 @@ curl -X POST http://localhost:8080/v1/infer \
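The escaped quoting in the `-d` payload above is easy to get wrong in shell; a Python sketch of building the same request body (field names are taken from the example above, the rest of the endpoint shape is assumed):

```python
import base64
import json

def build_infer_payload(image_bytes: bytes, model: str, confidence: float) -> str:
    # Same encoding as `base64 -w0`: standard base64 with no line wraps.
    image_b64 = base64.b64encode(image_bytes).decode("ascii")
    return json.dumps({"model": model, "image": image_b64, "confidence": confidence})

# Dummy JPEG-ish bytes stand in for a real image file here.
body = build_infer_payload(b"\xff\xd8\xff\xe0", "datamata/rtdetr-l", 0.5)
```

The resulting string can be passed to any HTTP client as the JSON request body, avoiding a layer of shell escaping entirely.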
### Single-shot inference (file upload)

```bash
curl -X POST http://localhost:8080/v1/infer/upload \
curl -X POST http://localhost:8110/v1/infer/upload \
-H "Authorization: Bearer your-api-key" \
-F "model=datamata/rtdetr-l" \
-F "confidence=0.5" \
@@ -217,7 +217,7 @@ curl -X POST http://localhost:8080/v1/infer/upload \

```bash
# 1. Create the session
SESSION=$(curl -s -X POST http://localhost:8080/v1/sessions \
SESSION=$(curl -s -X POST http://localhost:8110/v1/sessions \
-H "Authorization: Bearer your-api-key" \
-H "Content-Type: application/json" \
-d '{"model": "datamata/rtdetr-l", "task": "detect"}' \
@@ -234,7 +234,7 @@ SESSION_ID = "sess_xxxxxxxxxxxx" # replace with session_id from above
API_KEY = "your-api-key"

async def stream():
uri = f"ws://localhost:8080/v1/stream/{SESSION_ID}?token={API_KEY}"
uri = f"ws://localhost:8110/v1/stream/{SESSION_ID}?token={API_KEY}"
async with websockets.connect(uri) as ws:
with open("/path/to/image.jpg", "rb") as f:
image = f.read()
2 changes: 1 addition & 1 deletion configs/default.yaml
@@ -1,5 +1,5 @@
host: "0.0.0.0"
port: 8080
port: 8110
log_level: "info"
auth_mode: "none"
keep_alive: 600
8 changes: 7 additions & 1 deletion docker-compose.yml
@@ -2,9 +2,15 @@ services:
mataserver:
build: .
ports:
- "8080:8080"
- "8110:8110"
volumes:
- mataserver-data:/var/lib/mataserver
environment:
# Explicitly set the Linux data path so a local .env containing a
# Windows MATA_SERVER_DATA_DIR value cannot override it inside the
# container. Docker Compose gives `environment` higher priority than
# `env_file`, so this always wins.
- MATA_SERVER_DATA_DIR=/var/lib/mataserver
env_file:
- .env
deploy:
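The precedence the comment above relies on can be sanity-checked without Docker; a small sketch (not Compose's actual implementation) of the documented merge order, where inline `environment:` entries override values loaded from `env_file`:

```python
def effective_env(env_file_vars: dict, environment_vars: dict) -> dict:
    # Emulates Docker Compose's documented precedence: `env_file` values are
    # applied first, then inline `environment:` entries override them.
    merged = dict(env_file_vars)     # lower priority
    merged.update(environment_vars)  # higher priority: inline `environment:`
    return merged

env = effective_env(
    {"MATA_SERVER_DATA_DIR": "C:\\mataserver\\data"},  # stray Windows value in a local .env
    {"MATA_SERVER_DATA_DIR": "/var/lib/mataserver"},   # pinned in docker-compose.yml
)
```

With both sources defining `MATA_SERVER_DATA_DIR`, the pinned Linux path wins, which is exactly the behavior the compose change depends on.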
24 changes: 12 additions & 12 deletions docs/api.md
@@ -220,7 +220,7 @@ mataserver load <MODEL_ID> [--url URL] [--api-key KEY]
mataserver load facebook/detr-resnet-50

# Explicit server address and API key
mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8080 --api-key my-secret
mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8110 --api-key my-secret

# Using the alias
mataserver warmup facebook/detr-resnet-50
@@ -361,7 +361,7 @@ Returns server status. **No authentication required.**
**Example**:

```bash
curl http://localhost:8080/v1/health
curl http://localhost:8110/v1/health
```

---
@@ -414,7 +414,7 @@ Model `state` values:
**Example**:

```bash
curl -H "Authorization: Bearer $KEY" http://localhost:8080/v1/models
curl -H "Authorization: Bearer $KEY" http://localhost:8110/v1/models
```

---
@@ -437,7 +437,7 @@ Retrieve full details for a specific model.

```bash
curl -H "Authorization: Bearer $KEY" \
http://localhost:8080/v1/models/PekingU/rtdetr_v2_r101vd
http://localhost:8110/v1/models/PekingU/rtdetr_v2_r101vd
```

---
@@ -477,7 +477,7 @@ Download a model from HuggingFace into the default HuggingFace cache (`~/.cache/
**Example**:

```bash
curl -X POST http://localhost:8080/v1/models/pull \
curl -X POST http://localhost:8110/v1/models/pull \
-H "Authorization: Bearer $KEY" \
-H "Content-Type: application/json" \
-d '{"model": "PekingU/rtdetr_v2_r101vd", "task": "detect"}'
@@ -512,7 +512,7 @@ Pre-load a model into memory without running inference. Useful for eliminating c
**Example**:

```bash
curl -X POST http://localhost:8080/v1/models/warmup \
curl -X POST http://localhost:8110/v1/models/warmup \
-H "Authorization: Bearer $KEY" \
-H "Content-Type: application/json" \
-d '{"model": "datamata/rtdetr-l"}'
@@ -735,7 +735,7 @@ All fields are optional. Only send the parameters relevant to your model's task.

```bash
IMAGE_B64=$(base64 -w0 photo.jpg)
curl -X POST http://localhost:8080/v1/infer \
curl -X POST http://localhost:8110/v1/infer \
-H "Authorization: Bearer $KEY" \
-H "Content-Type: application/json" \
-d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"$IMAGE_B64\", \"params\": {\"confidence\": 0.4}}"
@@ -780,7 +780,7 @@ Run single-shot inference with a multipart form upload. Convenient when sending
**Example**:

```bash
curl -X POST http://localhost:8080/v1/infer/upload \
curl -X POST http://localhost:8110/v1/infer/upload \
-H "Authorization: Bearer $KEY" \
-F "model=datamata/rtdetr-l" \
-F "confidence=0.4" \
@@ -836,7 +836,7 @@ Create a new streaming session. The server ensures the requested model is loaded
```json
{
"session_id": "sess_3f8a1b2c9d4e",
"ws_url": "ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e"
"ws_url": "ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e"
}
```

@@ -853,7 +853,7 @@ The `ws_url` uses `wss://` when the server is reached over HTTPS.
**Example**:

```bash
curl -X POST http://localhost:8080/v1/sessions \
curl -X POST http://localhost:8110/v1/sessions \
-H "Authorization: Bearer $KEY" \
-H "Content-Type: application/json" \
-d '{"model": "datamata/rtdetr-l", "task": "detect", "params": {"confidence": 0.5}}'
@@ -876,7 +876,7 @@ Close and clean up a streaming session. If the WebSocket is still connected it w
**Example**:

```bash
curl -X DELETE http://localhost:8080/v1/sessions/sess_3f8a1b2c9d4e \
curl -X DELETE http://localhost:8110/v1/sessions/sess_3f8a1b2c9d4e \
-H "Authorization: Bearer $KEY"
```

@@ -907,5 +907,5 @@ WebSocket endpoint for real-time frame-by-frame inference. Clients send binary f
**Example connection URL**:

```
ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key
ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key
```
22 changes: 11 additions & 11 deletions docs/deployment.md
@@ -35,7 +35,7 @@ The Dockerfile uses a two-stage build: a `python:3.11-slim` builder stage compil
### Run (CPU-Only)

```bash
docker run --rm -p 8080:8080 \
docker run --rm -p 8110:8110 \
-v mataserver-data:/var/lib/mataserver \
-e MATA_SERVER_AUTH_MODE=api_key \
-e MATA_SERVER_API_KEYS=your-secret-key \
@@ -45,7 +45,7 @@
### Run (GPU)

```bash
docker run --rm --gpus all -p 8080:8080 \
docker run --rm --gpus all -p 8110:8110 \
-v mataserver-data:/var/lib/mataserver \
-e MATA_SERVER_AUTH_MODE=api_key \
-e MATA_SERVER_API_KEYS=your-secret-key \
@@ -57,7 +57,7 @@
### Verify Startup

```bash
curl http://localhost:8080/v1/health
curl http://localhost:8110/v1/health
# {"status":"ok","version":"...","gpu_available":true}
```

@@ -102,7 +102,7 @@ services:
mataserver:
build: .
ports:
- "8080:8080"
- "8110:8110"
volumes:
- mataserver-data:/var/lib/mataserver
env_file:
@@ -137,7 +137,7 @@ services:

## 3. Reverse Proxy Setup

A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8080 directly in production.
A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8110 directly in production.

### Caddy

@@ -147,7 +147,7 @@ Caddy automatically obtains and renews TLS certificates via Let's Encrypt.

```caddy
vision.example.com {
reverse_proxy localhost:8080 {
reverse_proxy localhost:8110 {
# Forward the client IP so logs are accurate
header_up X-Real-IP {remote_host}
header_up X-Forwarded-For {remote_host}
@@ -170,7 +170,7 @@ caddy run --config Caddyfile

```nginx
upstream mataserver {
server 127.0.0.1:8080;
server 127.0.0.1:8110;
keepalive 64;
}

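# Sketch, not part of the PR: the /v1/stream WebSocket endpoint only works
# through nginx if the Upgrade/Connection headers are forwarded. Inside the
# site's `server` block (names and TLS settings below are illustrative):
server {
    listen 443 ssl;
    server_name vision.example.com;

    location /v1/stream/ {
        proxy_pass http://mataserver;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_read_timeout 3600s;   # keep long-lived streaming sessions open
    }
}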
@@ -312,7 +312,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v
| Variable | Default | Description |
| ----------------------------- | --------------------- | ---------------------------------------------------------------------- |
| `MATA_SERVER_HOST` | `0.0.0.0` | Network interface to bind |
| `MATA_SERVER_PORT` | `8080` | TCP port for HTTP/WebSocket |
| `MATA_SERVER_PORT` | `8110` | TCP port for HTTP/WebSocket |
| `MATA_SERVER_LOG_LEVEL` | `info` | Log verbosity: `debug` \| `info` \| `warning` \| `error` \| `critical` |
| `MATA_SERVER_AUTH_MODE` | `api_key` | Authentication mode: `api_key` \| `none` |
| `MATA_SERVER_API_KEYS` | _(empty)_ | Comma-separated list of valid API keys |
@@ -330,7 +330,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v

# Bind to all interfaces (reverse proxy handles external access)
MATA_SERVER_HOST=0.0.0.0
MATA_SERVER_PORT=8080
MATA_SERVER_PORT=8110
MATA_SERVER_LOG_LEVEL=info

# Always use api_key in production
@@ -394,7 +394,7 @@ services:
"CMD",
"python",
"-c",
"import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/health')",
"import urllib.request; urllib.request.urlopen('http://localhost:8110/v1/health')",
]
interval: 30s
timeout: 5s
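The healthcheck above works because `urllib.request.urlopen` raises on connection failures and HTTP error statuses, so the one-liner exits non-zero and Docker marks the container unhealthy. A self-contained sketch of that failure semantics against a stand-in health endpoint (the real server is not involved; the stub below is purely illustrative):

```python
import threading
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer

class StubHealth(BaseHTTPRequestHandler):
    # Minimal stand-in for GET /v1/health; not the real mataserver.
    def do_GET(self):
        if self.path == "/v1/health":
            body = b'{"status":"ok"}'
            self.send_response(200)
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_error(404)  # urlopen turns this into an HTTPError

    def log_message(self, *_):
        pass  # keep the sketch quiet

def healthcheck(url: str) -> bool:
    # Mirrors the compose one-liner: healthy iff urlopen does not raise.
    try:
        urllib.request.urlopen(url, timeout=5)
        return True
    except Exception:
        return False

server = HTTPServer(("127.0.0.1", 0), StubHealth)
threading.Thread(target=server.serve_forever, daemon=True).start()
port = server.server_address[1]
healthy = healthcheck(f"http://127.0.0.1:{port}/v1/health")
unhealthy = healthcheck(f"http://127.0.0.1:{port}/missing")
server.shutdown()
```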
@@ -465,7 +465,7 @@ docker volume inspect mataserver-data
Use a bind mount when you need direct host access to model files:

```bash
docker run --rm --gpus all -p 8080:8080 \
docker run --rm --gpus all -p 8110:8110 \
-v /data/mataserver:/var/lib/mataserver \
-e MATA_SERVER_API_KEYS=your-key \
mataserver