From 199ad0ee98d7cb702418ae71fff5dbb8ccc4369b Mon Sep 17 00:00:00 2001
From: Bimantoro Maesa <bimantoro@gmail.com>
Date: Fri, 6 Mar 2026 21:26:31 +0700
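Subject: [PATCH] Update server port from 8080 to 8110 across configuration
 files and documentation

Besides the port change, this patch also:

- Dockerfile: symlinks python3.11 into /usr/local/bin, sets PYTHONPATH, and
  changes the entry point to `mataserver serve`.
- docker-compose.yml: sets MATA_SERVER_DATA_DIR to /var/lib/mataserver so a
  value from a local .env cannot override it inside the container.
- mataserver/models/registry.py: list_models() re-reads the registry from
  disk on every call so models pulled via the CLI show up without a restart.
- examples/ws_video_infer.py: captures the elapsed send time before waiting
  for in-flight results.

---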
 .env.example                    |  2 +-
 Dockerfile                      | 17 ++++++++++-------
 README.md                       | 24 ++++++++++++------------
 configs/default.yaml            |  2 +-
 docker-compose.yml              |  8 +++++++-
 docs/api.md                     | 24 ++++++++++++------------
 docs/deployment.md              | 22 +++++++++++-----------
 docs/streaming.md               |  6 +++---
 examples/ws_video_infer.py      |  3 ++-
 mataserver/config.py            |  2 +-
 mataserver/models/registry.py   |  5 +++++
 tests/conftest.py               |  2 +-
 tests/test_api/test_auth.py     |  2 +-
 tests/test_api/test_health.py   |  2 +-
 tests/test_api/test_infer.py    |  2 +-
 tests/test_api/test_models.py   |  2 +-
 tests/test_api/test_sessions.py |  2 +-
 17 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/.env.example b/.env.example
index 3dece3e..200cafc 100644
--- a/.env.example
+++ b/.env.example
@@ -9,7 +9,7 @@
 MATA_SERVER_HOST=0.0.0.0
 
 # TCP port the HTTP/WebSocket server listens on
-MATA_SERVER_PORT=8080
+MATA_SERVER_PORT=8110
 
 # Log level: debug | info | warning | error | critical
 MATA_SERVER_LOG_LEVEL=info
diff --git a/Dockerfile b/Dockerfile
index 46f6779..8638d6c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,9 +42,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 
-# Make python3.11 the default python / python3
+# Make python3.11 the default python / python3, and symlink it into /usr/local/bin
+# so that the entry-point shebang written by pip (#!/usr/local/bin/python3.11) resolves.
 RUN update-alternatives --install /usr/bin/python  python  /usr/bin/python3.11 1 \
- && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
+    && ln -sf /usr/bin/python3.11 /usr/local/bin/python3.11
 
 # Copy installed packages from builder stage
 COPY --from=builder /install /usr/local
@@ -57,16 +59,17 @@ WORKDIR /app
 
 # Pre-create data sub-directories so the volume mount point exists
 RUN mkdir -p /var/lib/mataserver/models \
-             /var/lib/mataserver/cache \
-             /var/lib/mataserver/blobs
+    /var/lib/mataserver/cache \
+    /var/lib/mataserver/blobs
 
 # Persistent data volume (model weights, cache, blobs)
 VOLUME /var/lib/mataserver
 
-EXPOSE 8080
+EXPOSE 8110
 
 ENV MATA_SERVER_HOST=0.0.0.0
-ENV MATA_SERVER_PORT=8080
+ENV MATA_SERVER_PORT=8110
 ENV MATA_SERVER_DATA_DIR=/var/lib/mataserver
+ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages
 
-ENTRYPOINT ["mataserver"]
+ENTRYPOINT ["mataserver", "serve"]
diff --git a/README.md b/README.md
index 812b871..ce4d1a8 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ cp .env.example .env
 mataserver serve
 ```
 
-The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` for the interactive API explorer.
+The server starts on `http://0.0.0.0:8110`. Visit `http://localhost:8110/docs` for the interactive API explorer.
 
 ### Docker
 
@@ -60,13 +60,13 @@ The server starts on `http://0.0.0.0:8080`. Visit `http://localhost:8080/docs` f
 docker pull ghcr.io/datamata-io/mataserver:latest
 
 # Run (CPU-only)
-docker run -p 8080:8080 \
+docker run -p 8110:8110 \
   -e MATA_SERVER_AUTH_MODE=none \
   -v mataserver-data:/var/lib/mataserver \
   ghcr.io/datamata-io/mataserver:latest
 
 # Run with GPU (requires NVIDIA Container Toolkit)
-docker run --gpus all -p 8080:8080 \
+docker run --gpus all -p 8110:8110 \
   -e MATA_SERVER_AUTH_MODE=none \
   -v mataserver-data:/var/lib/mataserver \
   ghcr.io/datamata-io/mataserver:latest
@@ -83,7 +83,7 @@ docker compose up -d
 Verify the server is running:
 
 ```bash
-curl http://localhost:8080/v1/health
+curl http://localhost:8110/v1/health
 # {"status":"ok","version":"0.1.0","gpu_available":false}
 ```
 
@@ -115,7 +115,7 @@ All settings use the `MATA_SERVER_` environment variable prefix and can also be
 | Variable                      | Default               | Description                                                                |
 | ----------------------------- | --------------------- | -------------------------------------------------------------------------- |
 | `MATA_SERVER_HOST`            | `0.0.0.0`             | Bind address                                                               |
-| `MATA_SERVER_PORT`            | `8080`                | Bind port                                                                  |
+| `MATA_SERVER_PORT`            | `8110`                | Bind port                                                                  |
 | `MATA_SERVER_LOG_LEVEL`       | `info`                | Logging level (`debug`, `info`, `warning`, `error`)                        |
 | `MATA_SERVER_AUTH_MODE`       | `api_key`             | Auth mode: `api_key` (enforce bearer tokens) or `none` (open, dev only)    |
 | `MATA_SERVER_API_KEYS`        | _(empty)_             | Comma-separated list of valid API keys (required when `auth_mode=api_key`) |
@@ -134,7 +134,7 @@ See `.env.example` for a fully annotated template with production-recommended va
 
 ## API Endpoints
 
-Interactive docs are served at `http://localhost:8080/docs` (Swagger UI) and `http://localhost:8080/redoc`.
+Interactive docs are served at `http://localhost:8110/docs` (Swagger UI) and `http://localhost:8110/redoc`.
 
 | Method      | Path                        | Auth     | Description                                       |
 | ----------- | --------------------------- | -------- | ------------------------------------------------- |
@@ -160,7 +160,7 @@ For full request/response schemas, per-endpoint error codes, and additional `cur
 ### Health check
 
 ```bash
-curl http://localhost:8080/v1/health
+curl http://localhost:8110/v1/health
 ```
 
 ```json
@@ -170,7 +170,7 @@ curl http://localhost:8080/v1/health
 ### Pull a model from HuggingFace
 
 ```bash
-curl -X POST http://localhost:8080/v1/models/pull \
+curl -X POST http://localhost:8110/v1/models/pull \
   -H "Authorization: Bearer your-api-key" \
   -H "Content-Type: application/json" \
   -d '{"source": "hf://datamata/rtdetr-l"}'
@@ -185,7 +185,7 @@ curl -X POST http://localhost:8080/v1/models/pull \
 ```bash
 IMAGE_B64=$(base64 -w0 /path/to/image.jpg)
 
-curl -X POST http://localhost:8080/v1/infer \
+curl -X POST http://localhost:8110/v1/infer \
   -H "Authorization: Bearer your-api-key" \
   -H "Content-Type: application/json" \
   -d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"${IMAGE_B64}\", \"confidence\": 0.5}"
@@ -206,7 +206,7 @@ curl -X POST http://localhost:8080/v1/infer \
 ### Single-shot inference (file upload)
 
 ```bash
-curl -X POST http://localhost:8080/v1/infer/upload \
+curl -X POST http://localhost:8110/v1/infer/upload \
   -H "Authorization: Bearer your-api-key" \
   -F "model=datamata/rtdetr-l" \
   -F "confidence=0.5" \
@@ -217,7 +217,7 @@ curl -X POST http://localhost:8080/v1/infer/upload \
 
 ```bash
 # 1. Create the session
-SESSION=$(curl -s -X POST http://localhost:8080/v1/sessions \
+SESSION=$(curl -s -X POST http://localhost:8110/v1/sessions \
   -H "Authorization: Bearer your-api-key" \
   -H "Content-Type: application/json" \
   -d '{"model": "datamata/rtdetr-l", "task": "detect"}' \
@@ -234,7 +234,7 @@ SESSION_ID = "sess_xxxxxxxxxxxx"  # replace with session_id from above
 API_KEY    = "your-api-key"
 
 async def stream():
-    uri = f"ws://localhost:8080/v1/stream/{SESSION_ID}?token={API_KEY}"
+    uri = f"ws://localhost:8110/v1/stream/{SESSION_ID}?token={API_KEY}"
     async with websockets.connect(uri) as ws:
         with open("/path/to/image.jpg", "rb") as f:
             image = f.read()
diff --git a/configs/default.yaml b/configs/default.yaml
index 751de41..4bf5479 100644
--- a/configs/default.yaml
+++ b/configs/default.yaml
@@ -1,5 +1,5 @@
 host: "0.0.0.0"
-port: 8080
+port: 8110
 log_level: "info"
 auth_mode: "none"
 keep_alive: 600
diff --git a/docker-compose.yml b/docker-compose.yml
index 3a44730..ae6b5c4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,9 +2,15 @@ services:
   mataserver:
     build: .
     ports:
-      - "8080:8080"
+      - "8110:8110"
     volumes:
       - mataserver-data:/var/lib/mataserver
+    environment:
+      # Explicitly set the Linux data path so that a local .env containing a
+      # Windows-style MATA_SERVER_DATA_DIR value cannot override it inside the
+      # container.  Docker Compose gives `environment` precedence over
+      # `env_file`, so this value wins over anything read from .env.
+      - MATA_SERVER_DATA_DIR=/var/lib/mataserver
     env_file:
       - .env
     deploy:
diff --git a/docs/api.md b/docs/api.md
index bb20d6c..404f4bf 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -220,7 +220,7 @@ mataserver load <MODEL_ID> [--url URL] [--api-key KEY]
 mataserver load facebook/detr-resnet-50
 
 # Explicit server address and API key
-mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8080 --api-key my-secret
+mataserver load facebook/detr-resnet-50 --url http://192.168.1.10:8110 --api-key my-secret
 
 # Using the alias
 mataserver warmup facebook/detr-resnet-50
@@ -361,7 +361,7 @@ Returns server status. **No authentication required.**
 **Example**:
 
 ```bash
-curl http://localhost:8080/v1/health
+curl http://localhost:8110/v1/health
 ```
 
 ---
@@ -414,7 +414,7 @@ Model `state` values:
 **Example**:
 
 ```bash
-curl -H "Authorization: Bearer $KEY" http://localhost:8080/v1/models
+curl -H "Authorization: Bearer $KEY" http://localhost:8110/v1/models
 ```
 
 ---
@@ -437,7 +437,7 @@ Retrieve full details for a specific model.
 
 ```bash
 curl -H "Authorization: Bearer $KEY" \
-     http://localhost:8080/v1/models/PekingU/rtdetr_v2_r101vd
+     http://localhost:8110/v1/models/PekingU/rtdetr_v2_r101vd
 ```
 
 ---
@@ -477,7 +477,7 @@ Download a model from HuggingFace into the default HuggingFace cache (`~/.cache/
 **Example**:
 
 ```bash
-curl -X POST http://localhost:8080/v1/models/pull \
+curl -X POST http://localhost:8110/v1/models/pull \
   -H "Authorization: Bearer $KEY" \
   -H "Content-Type: application/json" \
   -d '{"model": "PekingU/rtdetr_v2_r101vd", "task": "detect"}'
@@ -512,7 +512,7 @@ Pre-load a model into memory without running inference. Useful for eliminating c
 **Example**:
 
 ```bash
-curl -X POST http://localhost:8080/v1/models/warmup \
+curl -X POST http://localhost:8110/v1/models/warmup \
   -H "Authorization: Bearer $KEY" \
   -H "Content-Type: application/json" \
   -d '{"model": "datamata/rtdetr-l"}'
@@ -735,7 +735,7 @@ All fields are optional. Only send the parameters relevant to your model's task.
 
 ```bash
 IMAGE_B64=$(base64 -w0 photo.jpg)
-curl -X POST http://localhost:8080/v1/infer \
+curl -X POST http://localhost:8110/v1/infer \
   -H "Authorization: Bearer $KEY" \
   -H "Content-Type: application/json" \
   -d "{\"model\": \"datamata/rtdetr-l\", \"image\": \"$IMAGE_B64\", \"params\": {\"confidence\": 0.4}}"
@@ -780,7 +780,7 @@ Run single-shot inference with a multipart form upload. Convenient when sending
 **Example**:
 
 ```bash
-curl -X POST http://localhost:8080/v1/infer/upload \
+curl -X POST http://localhost:8110/v1/infer/upload \
   -H "Authorization: Bearer $KEY" \
   -F "model=datamata/rtdetr-l" \
   -F "confidence=0.4" \
@@ -836,7 +836,7 @@ Create a new streaming session. The server ensures the requested model is loaded
 ```json
 {
   "session_id": "sess_3f8a1b2c9d4e",
-  "ws_url": "ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e"
+  "ws_url": "ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e"
 }
 ```
 
@@ -853,7 +853,7 @@ The `ws_url` uses `wss://` when the server is reached over HTTPS.
 **Example**:
 
 ```bash
-curl -X POST http://localhost:8080/v1/sessions \
+curl -X POST http://localhost:8110/v1/sessions \
   -H "Authorization: Bearer $KEY" \
   -H "Content-Type: application/json" \
   -d '{"model": "datamata/rtdetr-l", "task": "detect", "params": {"confidence": 0.5}}'
@@ -876,7 +876,7 @@ Close and clean up a streaming session. If the WebSocket is still connected it w
 **Example**:
 
 ```bash
-curl -X DELETE http://localhost:8080/v1/sessions/sess_3f8a1b2c9d4e \
+curl -X DELETE http://localhost:8110/v1/sessions/sess_3f8a1b2c9d4e \
   -H "Authorization: Bearer $KEY"
 ```
 
@@ -907,5 +907,5 @@ WebSocket endpoint for real-time frame-by-frame inference. Clients send binary f
 **Example connection URL**:
 
 ```
-ws://localhost:8080/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key
+ws://localhost:8110/v1/stream/sess_3f8a1b2c9d4e?token=my-api-key
 ```
diff --git a/docs/deployment.md b/docs/deployment.md
index 9e20d85..0d50712 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -35,7 +35,7 @@ The Dockerfile uses a two-stage build: a `python:3.11-slim` builder stage compil
 ### Run (CPU-Only)
 
 ```bash
-docker run --rm -p 8080:8080 \
+docker run --rm -p 8110:8110 \
   -v mataserver-data:/var/lib/mataserver \
   -e MATA_SERVER_AUTH_MODE=api_key \
   -e MATA_SERVER_API_KEYS=your-secret-key \
@@ -45,7 +45,7 @@ docker run --rm -p 8080:8080 \
 ### Run (GPU)
 
 ```bash
-docker run --rm --gpus all -p 8080:8080 \
+docker run --rm --gpus all -p 8110:8110 \
   -v mataserver-data:/var/lib/mataserver \
   -e MATA_SERVER_AUTH_MODE=api_key \
   -e MATA_SERVER_API_KEYS=your-secret-key \
@@ -57,7 +57,7 @@ Pass `--gpus device=0` to restrict to a specific GPU.
 ### Verify Startup
 
 ```bash
-curl http://localhost:8080/v1/health
+curl http://localhost:8110/v1/health
 # {"status":"ok","version":"...","gpu_available":true}
 ```
 
@@ -102,7 +102,7 @@ services:
   mataserver:
     build: .
     ports:
-      - "8080:8080"
+      - "8110:8110"
     volumes:
       - mataserver-data:/var/lib/mataserver
     env_file:
@@ -137,7 +137,7 @@ services:
 
 ## 3. Reverse Proxy Setup
 
-A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8080 directly in production.
+A reverse proxy handles TLS termination, request buffering, and optional rate limiting. MATASERVER listens on HTTP -- do **not** expose port 8110 directly in production.
 
 ### Caddy
 
@@ -147,7 +147,7 @@ Caddy automatically obtains and renews TLS certificates via Let's Encrypt.
 
 ```caddy
 vision.example.com {
-    reverse_proxy localhost:8080 {
+    reverse_proxy localhost:8110 {
         # Forward the client IP so logs are accurate
         header_up X-Real-IP {remote_host}
         header_up X-Forwarded-For {remote_host}
@@ -170,7 +170,7 @@ caddy run --config Caddyfile
 
 ```nginx
 upstream mataserver {
-    server 127.0.0.1:8080;
+    server 127.0.0.1:8110;
     keepalive 64;
 }
 
@@ -312,7 +312,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v
 | Variable                      | Default               | Description                                                            |
 | ----------------------------- | --------------------- | ---------------------------------------------------------------------- |
 | `MATA_SERVER_HOST`            | `0.0.0.0`             | Network interface to bind                                              |
-| `MATA_SERVER_PORT`            | `8080`                | TCP port for HTTP/WebSocket                                            |
+| `MATA_SERVER_PORT`            | `8110`                | TCP port for HTTP/WebSocket                                            |
 | `MATA_SERVER_LOG_LEVEL`       | `info`                | Log verbosity: `debug` \| `info` \| `warning` \| `error` \| `critical` |
 | `MATA_SERVER_AUTH_MODE`       | `api_key`             | Authentication mode: `api_key` \| `none`                               |
 | `MATA_SERVER_API_KEYS`        | _(empty)_             | Comma-separated list of valid API keys                                 |
@@ -330,7 +330,7 @@ All settings use the `MATA_SERVER_` prefix and can be provided via environment v
 
 # Bind to all interfaces (reverse proxy handles external access)
 MATA_SERVER_HOST=0.0.0.0
-MATA_SERVER_PORT=8080
+MATA_SERVER_PORT=8110
 MATA_SERVER_LOG_LEVEL=info
 
 # Always use api_key in production
@@ -394,7 +394,7 @@ services:
           "CMD",
           "python",
           "-c",
-          "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/health')",
+          "import urllib.request; urllib.request.urlopen('http://localhost:8110/v1/health')",
         ]
       interval: 30s
       timeout: 5s
@@ -465,7 +465,7 @@ docker volume inspect mataserver-data
 Use a bind mount when you need direct host access to model files:
 
 ```bash
-docker run --rm --gpus all -p 8080:8080 \
+docker run --rm --gpus all -p 8110:8110 \
   -v /data/mataserver:/var/lib/mataserver \
   -e MATA_SERVER_API_KEYS=your-key \
   mataserver
diff --git a/docs/streaming.md b/docs/streaming.md
index ca8a73e..b38f419 100644
--- a/docs/streaming.md
+++ b/docs/streaming.md
@@ -49,7 +49,7 @@ Client                                  Server
    ```json
    {
      "session_id": "sess_a1b2c3d4e5f6",
-     "ws_url": "ws://localhost:8080/v1/stream/sess_a1b2c3d4e5f6",
+     "ws_url": "ws://localhost:8110/v1/stream/sess_a1b2c3d4e5f6",
      "frame_policy": "latest"
    }
    ```
@@ -255,7 +255,7 @@ global concurrency limit enforced by the server's internal scheduler.
 import asyncio, struct, time
 import websockets
 
-SERVER = "ws://localhost:8080"
+SERVER = "ws://localhost:8110"
 API_KEY = "my-api-key"
 MODEL = "datamata/rtdetr-l"
 
@@ -271,7 +271,7 @@ async def stream_inference(image_path: str):
     # 1. Create session
     async with httpx.AsyncClient() as http:
         resp = await http.post(
-            f"http://localhost:8080/v1/sessions",
+            f"http://localhost:8110/v1/sessions",
             headers={"Authorization": f"Bearer {API_KEY}"},
             json={"model": MODEL, "task": "detect", "params": {"confidence": 0.4}},
         )
diff --git a/examples/ws_video_infer.py b/examples/ws_video_infer.py
index 130876f..77970d6 100644
--- a/examples/ws_video_infer.py
+++ b/examples/ws_video_infer.py
@@ -143,6 +143,8 @@ async def receive_loop() -> None:
                 if sleep_for > 0:
                     await asyncio.sleep(sleep_for)
 
+            elapsed = time.perf_counter() - t0
+
             # Wait for in-flight results
             print(f"  Sent {sent} frames. Waiting for results…")
             await asyncio.sleep(max(2.0, sent * 0.05))
@@ -153,7 +155,6 @@ async def receive_loop() -> None:
             except asyncio.CancelledError:
                 pass
 
-        elapsed = time.perf_counter() - t0
         print(f"\n  Sent    : {sent} frames in {elapsed:.2f}s ({sent/elapsed:.1f} fps)")
         print(f"  Received: {received} results | {drops} dropped | {errors} errors")
     finally:
diff --git a/mataserver/config.py b/mataserver/config.py
index 040683e..585c8e5 100644
--- a/mataserver/config.py
+++ b/mataserver/config.py
@@ -19,7 +19,7 @@ class Settings(BaseSettings):
 
     # Server
     host: str = "0.0.0.0"
-    port: int = 8080
+    port: int = 8110
     log_level: str = "info"
 
     # Auth
diff --git a/mataserver/models/registry.py b/mataserver/models/registry.py
index 000b874..e248c3e 100644
--- a/mataserver/models/registry.py
+++ b/mataserver/models/registry.py
@@ -79,8 +79,13 @@ async def list_models(self) -> list[dict[str, Any]]:
         Each entry is a dict with at minimum ``model`` and ``task``.  When the
         model is present in the local HF cache the dict also includes
         ``size_mb`` and ``last_accessed``.
+
+        The registry is re-read from disk on every call so that models added
+        via the CLI (``mataserver pull``) are visible without restarting the
+        server.
         """
         async with self._lock:
+            self._models = self._load_from_disk()
             models_copy = dict(self._models)
 
         cache_map = self._get_hf_cache_map()
diff --git a/tests/conftest.py b/tests/conftest.py
index c29ad6c..5dc3bfd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -18,7 +18,7 @@ def test_settings(tmp_path):
         auth_mode="none",
         data_dir=tmp_path / "data",
         host="127.0.0.1",
-        port=8080,
+        port=8110,
         log_level="debug",
     )
 
diff --git a/tests/test_api/test_auth.py b/tests/test_api/test_auth.py
index aee543c..2190c69 100644
--- a/tests/test_api/test_auth.py
+++ b/tests/test_api/test_auth.py
@@ -159,7 +159,7 @@ def test_real_health_endpoint_via_app_factory(self) -> None:
 
         settings = Settings.model_construct(
             host="0.0.0.0",
-            port=8080,
+            port=8110,
             log_level="info",
             auth_mode="api_key",
             api_keys=[_VALID_KEY],
diff --git a/tests/test_api/test_health.py b/tests/test_api/test_health.py
index 650ee5f..9bdd1e4 100644
--- a/tests/test_api/test_health.py
+++ b/tests/test_api/test_health.py
@@ -16,7 +16,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) -
         auth_mode=auth_mode,
         api_keys=api_keys or [],
         host="127.0.0.1",
-        port=8080,
+        port=8110,
         log_level="debug",
         keep_alive=600,
         max_vram_util=0.85,
diff --git a/tests/test_api/test_infer.py b/tests/test_api/test_infer.py
index b1e6430..d10189d 100644
--- a/tests/test_api/test_infer.py
+++ b/tests/test_api/test_infer.py
@@ -24,7 +24,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) -
         auth_mode=auth_mode,
         api_keys=api_keys or [],
         host="127.0.0.1",
-        port=8080,
+        port=8110,
         log_level="debug",
         keep_alive=600,
         max_vram_util=0.85,
diff --git a/tests/test_api/test_models.py b/tests/test_api/test_models.py
index d133b3b..6721403 100644
--- a/tests/test_api/test_models.py
+++ b/tests/test_api/test_models.py
@@ -21,7 +21,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) -
         auth_mode=auth_mode,
         api_keys=api_keys or [],
         host="127.0.0.1",
-        port=8080,
+        port=8110,
         log_level="debug",
         keep_alive=600,
         max_vram_util=0.85,
diff --git a/tests/test_api/test_sessions.py b/tests/test_api/test_sessions.py
index 2844955..0182564 100644
--- a/tests/test_api/test_sessions.py
+++ b/tests/test_api/test_sessions.py
@@ -22,7 +22,7 @@ def _make_settings(auth_mode: str = "none", api_keys: list[str] | None = None) -
         auth_mode=auth_mode,
         api_keys=api_keys or [],
         host="127.0.0.1",
-        port=8080,
+        port=8110,
         log_level="debug",
         keep_alive=600,
         max_vram_util=0.85,