diff --git a/.gitignore b/.gitignore index 988f840d..c6843c8e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.bk __pycache__ .DS_Store +models diff --git a/README.md b/README.md index d4d9c48e..9556bfcd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,12 @@ **A Gemini 2.5 Flash Level MLLM for Vision, Speech, and Full-Duplex Multimodal Live Streaming on Your Phone** + + +💡 **Tip**: If you find this repository's structure or content difficult to understand, visit [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) for a comprehensive detailed explanation. + +
+ [中文](./README_zh.md) | English diff --git a/README_zh.md b/README_zh.md index f9a8d1c1..255631a4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,6 +4,12 @@ **端侧可用的 Gemini 2.5 Flash 级视觉、语音、全双工多模态实时流式大模型** +
+ +💡 **提示**: 如果你觉得这个仓库的结构或内容难以理解,请访问 [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) 获取详细的解读。 + +
+
+
 中文 | [English](./README.md)
diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md
new file mode 100644
index 00000000..ba15d8d4
--- /dev/null
+++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md
@@ -0,0 +1,828 @@
+# MiniCPM-o 4.5 Offline Deployment Guide (Build Image in WSL2 → Upload to Intranet H100 Server → Local + Mobile Access)
+
+> Goal: Build a Docker image on your local Windows PC with WSL2, transfer the image and model to a company H100 server with no public internet access, start the service, and test full-duplex video calling in a local browser and on an Android phone.
+
+**Your Environment Quick Reference:**
+
+| Item | Value |
+| --- | --- |
+| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change dynamically) |
+| GPU | NVIDIA H100 (driver 550.90.12) |
+| CUDA | 12.4 (fully matches the Dockerfile base image `cuda:12.4.1`) |
+| Local | Win10 + WSL2 Ubuntu |
+
+**Set SSH variables before each run (only change here):**
+
+```bash
+export SSH_HOST=127.0.0.1
+export SSH_PORT=54062
+export SSH_USER=your_user
+```
+
+PowerShell equivalent (use directly in Windows Terminal):
+
+```powershell
+$env:SSH_HOST = "127.0.0.1"
+$env:SSH_PORT = "54062"
+$env:SSH_USER = "your_user"
+```
+
+## PowerShell Daily Three-Command Quick Reference (Recommended)
+
+```powershell
+# 1) Update SSH parameters when port changes
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Start mobile mode (open tunnel + print accessible URL)
+Start-MiniCPMMobile
+
+# 3) Stop tunnel
+Stop-MiniCPMMobile
+```
+
+Quick recovery after port change:
+
+```powershell
+Set-MiniCPMSSH -Port "<new_port>" -User "<user>"
+Restart-MiniCPMMobile
+```
+
+When referencing the variables in PowerShell, write `ssh`/`scp` as:
+
+```powershell
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/"
+```
+
+Optional: Define a one-click function (only change the port going forward)
+
+```powershell
+function Set-MiniCPMSSH {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Port,
+        # NOTE: the parameter must not be named $Host — $Host is a read-only
+        # PowerShell automatic variable and the function would fail at invocation.
+        [string]$TargetHost = "127.0.0.1",
+        [string]$User = "your_user"
+    )
+
+    $env:SSH_HOST = $TargetHost
+    $env:SSH_PORT = $Port
+    $env:SSH_USER = $User
+
+    Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER"
+}
+```
+
+Usage example:
+
+```powershell
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+```
+
+Optional: Define a one-click tunnel function (local / mobile modes)
+
+```powershell
+function Open-MiniCPMTunnel {
+    param(
+        [ValidateSet("local", "mobile")]
+        [string]$Mode = "local"
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    if ($Mode -eq "local") {
+        ssh -N -p $env:SSH_PORT `
+            -L 3000:127.0.0.1:3000 `
+            -L 3443:127.0.0.1:3443 `
+            -L 32550:127.0.0.1:32550 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+    else {
+        ssh -N -p $env:SSH_PORT `
+            -L 0.0.0.0:3443:127.0.0.1:3443 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Local access only (open http://127.0.0.1:3000 in browser)
+Open-MiniCPMTunnel -Mode local
+
+# 3) Mobile access (same WiFi, use https://laptop_lan_ip:3443)
+Open-MiniCPMTunnel -Mode mobile
+```
+
+Optional: Auto-print mobile access URL
+
+```powershell
+function Get-MiniCPMLanUrl {
+    param(
+        [int]$Port = 3443
+    )
+
+    $ipv4List = Get-NetIPAddress -AddressFamily IPv4 |
+        Where-Object {
+            $_.IPAddress -notlike '127.*' -and
+            $_.IPAddress -notlike '169.254.*' -and
+            $_.PrefixOrigin -ne 'WellKnown'
+        } |
+        Sort-Object -Property InterfaceMetric
+
+    if (-not $ipv4List) {
+        throw "No usable IPv4 address found. Check the network adapter / connection."
+    }
+
+    $ip = $ipv4List[0].IPAddress
+    $url = "https://$ip`:$Port"
+
+    Write-Host "[MiniCPM LAN URL] $url"
+    return $url
+}
+```
+
+Usage example:
+
+```powershell
+# Start mobile mode tunnel first (run in another terminal window)
+Open-MiniCPMTunnel -Mode mobile
+
+# Print mobile access URL in the current window
+Get-MiniCPMLanUrl
+```
+
+Optional: One-click mobile mode startup (open tunnel + check port + print URL)
+
+```powershell
+function Start-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST"
+
+    # Open tunnel in a new window to avoid blocking the current terminal
+    $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru
+    $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id
+    $env:MINICPM_MOBILE_PORT = [string]$Port
+    Start-Sleep -Seconds 2
+
+    $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listener) {
+        Write-Warning "No listener detected on local port $Port. Please check whether SSH connected successfully."
+        return
+    }
+
+    $url = Get-MiniCPMLanUrl -Port $Port
+    Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID"
+    Write-Host "[MiniCPM Mobile Ready] Open on mobile browser: $url"
+}
+
+function Stop-MiniCPMMobile {
+    $pidText = $env:MINICPM_MOBILE_SSH_PID
+
+    if ($pidText) {
+        $pidValue = [int]$pidText
+        $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue
+        if ($proc) {
+            Stop-Process -Id $pidValue -Force
+            Write-Host "[MiniCPM Mobile Stopped] Tunnel process stopped PID=$pidValue"
+            Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+            Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+            return
+        }
+    }
+
+    $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 }
+    $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listeners) {
+        Write-Host "[MiniCPM Mobile] No listener detected on port $port. Nothing to stop."
+        return
+    }
+
+    foreach ($item in $listeners) {
+        if ($item.OwningProcess -gt 0) {
+            try {
+                Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop
+                Write-Host "[MiniCPM Mobile Stopped] Stopped process listening on port $port PID=$($item.OwningProcess)"
+            }
+            catch {
+                Write-Warning "Failed to stop PID=$($item.OwningProcess): $($_.Exception.Message)"
+            }
+        }
+    }
+
+    Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+    Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+}
+
+function Restart-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    Stop-MiniCPMMobile
+    Start-Sleep -Seconds 1
+    Start-MiniCPMMobile -Port $Port
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters first (only change here when port changes)
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) One-click start mobile mode and output the accessible URL
+Start-MiniCPMMobile
+
+# 3) One-click restart mobile mode after port change (optional)
+Restart-MiniCPMMobile
+
+# 4) Stop mobile mode tunnel
+Stop-MiniCPMMobile
+```
+
+---
+
+## 0. 
Directory and File Overview + +This guide uses the newly created deployment files in your repository: + +- `deploy/Dockerfile.backend`: Backend inference service image (FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`: Frontend image (Vue build + Nginx) +- `deploy/nginx.docker.conf`: Nginx reverse proxy to backend container +- `deploy/docker-compose.yml`: Two-container orchestration (frontend + backend) +- `deploy/requirements.backend.txt`: Backend Python dependency list +- `deploy/gen_ssl_cert.sh`: Self-signed SSL certificate generation script (required for mobile HTTPS) + +--- + +## 1. Local (WSL2) Prerequisites + +Run in WSL2 Ubuntu: + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o + +# 1) Check Docker +sudo docker --version +sudo docker compose version + +# 2) If your current user cannot use docker directly, you can temporarily use sudo docker +# Or add the user to the docker group (takes effect after re-login) +# sudo usermod -aG docker $USER +``` + +> Note: The local 1050Ti does not participate in inference. The local machine is only responsible for building images and does not require a local GPU. + +--- + +## 2. Download the Model Locally (for Upload to Intranet) + +It is recommended to download the HuggingFace model locally (where internet is available), then package and upload it. + +### 2.1 Install Download Tool + +```bash +python3 -m pip install -U huggingface_hub +``` + +### 2.2 Download MiniCPM-o 4.5 + +```bash +mkdir -p /mnt/d/JiuTian/codes/MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` + +After downloading, check the size and key files: + +```bash +du -sh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head +``` + +--- + +## 3. 
Build Two Images in WSL2 + +Run from the repository root directory: + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o + +# Backend image +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . + +# Frontend image +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . +``` + +Verify the images exist: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. Export Images + Generate SSL Certificate + +### 4.1 Export Images as tar + +```bash +mkdir -p /mnt/d/JiuTian/deploy_pkg + +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# Package compose and nginx config +cp deploy/docker-compose.yml /mnt/d/JiuTian/deploy_pkg/ +cp deploy/nginx.docker.conf /mnt/d/JiuTian/deploy_pkg/ +``` + +Optional: Compress to reduce transfer size + +```bash +cd /mnt/d/JiuTian/deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 Generate Self-Signed SSL Certificate (Required for Mobile HTTPS) + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o +bash deploy/gen_ssl_cert.sh /mnt/d/JiuTian/deploy_pkg/certs +``` + +This will generate `server.crt` and `server.key` under `/mnt/d/JiuTian/deploy_pkg/certs/`. + +--- + +## 5. Upload to the Intranet Server + +You have already passed company intranet authentication, and the port may change dynamically. Please use the SSH variables defined above. 
+ +### 5.1 Upload Image Packages and Config Files + +```bash +# First create the target directory on the server +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg" + +# Upload image tar packages +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/docker-compose.yml \ + /mnt/d/JiuTian/deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +### 5.2 Upload Model Weights + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/data/models/ +``` + +### 5.3 Upload SSL Certificate (Required for Mobile Access) + +```bash +scp -P $SSH_PORT -r /mnt/d/JiuTian/deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +> If the port changes, simply update the `SSH_PORT` variable and retry the command. + +--- + +## 6. H100 Server Preparation (One-Time) + +Log in to the server through the established tunnel: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +Check the environment: + +```bash +# Confirm NVIDIA driver (already confirmed: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# Check Docker +docker --version +docker compose version +``` + +### 6.1 Install NVIDIA Container Toolkit (If Not Installed) + +If `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` fails, you need to install the toolkit. + +Restart Docker after installation: + +```bash +sudo systemctl restart docker +``` + +Verify again: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 7. 
Load Images and Start Services on the H100 Server + +Run on the server (after logging in via `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`): + +```bash +cd /data/minicpmo/deploy_pkg + +# If the uploaded files are .tar.gz, decompress first +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# Load images +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +# Place runtime files +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.crt certs/server.key /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +``` + +### 7.1 Set Model Path and Start + +`docker-compose.yml` uses the `MODEL_PATH` environment variable. You can export it directly: + +```bash +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + +# Compatible with both Compose commands: docker compose / docker-compose +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found. Please install docker-compose or the docker compose plugin first." 
&& exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +If neither Compose option is available (`docker compose` / `docker-compose` both absent), you can start directly with `docker run`: + +```bash +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest +``` + +If you encounter `Failed to Setup IP tables` or `No chain/target/match by that name`, you can bypass the bridge network and start with the `host` network instead: + +```bash +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + --add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest +``` + +Check status: + +```bash +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi + +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +If using the `docker run` approach, check status with: + +```bash +docker ps --filter name=minicpmo +docker logs -f minicpmo-backend +``` + 
+Health check: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +Expected response: + +```json +{"status":"OK"} +``` + +> The first model load may be slow (tens of seconds to a few minutes). Wait until the logs show model initialization complete before testing the frontend. + +--- + +## 8. Local Computer Access (SSH Port Forwarding) + +You can already connect via SSH tunnel — just forward the service ports using the current port. + +Open a new terminal in local PowerShell or WSL: + +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +Keep this terminal connected. Then open in a local browser: + +- Frontend (HTTP): +- Frontend (HTTPS): (self-signed cert, click "Continue" to proceed) +- Backend health check: + +> The browser will request camera/microphone permissions — click Allow. When accessing via `localhost`, HTTP is sufficient to obtain camera permissions. + +--- + +## 9. Mobile Access (Full-Duplex Video Calling) + +### 9.1 Problem and Principle + +Mobile browsers (Chrome/Safari) **must use HTTPS** to access the camera and microphone (`localhost` is an exception, but the phone is not localhost). + +Solution: **Use the laptop as a relay** — Phone → Laptop WiFi LAN IP → SSH tunnel → Server. + +```text +Mobile browser ──WiFi──▶ Laptop:3443 ──SSH tunnel──▶ H100:3443 ──Nginx──▶ Backend:32550 + (HTTPS) (bound to 0.0.0.0) +``` + +### 9.2 Steps + +#### Step 1: Establish an SSH Tunnel with All-Interface Binding + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> Key difference: `0.0.0.0:3443` makes all network interfaces on the laptop listen on port 3443, allowing phones on the same WiFi to connect. + +#### Step 2: Find the Laptop's LAN IP + +Run in PowerShell: + +```powershell +ipconfig | Select-String "IPv4" +``` + +Assume the result is `192.168.1.100`. 
+ +#### Step 3: Allow Port Through Windows Firewall + +Run in PowerShell (as Administrator): + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4: Access from Mobile Browser + +Make sure the phone and laptop are on the same WiFi, then enter in the mobile browser: + +```text +https://192.168.1.100:3443 +``` + +- **First visit** will show an "unsafe connection" warning (self-signed cert) — tap **"Advanced" → "Continue"** +- The browser will then request camera/microphone permissions — tap **Allow** +- Enter the video call page and start a full-duplex conversation + +### 9.3 iOS Safari Notes + +iOS Safari is stricter with self-signed certificates. If the above bypass doesn't work: + +1. Open `https://192.168.1.100:3443/certs/server.crt` in Safari on the phone (if you configured a cert download path), download and install the certificate +2. Or send `server.crt` to the phone via AirDrop / WeChat, then go to **Settings → General → Profile → Install** +3. Then go to **Settings → General → About → Certificate Trust Settings → Enable Full Trust** + +After that, Safari can access `https://192.168.1.100:3443` normally. + +--- + +## 10. Common Issues and Troubleshooting + +### 10.1 Frontend Opens, but Cannot Start a Conversation + +Check backend logs: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +Key things to look for: + +- Whether the model path exists: `/models/MiniCPM-o-4_5` +- Whether VRAM is sufficient (H100 usually has enough) +- Whether `trust_remote_code` or dependency version errors appear + +### 10.2 GPU Not Visible Inside Container + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +If it fails, check the NVIDIA Container Toolkit and Docker daemon configuration first. + +### 10.3 WebSocket / SSE Anomalies + +This project has already disabled buffering and configured WebSocket upgrade in `nginx.docker.conf`. 
+If issues persist, check whether the company's intranet gateway is blocking long-lived connections. + +### 10.4 Model Startup Is Too Slow + +The first startup may be slow; subsequent starts will be much faster. Check with: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. Optional Optimizations for Next Steps + +1. Switch the backend image to "offline wheel installation mode" to completely eliminate the need for pip internet access on the server. +2. Use a private image registry (Harbor) instead of tar package transfers. +3. Use systemd or cron for automatic container restart and log rotation. +4. Replace the self-signed certificate with one issued by an enterprise CA to eliminate manual trust on mobile devices. + +--- + +## 12. One-Click Command Quick Reference + +### H100 Side (Assuming Files Are Already Uploaded) + +```bash +cd /data/minicpmo/deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.* /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found. Please install docker-compose or the docker compose plugin first." 
&& exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### Local Computer (Open Tunnel) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell version: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +Open on local computer: + +### Mobile (Relayed Through Laptop) + +```bash +# Bind all interfaces on the laptop +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +Open on mobile browser: `https://:3443` \ No newline at end of file diff --git a/deploy/DEPLOY_WSL2_TO_H100_ZH.md b/deploy/DEPLOY_WSL2_TO_H100_ZH.md new file mode 100644 index 00000000..54bf01fc --- /dev/null +++ b/deploy/DEPLOY_WSL2_TO_H100_ZH.md @@ -0,0 +1,783 @@ +# MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问) + +> 目标:你在本地 Windows系统PC + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和Android系统手机上测试全双工视频通话。 + +**你的环境速查:** + +| 项目 | 值 | +| --- | --- | +| 服务器 SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`(端口可能动态变化) | +| GPU | NVIDIA H100(驱动 550.90.12) | +| CUDA | 12.4(与 Dockerfile 基础镜像 `cuda:12.4.1` 完全匹配) | +| 本地 | Win10 + WSL2 Ubuntu | + +**每次执行前先设置 SSH 变量(只改这里即可):** + +```bash +export SSH_HOST= +export SSH_PORT= +export SSH_USER= +``` + +PowerShell 等价写法(Windows 终端直接用): + +```powershell +$env:SSH_HOST = "" +$env:SSH_PORT = "" +$env:SSH_USER = "" +``` + +## PowerShell 日常三命令速查(推荐) + +```powershell +# 1) 端口变化时先更新 SSH 参数 +Set-MiniCPMSSH -Port "" -User "" + +# 2) 启动手机模式(开隧道 + 打印可访问 URL) +Start-MiniCPMMobile + +# 3) 结束隧道 +Stop-MiniCPMMobile +``` + +端口变化后的快速恢复: + +```powershell +Set-MiniCPMSSH -Port "" -User "" +Restart-MiniCPMMobile +``` + +PowerShell 中引用变量时,`ssh/scp` 建议写成: + +```powershell +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/deploy_pkg/" +``` + +可选:定义一个一键函数(以后只改端口即可) + +```powershell 
+function Set-MiniCPMSSH { + param( + [Parameter(Mandatory = $true)] + [string]$Port, + [string]$Host = "", + [string]$User = "" + ) + + $env:SSH_HOST = $Host + $env:SSH_PORT = $Port + $env:SSH_USER = $User + + Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER" +} +``` + +使用示例: + +```powershell +Set-MiniCPMSSH -Port "" -User "" +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +``` + +可选:定义一键开隧道函数(本机/手机两种模式) + +```powershell +function Open-MiniCPMTunnel { + param( + [ValidateSet("local", "mobile")] + [string]$Mode = "local" + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + if ($Mode -eq "local") { + ssh -N -p $env:SSH_PORT ` + -L 3000:127.0.0.1:3000 ` + -L 3443:127.0.0.1:3443 ` + -L 32550:127.0.0.1:32550 ` + "$env:SSH_USER@$env:SSH_HOST" + } + else { + ssh -N -p $env:SSH_PORT ` + -L 0.0.0.0:3443:127.0.0.1:3443 ` + "$env:SSH_USER@$env:SSH_HOST" + } +} +``` + +使用示例: + +```powershell +# 1) 设置动态 SSH 参数 +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 仅本机访问(浏览器打开 http://127.0.0.1:3000) +Open-MiniCPMTunnel -Mode local + +# 3) 手机访问(同一 WiFi,用 https://笔记本局域网IP:3443) +Open-MiniCPMTunnel -Mode mobile +``` + +可选:自动打印手机访问地址 + +```powershell +function Get-MiniCPMLanUrl { + param( + [int]$Port = 3443 + ) + + $ipv4List = Get-NetIPAddress -AddressFamily IPv4 | + Where-Object { + $_.IPAddress -notlike '127.*' -and + $_.IPAddress -notlike '169.254.*' -and + $_.PrefixOrigin -ne 'WellKnown' + } | + Sort-Object -Property InterfaceMetric + + if (-not $ipv4List) { + throw "未找到可用 IPv4 地址,请检查网卡/网络连接" + } + + $ip = $ipv4List[0].IPAddress + $url = "https://$ip`:$Port" + + Write-Host "[MiniCPM LAN URL] $url" + return $url +} +``` + +使用示例: + +```powershell +# 先开启手机模式隧道(在另一个终端窗口运行) +Open-MiniCPMTunnel -Mode mobile + +# 当前窗口打印手机访问地址 +Get-MiniCPMLanUrl +``` + +可选:一键启动手机模式(开隧道 + 检查端口 + 打印 URL) + +```powershell +function Start-MiniCPMMobile { + param( + 
[int]$Port = 3443 + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST" + + # 在新窗口开隧道,避免阻塞当前终端 + $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru + $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id + $env:MINICPM_MOBILE_PORT = [string]$Port + Start-Sleep -Seconds 2 + + $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue + if (-not $listener) { + Write-Warning "未检测到本机 $Port 端口监听,请检查 SSH 是否连接成功。" + return + } + + $url = Get-MiniCPMLanUrl -Port $Port + Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID" + Write-Host "[MiniCPM Mobile Ready] 手机浏览器访问: $url" +} + +function Stop-MiniCPMMobile { + $pidText = $env:MINICPM_MOBILE_SSH_PID + + if ($pidText) { + $pidValue = [int]$pidText + $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue + if ($proc) { + Stop-Process -Id $pidValue -Force + Write-Host "[MiniCPM Mobile Stopped] 已停止隧道进程 PID=$pidValue" + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue + return + } + } + + $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 } + $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue + if (-not $listeners) { + Write-Host "[MiniCPM Mobile] 未检测到监听端口 $port,无需停止。" + return + } + + foreach ($item in $listeners) { + if ($item.OwningProcess -gt 0) { + try { + Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop + Write-Host "[MiniCPM Mobile Stopped] 已停止监听端口 $port 的进程 PID=$($item.OwningProcess)" + } + catch { + Write-Warning "停止 PID=$($item.OwningProcess) 失败:$($_.Exception.Message)" + } + } + } + + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item 
Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue +} + +function Restart-MiniCPMMobile { + param( + [int]$Port = 3443 + ) + + Stop-MiniCPMMobile + Start-Sleep -Seconds 1 + Start-MiniCPMMobile -Port $Port +} +``` + +使用示例: + +```powershell +# 1) 先设置动态 SSH 参数(端口变更时只改这里) +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 一键启动手机模式并输出可访问地址 +Start-MiniCPMMobile + +# 3) 端口变化后,一键重启手机模式(可选) +Restart-MiniCPMMobile + +# 4) 结束手机模式隧道 +Stop-MiniCPMMobile +``` + +--- + +## 0. 目录与文件说明 + +本指南使用了你仓库中新建的部署文件: + +- `deploy/Dockerfile.backend`:后端推理服务镜像(FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`:前端镜像(Vue build + Nginx) +- `deploy/nginx.docker.conf`:Nginx 反向代理到后端容器 +- `deploy/docker-compose.yml`:双容器编排(frontend + backend) +- `deploy/requirements.backend.txt`:后端 Python 依赖清单 +- `deploy/gen_ssl_cert.sh`:自签名 SSL 证书生成脚本(手机端 HTTPS 必需) + +--- + +## 1. 本地(WSL2)前置准备 + +在 WSL2 Ubuntu 执行: + +```bash +cd /MiniCPM-o + +# 1) 检查 Docker +sudo docker --version +sudo docker compose version + +# 2) 如果你当前用户不能直接用 docker,可先临时用 sudo docker +# 或将用户加入 docker 组(重新登录后生效) +# sudo usermod -aG docker $USER +``` + +> 说明:本地 1050Ti 不参与推理,本地只负责构建镜像,不需要本地 GPU。 + +--- + +## 2. 本地下载模型(用于上传到内网) + +推荐在本地(有网环境)下载 HuggingFace 模型,再打包上传。 + +### 2.1 安装下载工具 + +```bash +python3 -m pip install -U huggingface_hub +``` + +### 2.2 下载 MiniCPM-o 4.5 + +```bash +mkdir -p /MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` + +下载后检查体积和关键文件: + +```bash +du -sh /MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /MiniCPM-o/models/MiniCPM-o-4_5 | head +``` + +--- + +## 3. 在 WSL2 构建两个镜像 + +在仓库根目录执行: + +```bash +cd /MiniCPM-o + +# 后端镜像 +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . + +# 前端镜像 +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . 
+``` + +验证镜像存在: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. 导出镜像 + 生成 SSL 证书 + +### 4.1 导出镜像为 tar + +```bash +mkdir -p /deploy_pkg + +docker save -o /deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# 打包 compose 与 nginx 配置 +cp deploy/docker-compose.yml /deploy_pkg/ +cp deploy/nginx.docker.conf /deploy_pkg/ +``` + +可选:压缩减少传输体积 + +```bash +cd /deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 生成自签名 SSL 证书(手机端 HTTPS 必需) + +```bash +cd /MiniCPM-o +bash deploy/gen_ssl_cert.sh /deploy_pkg/certs +``` + +这会在 `/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。 + +--- + +## 5. 上传到内网服务器 + +你已经通过公司内网认证,且端口可能动态变化,请使用上面定义的 SSH 变量。 + +### 5.1 上传镜像包和配置文件 + +```bash +# 先在服务器上创建目标目录 +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /deploy_pkg" + +# 上传镜像 tar 包 +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /deploy_pkg/minicpmo-backend_latest.tar.gz \ + /deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /deploy_pkg/docker-compose.yml \ + /deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ +``` + +### 5.2 上传模型权重 + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/models/ +``` + +### 5.3 上传 SSL 证书(手机端访问需要) + +```bash +scp -P $SSH_PORT -r /deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ +``` + +> 如果端口变更,只需要修改 `SSH_PORT` 变量并重试命令。 + +--- + +## 6. 
H100 服务器准备(一次性) + +通过已建立的隧道登录服务器: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +检查环境: + +```bash +# 确认 NVIDIA 驱动(你已确认: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# 检查 Docker +docker --version +docker compose version +``` + +### 6.1 安装 NVIDIA Container Toolkit(若未安装) + +如果 `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` 失败,需要安装 toolkit。 + +安装后重启 Docker: + +```bash +sudo systemctl restart docker +``` + +再验证: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 7. H100 服务器加载镜像与启动服务 + +在服务器上执行(通过 `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` 登录后): + +```bash +cd /deploy_pkg + +# 若上传的是 .tar.gz,先解压 +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# 加载镜像 +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +# 放置运行时文件(与第 5 节的上传目录 /deploy_pkg、第 12 节的 /runtime 保持一致) +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.crt certs/server.key /runtime/certs/ + +cd /runtime +``` + +### 7.1 设置模型路径并启动 + +`docker-compose.yml` 里用了 `MODEL_PATH` 环境变量。你可以直接导出: + +```bash +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + +# 兼容两种 Compose 命令:docker compose / docker-compose +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +如果两种 Compose 都不可用(`docker compose` / `docker-compose` 都不存在),可直接用 `docker run` 启动: + +```bash +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 
${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest +``` + +如果出现 `Failed to Setup IP tables` 或 `No chain/target/match by that name`,可先绕过 bridge 网络,改用 `host` 网络启动: + +```bash +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + --add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest +``` + +查看状态: + +```bash +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi + +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +若使用 `docker run` 方案,查看状态命令: + +```bash +docker ps --filter name=minicpmo +docker logs -f minicpmo-backend +``` + +健康检查: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +应返回: + +```json +{"status":"OK"} +``` + +> 首次加载模型会较慢(几十秒到数分钟),日志出现模型初始化完成后再测试前端。 + +--- + +## 8. 本地电脑访问(SSH 端口转发) + +你已能连通 SSH 隧道,只需基于当前端口做服务转发。 + +在本地 PowerShell 或 WSL 新开一个终端: + +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +保持该终端不断开。然后在本地浏览器访问: + +- 前端页面(HTTP): +- 前端页面(HTTPS):(自签名证书,需点击"继续前往") +- 后端健康检查: + +> 浏览器会请求摄像头/麦克风权限,点击允许。本地用 `localhost` 访问时 HTTP 即可获取摄像头权限。 + +--- + +## 9. 
手机端访问(全双工视频通话) + +### 9.1 问题与原理 + +手机浏览器(Chrome/Safari)要调用摄像头和麦克风,**必须使用 HTTPS**(`localhost` 例外,但手机并非 localhost)。 + +方案:**笔记本做中继** — 手机 → 笔记本 WiFi 局域网 IP → SSH 隧道 → 服务器。 + +```text +手机浏览器 ──WiFi──▶ 笔记本:3443 ──SSH隧道──▶ H100:3443 ──Nginx──▶ 后端:32550 + (HTTPS) (绑定 0.0.0.0) +``` + +### 9.2 操作步骤 + +#### Step 1:建立"全接口绑定"的 SSH 隧道 + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> 关键区别:`0.0.0.0:3443` 让笔记本的所有网卡都监听 3443 端口,同一 WiFi 的手机才能连入。 + +#### Step 2:查看笔记本局域网 IP + +PowerShell 中执行: + +```powershell +ipconfig | Select-String "IPv4" +``` + +假设得到 `192.168.1.100`。 + +#### Step 3:Windows 防火墙放行端口 + +PowerShell(管理员)执行: + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4:手机浏览器访问 + +确保手机与笔记本连同一 WiFi,然后在手机浏览器输入: + +```text +https://192.168.1.100:3443 +``` + +- **首次访问**会提示"不安全连接"(自签名证书),选择 **「高级」→「继续前往」** +- 接着浏览器会请求摄像头/麦克风权限,**允许**即可 +- 进入视频通话页面,开始全双工对话 + +### 9.3 iOS Safari 注意事项 + +iOS Safari 对自签名证书更严格。如果无法通过上述方式跳过: + +1. 在手机上用 Safari 打开 `https://192.168.1.100:3443/certs/server.crt`(若你配置了证书下载路径),下载安装证书 +2. 或者将 `server.crt` 通过 AirDrop / 微信发送到手机,在 **设置 → 通用 → 描述文件 → 安装** +3. 再到 **设置 → 通用 → 关于本机 → 证书信任设置 → 启用完全信任** + +之后 Safari 访问 `https://192.168.1.100:3443` 即可正常使用。 + +--- + +## 10. 常见问题与排查 + +### 10.1 前端能打开,但无法对话 + +检查后端日志: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +重点看: + +- 模型路径是否存在:`/models/MiniCPM-o-4_5` +- 显存是否足够(H100 通常充足) +- 是否出现 `trust_remote_code` / 依赖版本错误 + +### 10.2 容器内 GPU 不可见 + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +若失败,优先检查 NVIDIA Container Toolkit 与 Docker daemon 配置。 + +### 10.3 WebSocket / SSE 异常 + +本项目已在 `nginx.docker.conf` 关闭缓冲并配置了 websocket upgrade。 +若仍异常,检查公司内网网关是否拦截长连接。 + +### 10.4 模型启动太慢 + +首次启动可能较慢;后续会快很多。可先看: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. 你下一步可以做的优化(可选) + +1. 将后端镜像改为“离线 wheel 安装模式”,彻底避免服务器 pip 联网需求。 +2. 
使用私有镜像仓库(Harbor)替代 tar 包传输。 +3. 用 systemd 或 cron 做容器自动拉起与日志轮转。 +4. 替换自签名证书为企业 CA 签发的证书,手机端免手动信任。 + +--- + +## 12. 一键启动命令速查 + +### H100 侧(假设文件已上传) + +```bash +cd /deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.* /runtime/certs/ + +cd /runtime +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### 本地电脑(开隧道) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell 版本: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +本地电脑打开:`http://localhost:3000` + +### 手机端(通过笔记本中转) + +```bash +# 笔记本绑定所有网卡 +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +手机浏览器打开:`https://<笔记本局域网IP>:3443` diff --git a/deploy/Dockerfile.backend b/deploy/Dockerfile.backend new file mode 100644 index 00000000..b4291181 --- /dev/null +++ b/deploy/Dockerfile.backend @@ -0,0 +1,71 @@ +# ============================================ +# MiniCPM-o 4.5 Backend Inference Service Dockerfile +# Base image: NVIDIA CUDA 12.4 + Ubuntu 22.04 (matches server driver 550.x and the cu124 PyTorch wheels below) +# ============================================ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +# Avoid interactive prompts +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +# ---- System dependencies ---- +RUN apt-get update && apt-get install -y \ + python3.10 \ + python3.10-dev \ + python3-pip \ + ffmpeg \ + libsndfile1 \ + libsndfile1-dev \ + git \ + wget \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Set python3.10 as 
default +RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \ + ln -sf /usr/bin/python3 /usr/bin/python && \ + python3 -m pip install --upgrade pip setuptools wheel + +# ---- PyTorch (CUDA 12.4) ---- +RUN pip install --no-cache-dir \ + "torch>=2.3.0,<=2.8.0" \ + "torchaudio<=2.8.0" \ + --index-url https://download.pytorch.org/whl/cu124 + +# ---- MiniCPM-o core dependencies ---- +RUN pip install --no-cache-dir \ + "transformers==4.51.0" \ + accelerate \ + "minicpmo-utils[all]>=1.0.5" \ + librosa \ + soundfile \ + onnxruntime \ + sentencepiece \ + Pillow \ + numpy + +# ---- Web service dependencies ---- +RUN pip install --no-cache-dir \ + fastapi \ + uvicorn \ + aiofiles \ + pydantic + +# ---- Working directory ---- +WORKDIR /app + +# ---- Copy backend code ---- +COPY web_demos/minicpm-o_2.6/model_server.py /app/ +COPY web_demos/minicpm-o_2.6/vad_utils.py /app/ +COPY web_demos/minicpm-o_2.6/silero_vad.onnx /app/ + +# ---- Copy TTS reference audios ---- +COPY assets/ref_audios/ /app/assets/ref_audios/ + +# ---- Expose port ---- +EXPOSE 32550 + +# ---- Startup command ---- +# Model path is mounted to /models/MiniCPM-o-4_5 via volume +ENV BACKEND_PORT=32550 +CMD ["sh", "-lc", "python3 model_server.py --model /models/MiniCPM-o-4_5 --port ${BACKEND_PORT}"] diff --git a/deploy/Dockerfile.frontend b/deploy/Dockerfile.frontend new file mode 100644 index 00000000..5fc22e8f --- /dev/null +++ b/deploy/Dockerfile.frontend @@ -0,0 +1,36 @@ +# ============================================ +# MiniCPM-o 4.5 Frontend Web Service Dockerfile +# Multi-stage build: Node.js build + Nginx deployment +# ============================================ + +# ---- Stage 1: Build Vue project ---- +FROM node:20-alpine AS build-stage + +WORKDIR /build +COPY web_demos/minicpm-o_2.6/web_server/ /build/ + +# Install pnpm and build +# Generate placeholder certificate files (vite.config.js server.https is also parsed during build) +RUN npm install -g pnpm && \ + touch key.pem cert.pem && \ + pnpm install 
&& \ + pnpm run build + +# ---- Stage 2: Nginx static service ---- +FROM nginx:alpine AS production-stage + +# envsubst is used to render nginx config template at container startup +RUN apk add --no-cache gettext + +# Copy build artifacts +COPY --from=build-stage /build/dist /usr/share/nginx/html + +# Copy custom nginx config template (Docker network version) +COPY deploy/nginx.docker.conf /etc/nginx/nginx.conf.template + +# Render nginx config with BACKEND_PORT at startup +ENV BACKEND_PORT=32550 + +EXPOSE 3000 3443 + +CMD ["sh", "-lc", "envsubst '$$BACKEND_PORT' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && nginx -g 'daemon off;'"] diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml new file mode 100644 index 00000000..60141e28 --- /dev/null +++ b/deploy/docker-compose.yml @@ -0,0 +1,64 @@ +# ============================================ +# MiniCPM-o 4.5 Docker Compose Deployment Configuration +# ============================================ +# Usage: +# docker compose -f deploy/docker-compose.yml up -d +# +# Prerequisites: +# 1. NVIDIA Container Toolkit is installed on the server +# 2. Model weights are placed in the ${MODEL_PATH} directory +# ============================================ + +services: + # ---- Backend Inference Service (GPU) ---- + model-backend: + image: minicpmo-backend:latest + container_name: minicpmo-backend + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + volumes: + # Mount model weights directory (host path → container path) + - ${MODEL_PATH:-}:/models/MiniCPM-o-4_5:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + ports: + - "${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550}" + # Note: BACKEND_PORT is the application listening port (default 32550), + # which is different from the external SSH temporary tunnel port. 
+ healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:${BACKEND_PORT:-32550}/api/v1/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s # Model loading may take a long time + networks: + - minicpmo-net + + # ---- Frontend Web Service (Nginx) ---- + web-frontend: + image: minicpmo-frontend:latest + container_name: minicpmo-frontend + restart: unless-stopped + ports: + - "3000:3000" + - "3443:3443" # HTTPS (for mobile access) + volumes: + # Mount SSL certificate directory (defaults to ./certs next to this compose file) + - ${CERTS_PATH:-./certs}:/etc/nginx/certs:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + depends_on: + model-backend: + condition: service_started + networks: + - minicpmo-net + +networks: + minicpmo-net: + driver: bridge diff --git a/deploy/gen_ssl_cert.sh b/deploy/gen_ssl_cert.sh new file mode 100644 index 00000000..ac2e5d3b --- /dev/null +++ b/deploy/gen_ssl_cert.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# ============================================ +# Generate self-signed SSL certificate (for Nginx HTTPS + mobile access) +# Usage: bash deploy/gen_ssl_cert.sh [output directory] +# ============================================ +set -e + +OUT_DIR="${1:-deploy/certs}" +mkdir -p "$OUT_DIR" + +echo ">>> Generating self-signed SSL certificate to $OUT_DIR ..." 
+openssl req -x509 -nodes -days 3650 \ + -newkey rsa:2048 \ + -keyout "$OUT_DIR/server.key" \ + -out "$OUT_DIR/server.crt" \ + -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=localhost" \ + -addext "subjectAltName=DNS:localhost,IP:127.0.0.1" + +echo ">>> Certificate generated:" +ls -lh "$OUT_DIR"/server.* +echo "" +echo ">>> Tip: After uploading the entire $OUT_DIR directory to the server," +echo " create a certs/ directory next to docker-compose.yml and put server.crt + server.key inside" diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf new file mode 100644 index 00000000..a5832d9c --- /dev/null +++ b/deploy/nginx.docker.conf @@ -0,0 +1,122 @@ +user root; +worker_processes auto; +pid /run/nginx.pid; + +events { + worker_connections 768; +} + +http { + # ---- Basic Settings ---- + client_max_body_size 20M; + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # ---- Logs ---- + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + # ---- Gzip Compression ---- + gzip on; + + # ---- Virtual Host (HTTP, Local Access) ---- + server { + listen 3000; + server_name _; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + # Backend API requests → Forward to backend container (Docker service name: model-backend) + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + # Disable buffering (required for SSE streaming responses) + proxy_buffering off; + proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + # WebSocket requests → Forward to backend container + location /ws { + proxy_pass 
http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + # Frontend static files + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } + + # ---- Virtual Host (HTTPS, Mobile Access) ---- + server { + listen 3443 ssl; + server_name localhost; + + ssl_certificate /etc/nginx/certs/server.crt; + ssl_certificate_key /etc/nginx/certs/server.key; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + proxy_buffering off; + proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + location /ws { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } +} diff --git a/deploy/requirements.backend.txt b/deploy/requirements.backend.txt new file mode 100644 index 00000000..60648e1b --- /dev/null +++ b/deploy/requirements.backend.txt @@ -0,0 +1,29 @@ +# ============================================ +# MiniCPM-o 4.5 Backend Python Requirements +# For offline environments: pip download / pip install +# 
============================================ + +# == PyTorch (CUDA 12.4) == +# Note: PyTorch should be downloaded separately from https://download.pytorch.org/whl/cu124 +# torch>=2.3.0,<=2.8.0 +# torchaudio<=2.8.0 + +# == Core Model Dependencies == +transformers==4.51.0 +accelerate +minicpmo-utils[all]>=1.0.5 +sentencepiece + +# == Audio/Video Processing == +librosa +soundfile +onnxruntime +Pillow +numpy + +# == Web Service == +fastapi +uvicorn[standard] +aiofiles +pydantic +httpx diff --git a/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak new file mode 100644 index 00000000..9a4285cf Binary files /dev/null and b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak differ diff --git a/web_demos/minicpm-o_2.6/miniCPM4.5.svg b/web_demos/minicpm-o_2.6/miniCPM4.5.svg new file mode 100644 index 00000000..dbb24656 --- /dev/null +++ b/web_demos/minicpm-o_2.6/miniCPM4.5.svg @@ -0,0 +1,28 @@ + + + 编组 5 + + + + + + + + + + + + + + + + + + + 4.5 + 4.5 + + + + + \ No newline at end of file diff --git a/web_demos/minicpm-o_2.6/model_server.py b/web_demos/minicpm-o_2.6/model_server.py index d9e86bdb..7e551b2d 100644 --- a/web_demos/minicpm-o_2.6/model_server.py +++ b/web_demos/minicpm-o_2.6/model_server.py @@ -91,7 +91,7 @@ def __init__(self): self.device='cuda:0' self.minicpmo_model_path = args.model #"openbmb/MiniCPM-o-2_6" - self.model_version = "2.6" + self.model_version = "4.5" with torch.no_grad(): self.minicpmo_model = AutoModel.from_pretrained(self.minicpmo_model_path, trust_remote_code=True, torch_dtype=self.target_dtype, attn_implementation='sdpa') self.minicpmo_tokenizer = AutoTokenizer.from_pretrained(self.minicpmo_model_path, trust_remote_code=True) @@ -103,6 +103,10 @@ def __init__(self): self.ref_path_default = "assets/ref_audios/default.wav" self.ref_path_female = "assets/ref_audios/female_example.wav" self.ref_path_male = "assets/ref_audios/male_example.wav" + self.tts_sample_rate = 24000 # 4.5 uses 24kHz 
(s3tokenizer) + + # 4.5: init token2wav cache with default ref audio for streaming TTS + self._init_token2wav_with_ref(self.ref_path_default) self.input_audio_id = 0 self.input_audio_vad_id = 0 @@ -119,7 +123,7 @@ def __init__(self): self.msg_type = 1 self.speaking_time_stamp = 0 - self.cycle_wait_time = 12800/24000 + 0.15 + self.cycle_wait_time = 25 * 0.04 + 0.15 # 4.5: 25 audio tokens/chunk, each ~0.04s self.extra_wait_time = 2.5 self.server_wait = True @@ -203,69 +207,77 @@ def no_active_stream(self): return True return False + def _init_token2wav_with_ref(self, ref_path): + """Initialize token2wav cache with a reference audio for streaming TTS (4.5 API).""" + try: + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + with torch.no_grad(): + self.minicpmo_model.init_token2wav_cache(ref_audio) + logger.info(f"init_token2wav_cache done with ref: {ref_path}") + except Exception as e: + logger.error(f"init_token2wav_cache failed: {e}") + def sys_prompt_init(self, msg_type): if self.past_session_id == self.session_id: return logger.info("### sys_prompt_init ###") logger.info(f'msg_type is {msg_type}') - if msg_type <= 1: #audio - audio_voice_clone_prompt = "Use the voice in the audio prompt to synthesize new content." - audio_assistant_prompt = "You are a helpful assistant with the above voice style." 
- ref_path = self.ref_path_default - - if self.customized_options is not None: - audio_voice_clone_prompt = self.customized_options['voice_clone_prompt'] - audio_assistant_prompt = self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [audio_voice_clone_prompt + "\n", audio_prompt, "\n" + audio_assistant_prompt]} - elif msg_type == 2: #video - voice_clone_prompt="你是一个AI助手。你能接受视频,音频和文本输入并输出语音和文本。模仿输入音频中的声音特征。" - assistant_prompt="作为助手,你将使用这种声音风格说话。" + # Determine ref audio path + ref_path = self.ref_path_default + language = "en" + if msg_type == 2: # video ref_path = self.ref_path_video_default - - if self.customized_options is not None: - voice_clone_prompt = self.customized_options['voice_clone_prompt'] - assistant_prompt = self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [voice_clone_prompt, audio_prompt, assistant_prompt]} - # elif msg_type == 3: #user start - # assistant_prompt="作为助手,你将使用这种声音风格说话。" - # if self.customized_options is not None: - # assistant_prompt = self.customized_options['assistant_prompt'] - - # sys_msg = {'role': 'user', 'content': [assistant_prompt]} - + language = "zh" + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt') == 1: + ref_path = self.ref_path_default + elif 
self.customized_options.get('use_audio_prompt') == 2: + ref_path = self.ref_path_female + elif self.customized_options.get('use_audio_prompt') == 3: + ref_path = self.ref_path_male + + # 4.5 API: use model.get_sys_prompt() to build system message + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + sys_msg = self.minicpmo_model.get_sys_prompt( + ref_audio=ref_audio, + mode="omni", + language=language, + ) + + # Re-init token2wav cache with the selected ref audio + self._init_token2wav_with_ref(ref_path) + self.msg_type = msg_type msgs = [sys_msg] - if self.customized_options is not None: - if self.customized_options['use_audio_prompt'] > 0: + + def safe_streaming_prefill(prompt_msgs): + try: self.minicpmo_model.streaming_prefill( session_id=str(self.session_id), - msgs=msgs, + msgs=prompt_msgs, tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, ) + return True + except Exception as e: + logger.warning(f"streaming_prefill failed with audio prompt, fallback to text-only prompt: {e}") + fallback_msg = self.minicpmo_model.get_sys_prompt(ref_audio=None, mode="omni", language=language) + self.minicpmo_model.streaming_prefill( + session_id=str(self.session_id), + msgs=[fallback_msg], + tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, + ) + return False + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt', 0) > 0: + safe_streaming_prefill(msgs) if msg_type == 0: - self.minicpmo_model.streaming_prefill( - session_id=str(self.session_id), - msgs=msgs, - tokenizer=self.minicpmo_tokenizer, - ) + safe_streaming_prefill(msgs) self.savedir = os.path.join(f"./log_data/{args.port}/", str(time.time())) if not os.path.exists(self.savedir): @@ -297,7 +309,15 @@ def clear(self): self.audio_input = [] self.image_prefill = None - if self.minicpmo_model.llm_past_key_values[0][0].shape[2]>8192: + kv = self.minicpmo_model.llm_past_key_values + kv_len = 0 + if kv is not None: + if hasattr(kv, 'get_seq_length'): + kv_len 
= kv.get_seq_length() + elif isinstance(kv, (list, tuple)) and len(kv) > 0: + if isinstance(kv[0], (list, tuple)) and len(kv[0]) > 0: + kv_len = kv[0][0].shape[2] + if kv_len > 8192: self.session_id += 1 # to clear all kv cache self.sys_prompt_flag = False @@ -468,6 +488,8 @@ def prefill(self, audio, image, is_end): msgs=msgs, tokenizer=self.minicpmo_tokenizer, max_slice_nums=slice_nums, + use_tts_template=True, + is_last_chunk=(is_end), ) self.input_audio_id += 1 @@ -504,49 +526,69 @@ async def generate(self): with open(input_audio_path, 'rb') as wav_file: audio_stream = wav_file.read() except FileNotFoundError: - print(f"File {input_audio_path} not found.") + logger.warning(f"File {input_audio_path} not found.") yield base64.b64encode(audio_stream).decode('utf-8'), "assistant:\n" - print('=== gen start: ', time.time() - time_gen) - first_time = True - temp_time = time.time() - temp_time1 = time.time() + logger.info(f'=== gen start: {time.time() - time_gen:.3f}s ===') with torch.inference_mode(): if self.stop_response: self.generate_end() return self.minicpmo_model.config.stream_input=True - msg = {"role":"user", "content": self.cnts} - msgs = [msg] text = '' self.speaking_time_stamp = time.time() + sr = self.tts_sample_rate # 4.5 fixed 24kHz try: - for r in self.minicpmo_model.streaming_generate( + for result in self.minicpmo_model.streaming_generate( session_id=str(self.session_id), tokenizer=self.minicpmo_tokenizer, generate_audio=True, - # enable_regenerate=True, + use_tts_template=True, + do_sample=True, ): if self.stop_response: self.generate_end() return - audio_np, sr, text = r["audio_wav"], r["sampling_rate"], r["text"] - - output_audio_path = self.savedir + f'/output_audio_log/output_audio_{self.output_audio_id}.wav' - self.output_audio_id += 1 - soundfile.write(output_audio_path, audio_np, samplerate=sr) - audio_stream = None - try: - with open(output_audio_path, 'rb') as wav_file: - audio_stream = wav_file.read() - except FileNotFoundError: - 
print(f"File {output_audio_path} not found.") - temp_time1 = time.time() - print('text: ', text) - yield base64.b64encode(audio_stream).decode('utf-8'), text + # 4.5 API: yields (waveform_chunk: Tensor, text_chunk: str) + # End signal: (None, None) + if isinstance(result, tuple): + waveform_chunk, text_chunk = result + else: + # fallback for unexpected format + logger.warning(f"Unexpected streaming_generate result type: {type(result)}") + continue + + if waveform_chunk is None: + # generation complete signal + break + + # Convert tensor to numpy, ensure 1D float32 + if isinstance(waveform_chunk, torch.Tensor): + audio_np = waveform_chunk.cpu().float().numpy() + else: + audio_np = np.array(waveform_chunk, dtype=np.float32) + audio_np = audio_np.squeeze() # remove batch dims + if audio_np.ndim == 0 or audio_np.size == 0: + continue # skip empty chunks + + # Resample from model's 24kHz to frontend's expected 16kHz + audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000) + + if text_chunk: + text += text_chunk + + # Encode audio chunk to WAV in memory (no disk I/O) + audio_buffer = io.BytesIO() + soundfile.write(audio_buffer, audio_np, samplerate=16000, format='WAV', subtype='PCM_16') + audio_stream = audio_buffer.getvalue() + + # Send delta text (text_chunk), not accumulated text + yield base64.b64encode(audio_stream).decode('utf-8'), text_chunk if text_chunk else '' self.speaking_time_stamp += self.cycle_wait_time except Exception as e: logger.error(f"Error happened during generation: {str(e)}") + import traceback + traceback.print_exc() yield None, '\n' except Exception as e: @@ -582,8 +624,7 @@ def upload_customized_audio(self, audio_data, audio_fmt): output_audio_path = self.savedir + f'/customized_audio.wav' soundfile.write(output_audio_path, audio_np, sr) self.customized_audio = output_audio_path - logger.info(f"processed customized {audio_fmt} audio") - print(audio_np.shape, type(audio_np), sr) + logger.info(f"processed customized {audio_fmt} audio, 
shape={audio_np.shape}, sr={sr}") else: logger.info(f"empty customized audio, use default value instead.") self.customized_audio = None @@ -734,14 +775,14 @@ async def websocket_stream(websocket: WebSocket, async def generate_sse_response(request: Request, uid: Optional[str] = Header(None)): global stream_manager - print(f"uid: {uid}") + logger.info(f"uid: {uid}") try: # Wait for streaming to complete or timeout while not stream_manager.is_streaming_complete.is_set(): # if stream_manager.is_timed_out(): # yield f"data: {json.dumps({'error': 'Stream timeout'})}\n\n" # return - # print(f"{uid} whille not stream_manager.is_streaming_complete.is_set(), asyncio.sleep(0.1)") + await asyncio.sleep(0.1) logger.info("streaming complete\n") @@ -912,7 +953,7 @@ async def init_options(request: Request, uid: Optional[str] = Header(None)): ctype = content["type"] raise HTTPException(status_code=400, detail=f"Invalid content type: {ctype}") version = stream_manager.model_version - print(version) + logger.info(f"Model version: {version}") response = { "id": uid, "choices": {