diff --git a/.gitignore b/.gitignore
index 988f840d..c6843c8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
*.bk
__pycache__
.DS_Store
+models
diff --git a/README.md b/README.md
index d4d9c48e..9556bfcd 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,12 @@
**A Gemini 2.5 Flash Level MLLM for Vision, Speech, and Full-Duplex Multimodal Live Streaming on Your Phone**
+
+
+💡 **Tip**: If you find this repository's structure or content difficult to understand, visit [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) for a comprehensive detailed explanation.
+
+
+
[中文](./README_zh.md) |
English
diff --git a/README_zh.md b/README_zh.md
index f9a8d1c1..255631a4 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -4,6 +4,12 @@
**端侧可用的 Gemini 2.5 Flash 级视觉、语音、全双工多模态实时流式大模型**
+
+
+💡 **提示**: 如果你觉得这个仓库的结构或内容难以理解,请访问 [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) 获取详细的解读。
+
+
+
中文 |
[English](./README.md)
diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md
new file mode 100644
index 00000000..ba15d8d4
--- /dev/null
+++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md
@@ -0,0 +1,783 @@
+# MiniCPM-o 4.5 Offline Deployment Guide (Build Image in WSL2 → Upload to Intranet H100 Server → Local + Mobile Access)
+
+> Goal: Build a Docker image on your local Windows PC with WSL2, transfer the image and model to a company H100 server with no public internet access, start the service, and test full-duplex video calling in a local browser and on an Android phone.
+
+**Your Environment Quick Reference:**
+
+| Item | Value |
+| --- | --- |
+| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change dynamically) |
+| GPU | NVIDIA H100 (driver 550.90.12) |
+| CUDA | 12.4 (fully matches the Dockerfile base image `cuda:12.4.1`) |
+| Local | Win10 + WSL2 Ubuntu |
+
+**Set SSH variables before each run (only change here):**
+
+```bash
+export SSH_HOST=127.0.0.1
+export SSH_PORT=54062
+export SSH_USER=your_user
+```
+
+PowerShell equivalent (use directly in Windows Terminal):
+
+```powershell
+$env:SSH_HOST = "127.0.0.1"
+$env:SSH_PORT = "54062"
+$env:SSH_USER = "your_user"
+```
+
+## PowerShell Daily Three-Command Quick Reference (Recommended)
+
+```powershell
+# 1) Update SSH parameters when the port changes
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Start mobile mode (open tunnel + print accessible URL)
+Start-MiniCPMMobile
+
+# 3) Stop tunnel
+Stop-MiniCPMMobile
+```
+
+Quick recovery after a port change:
+
+```powershell
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+Restart-MiniCPMMobile
+```
+
+When referencing variables in PowerShell, write `ssh`/`scp` as:
+
+```powershell
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/"
+```
+
+Optional: Define a one-click function (only change the port going forward)
+
+```powershell
+function Set-MiniCPMSSH {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Port,
+        [string]$Host = "127.0.0.1",
+        [string]$User = "your_user"
+    )
+
+    $env:SSH_HOST = $Host
+    $env:SSH_PORT = $Port
+    $env:SSH_USER = $User
+
+    Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER"
+}
+```
+
+Usage example:
+
+```powershell
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+```
+
+Optional: Define a one-click tunnel function (local / mobile modes)
+
+```powershell
+function Open-MiniCPMTunnel {
+    param(
+        [ValidateSet("local", "mobile")]
+        [string]$Mode = "local"
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    if ($Mode -eq "local") {
+        ssh -N -p $env:SSH_PORT `
+            -L 3000:127.0.0.1:3000 `
+            -L 3443:127.0.0.1:3443 `
+            -L 32550:127.0.0.1:32550 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+    else {
+        ssh -N -p $env:SSH_PORT `
+            -L 0.0.0.0:3443:127.0.0.1:3443 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Local access only (open http://127.0.0.1:3000 in browser)
+Open-MiniCPMTunnel -Mode local
+
+# 3) Mobile access (same WiFi, use https://laptop_lan_ip:3443)
+Open-MiniCPMTunnel -Mode mobile
+```
+
+Optional: Auto-print the mobile access URL
+
+```powershell
+function Get-MiniCPMLanUrl {
+    param(
+        [int]$Port = 3443
+    )
+
+    $ipv4List = Get-NetIPAddress -AddressFamily IPv4 |
+        Where-Object {
+            $_.IPAddress -notlike '127.*' -and
+            $_.IPAddress -notlike '169.254.*' -and
+            $_.PrefixOrigin -ne 'WellKnown'
+        } |
+        Sort-Object -Property InterfaceMetric
+
+    if (-not $ipv4List) {
+        throw "No usable IPv4 address found. Check the network adapter/connection."
+    }
+
+    $ip = $ipv4List[0].IPAddress
+    $url = "https://$ip`:$Port"
+
+    Write-Host "[MiniCPM LAN URL] $url"
+    return $url
+}
+```
+
+Usage example:
+
+```powershell
+# Start mobile mode tunnel first (run in another terminal window)
+Open-MiniCPMTunnel -Mode mobile
+
+# Print mobile access URL in the current window
+Get-MiniCPMLanUrl
+```
+
+Optional: One-click mobile mode startup (open tunnel + check port + print URL)
+
+```powershell
+function Start-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST"
+
+    # Open tunnel in a new window to avoid blocking the current terminal
+    $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru
+    $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id
+    $env:MINICPM_MOBILE_PORT = [string]$Port
+    Start-Sleep -Seconds 2
+
+    $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listener) {
+        Write-Warning "No listener detected on local port $Port. Please check whether SSH connected successfully."
+        return
+    }
+
+    $url = Get-MiniCPMLanUrl -Port $Port
+    Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID"
+    Write-Host "[MiniCPM Mobile Ready] Open on mobile browser: $url"
+}
+
+function Stop-MiniCPMMobile {
+    $pidText = $env:MINICPM_MOBILE_SSH_PID
+
+    if ($pidText) {
+        $pidValue = [int]$pidText
+        $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue
+        if ($proc) {
+            Stop-Process -Id $pidValue -Force
+            Write-Host "[MiniCPM Mobile Stopped] Tunnel process stopped PID=$pidValue"
+            Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+            Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+            return
+        }
+    }
+
+    $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 }
+    $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listeners) {
+        Write-Host "[MiniCPM Mobile] No listener detected on port $port. Nothing to stop."
+        return
+    }
+
+    foreach ($item in $listeners) {
+        if ($item.OwningProcess -gt 0) {
+            try {
+                Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop
+                Write-Host "[MiniCPM Mobile Stopped] Stopped process listening on port $port PID=$($item.OwningProcess)"
+            }
+            catch {
+                Write-Warning "Failed to stop PID=$($item.OwningProcess): $($_.Exception.Message)"
+            }
+        }
+    }
+
+    Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+    Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+}
+
+function Restart-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    Stop-MiniCPMMobile
+    Start-Sleep -Seconds 1
+    Start-MiniCPMMobile -Port $Port
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters first (only change here when port changes)
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) One-click start mobile mode and output the accessible URL
+Start-MiniCPMMobile
+
+# 3) One-click restart mobile mode after port change (optional)
+Restart-MiniCPMMobile
+
+# 4) Stop mobile mode tunnel
+Stop-MiniCPMMobile
+```
+
+---
+
+## 0. Directory and File Overview
+
+This guide uses the newly created deployment files in your repository:
+
+- `deploy/Dockerfile.backend`: Backend inference service image (FastAPI + MiniCPM-o 4.5)
+- `deploy/Dockerfile.frontend`: Frontend image (Vue build + Nginx)
+- `deploy/nginx.docker.conf`: Nginx reverse proxy to backend container
+- `deploy/docker-compose.yml`: Two-container orchestration (frontend + backend)
+- `deploy/requirements.backend.txt`: Backend Python dependency list
+- `deploy/gen_ssl_cert.sh`: Self-signed SSL certificate generation script (required for mobile HTTPS)
+
+---
+
+## 1. Local (WSL2) Prerequisites
+
+Run in WSL2 Ubuntu:
+
+```bash
+cd /mnt/d/JiuTian/codes/MiniCPM-o
+
+# 1) Check Docker
+sudo docker --version
+sudo docker compose version
+
+# 2) If your current user cannot use docker directly, you can temporarily use sudo docker
+# Or add the user to the docker group (takes effect after re-login)
+# sudo usermod -aG docker $USER
+```
+
+> Note: The local 1050Ti does not participate in inference. The local machine is only responsible for building images and does not require a local GPU.
+
+---
+
+## 2. Download the Model Locally (for Upload to Intranet)
+
+It is recommended to download the HuggingFace model locally (where internet is available), then package and upload it.
+
+### 2.1 Install Download Tool
+
+```bash
+python3 -m pip install -U huggingface_hub
+```
+
+### 2.2 Download MiniCPM-o 4.5
+
+```bash
+mkdir -p /mnt/d/JiuTian/codes/MiniCPM-o/models
+python3 - << 'PY'
+from huggingface_hub import snapshot_download
+snapshot_download(
+ repo_id='openbmb/MiniCPM-o-4_5',
+ local_dir='/mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5',
+ local_dir_use_symlinks=False,
+ resume_download=True
+)
+PY
+```
+
+After downloading, check the size and key files:
+
+```bash
+du -sh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5
+ls -lh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head
+```
+
+---
+
+## 3. Build Two Images in WSL2
+
+Run from the repository root directory:
+
+```bash
+cd /mnt/d/JiuTian/codes/MiniCPM-o
+
+# Backend image
+docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest .
+
+# Frontend image
+docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest .
+```
+
+Verify the images exist:
+
+```bash
+docker images | grep minicpmo
+```
+
+---
+
+## 4. Export Images + Generate SSL Certificate
+
+### 4.1 Export Images as tar
+
+```bash
+mkdir -p /mnt/d/JiuTian/deploy_pkg
+
+docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest
+docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest
+
+# Package compose and nginx config
+cp deploy/docker-compose.yml /mnt/d/JiuTian/deploy_pkg/
+cp deploy/nginx.docker.conf /mnt/d/JiuTian/deploy_pkg/
+```
+
+Optional: Compress to reduce transfer size
+
+```bash
+cd /mnt/d/JiuTian/deploy_pkg
+gzip -1 minicpmo-backend_latest.tar
+gzip -1 minicpmo-frontend_latest.tar
+```
+
+### 4.2 Generate Self-Signed SSL Certificate (Required for Mobile HTTPS)
+
+```bash
+cd /mnt/d/JiuTian/codes/MiniCPM-o
+bash deploy/gen_ssl_cert.sh /mnt/d/JiuTian/deploy_pkg/certs
+```
+
+This will generate `server.crt` and `server.key` under `/mnt/d/JiuTian/deploy_pkg/certs/`.
+
+---
+
+## 5. Upload to the Intranet Server
+
+You have already passed company intranet authentication, and the port may change dynamically. Please use the SSH variables defined above.
+
+### 5.1 Upload Image Packages and Config Files
+
+```bash
+# First create the target directory on the server
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg"
+
+# Upload image tar packages
+scp -P $SSH_PORT -o ServerAliveInterval=60 \
+ /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar.gz \
+ /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar.gz \
+ /mnt/d/JiuTian/deploy_pkg/docker-compose.yml \
+ /mnt/d/JiuTian/deploy_pkg/nginx.docker.conf \
+ $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/
+```
+
+### 5.2 Upload Model Weights
+
+```bash
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models"
+
+scp -P $SSH_PORT -r -o ServerAliveInterval=60 \
+ /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 \
+ $SSH_USER@$SSH_HOST:/data/models/
+```
+
+### 5.3 Upload SSL Certificate (Required for Mobile Access)
+
+```bash
+scp -P $SSH_PORT -r /mnt/d/JiuTian/deploy_pkg/certs \
+ $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/
+```
+
+> If the port changes, simply update the `SSH_PORT` variable and retry the command.
+
+---
+
+## 6. H100 Server Preparation (One-Time)
+
+Log in to the server through the established tunnel:
+
+```bash
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST
+```
+
+Check the environment:
+
+```bash
+# Confirm NVIDIA driver (already confirmed: 550.90.12, CUDA 12.4 ✓)
+nvidia-smi
+
+# Check Docker
+docker --version
+docker compose version
+```
+
+### 6.1 Install NVIDIA Container Toolkit (If Not Installed)
+
+If `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` fails, you need to install the toolkit.
+
+Restart Docker after installation:
+
+```bash
+sudo systemctl restart docker
+```
+
+Verify again:
+
+```bash
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+```
+
+---
+
+## 7. Load Images and Start Services on the H100 Server
+
+Run on the server (after logging in via `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`):
+
+```bash
+cd /data/minicpmo/deploy_pkg
+
+# If the uploaded files are .tar.gz, decompress first
+gunzip -f minicpmo-backend_latest.tar.gz || true
+gunzip -f minicpmo-frontend_latest.tar.gz || true
+
+# Load images
+docker load -i minicpmo-backend_latest.tar
+docker load -i minicpmo-frontend_latest.tar
+
+# Place runtime files
+mkdir -p /data/minicpmo/runtime/certs
+cp docker-compose.yml /data/minicpmo/runtime/
+cp certs/server.crt certs/server.key /data/minicpmo/runtime/certs/
+
+cd /data/minicpmo/runtime
+```
+
+### 7.1 Set Model Path and Start
+
+`docker-compose.yml` uses the `MODEL_PATH` environment variable. You can export it directly:
+
+```bash
+export MODEL_PATH=/data/models/MiniCPM-o-4_5
+export CERTS_PATH=./certs
+export BACKEND_PORT=32550
+
+# Compatible with both Compose commands: docker compose / docker-compose
+if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+ COMPOSE_CMD="docker-compose"
+else
+ echo "Compose not found. Please install docker-compose or the docker compose plugin first." && exit 1
+fi
+
+$COMPOSE_CMD -f docker-compose.yml up -d
+```
+
+If neither Compose option is available (`docker compose` / `docker-compose` both absent), you can start directly with `docker run`:
+
+```bash
+docker network create minicpmo-net || true
+docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true
+
+docker run -d \
+ --name minicpmo-backend \
+ --restart unless-stopped \
+ --gpus all \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \
+ -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \
+ --network minicpmo-net \
+ minicpmo-backend:latest
+
+docker run -d \
+ --name minicpmo-frontend \
+ --restart unless-stopped \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -p 3000:3000 \
+ -p 3443:3443 \
+ -v ${CERTS_PATH}:/etc/nginx/certs:ro \
+ --network minicpmo-net \
+ minicpmo-frontend:latest
+```
+
+If you encounter `Failed to Setup IP tables` or `No chain/target/match by that name`, you can bypass the bridge network and start with the `host` network instead:
+
+```bash
+docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true
+
+docker run -d \
+ --name minicpmo-backend \
+ --restart unless-stopped \
+ --gpus all \
+ --network host \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \
+ minicpmo-backend:latest
+
+docker run -d \
+ --name minicpmo-frontend \
+ --restart unless-stopped \
+ --network host \
+ --add-host model-backend:127.0.0.1 \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -v ${CERTS_PATH}:/etc/nginx/certs:ro \
+ minicpmo-frontend:latest
+```
+
+Check status:
+
+```bash
+if [ -z "$COMPOSE_CMD" ]; then
+ if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+ else
+ COMPOSE_CMD="docker-compose"
+ fi
+fi
+
+$COMPOSE_CMD -f docker-compose.yml ps
+docker logs -f minicpmo-backend
+```
+
+If using the `docker run` approach, check status with:
+
+```bash
+docker ps --filter name=minicpmo
+docker logs -f minicpmo-backend
+```
+
+Health check:
+
+```bash
+curl http://127.0.0.1:32550/api/v1/health
+```
+
+Expected response:
+
+```json
+{"status":"OK"}
+```
+
+> The first model load may be slow (tens of seconds to a few minutes). Wait until the logs show model initialization complete before testing the frontend.
+
+---
+
+## 8. Local Computer Access (SSH Port Forwarding)
+
+You can already connect via SSH tunnel — just forward the service ports using the current port.
+
+Open a new terminal in local PowerShell or WSL:
+
+```bash
+ssh -N -p $SSH_PORT \
+ -L 3000:127.0.0.1:3000 \
+ -L 3443:127.0.0.1:3443 \
+ -L 32550:127.0.0.1:32550 \
+ $SSH_USER@$SSH_HOST
+```
+
+Keep this terminal connected. Then open in a local browser:
+
+- Frontend (HTTP): `http://127.0.0.1:3000`
+- Frontend (HTTPS): `https://127.0.0.1:3443` (self-signed cert, click "Continue" to proceed)
+- Backend health check: `http://127.0.0.1:32550/api/v1/health`
+
+> The browser will request camera/microphone permissions — click Allow. When accessing via `localhost`, HTTP is sufficient to obtain camera permissions.
+
+---
+
+## 9. Mobile Access (Full-Duplex Video Calling)
+
+### 9.1 Problem and Principle
+
+Mobile browsers (Chrome/Safari) **must use HTTPS** to access the camera and microphone (`localhost` is an exception, but the phone is not localhost).
+
+Solution: **Use the laptop as a relay** — Phone → Laptop WiFi LAN IP → SSH tunnel → Server.
+
+```text
+Mobile browser ──WiFi──▶ Laptop:3443 ──SSH tunnel──▶ H100:3443 ──Nginx──▶ Backend:32550
+ (HTTPS) (bound to 0.0.0.0)
+```
+
+### 9.2 Steps
+
+#### Step 1: Establish an SSH Tunnel with All-Interface Binding
+
+```bash
+ssh -N -p $SSH_PORT \
+ -L 0.0.0.0:3443:127.0.0.1:3443 \
+ $SSH_USER@$SSH_HOST
+```
+
+> Key difference: `0.0.0.0:3443` makes all network interfaces on the laptop listen on port 3443, allowing phones on the same WiFi to connect.
+
+#### Step 2: Find the Laptop's LAN IP
+
+Run in PowerShell:
+
+```powershell
+ipconfig | Select-String "IPv4"
+```
+
+Assume the result is `192.168.1.100`.
+
+#### Step 3: Allow Port Through Windows Firewall
+
+Run in PowerShell (as Administrator):
+
+```powershell
+New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow
+```
+
+#### Step 4: Access from Mobile Browser
+
+Make sure the phone and laptop are on the same WiFi, then enter in the mobile browser:
+
+```text
+https://192.168.1.100:3443
+```
+
+- **First visit** will show an "unsafe connection" warning (self-signed cert) — tap **"Advanced" → "Continue"**
+- The browser will then request camera/microphone permissions — tap **Allow**
+- Enter the video call page and start a full-duplex conversation
+
+### 9.3 iOS Safari Notes
+
+iOS Safari is stricter with self-signed certificates. If the above bypass doesn't work:
+
+1. Open `https://192.168.1.100:3443/certs/server.crt` in Safari on the phone (if you configured a cert download path), download and install the certificate
+2. Or send `server.crt` to the phone via AirDrop / WeChat, then go to **Settings → General → Profile → Install**
+3. Then go to **Settings → General → About → Certificate Trust Settings → Enable Full Trust**
+
+After that, Safari can access `https://192.168.1.100:3443` normally.
+
+---
+
+## 10. Common Issues and Troubleshooting
+
+### 10.1 Frontend Opens, but Cannot Start a Conversation
+
+Check backend logs:
+
+```bash
+docker logs --tail 200 minicpmo-backend
+```
+
+Key things to look for:
+
+- Whether the model path exists: `/models/MiniCPM-o-4_5`
+- Whether VRAM is sufficient (H100 usually has enough)
+- Whether `trust_remote_code` or dependency version errors appear
+
+### 10.2 GPU Not Visible Inside Container
+
+```bash
+docker exec -it minicpmo-backend nvidia-smi
+```
+
+If it fails, check the NVIDIA Container Toolkit and Docker daemon configuration first.
+
+### 10.3 WebSocket / SSE Anomalies
+
+This project has already disabled buffering and configured WebSocket upgrade in `nginx.docker.conf`.
+If issues persist, check whether the company's intranet gateway is blocking long-lived connections.
+
+### 10.4 Model Startup Is Too Slow
+
+The first startup may be slow; subsequent starts will be much faster. Check with:
+
+```bash
+nvidia-smi
+docker logs -f minicpmo-backend
+```
+
+---
+
+## 11. Optional Optimizations for Next Steps
+
+1. Switch the backend image to "offline wheel installation mode" to completely eliminate the need for pip internet access on the server.
+2. Use a private image registry (Harbor) instead of tar package transfers.
+3. Use systemd or cron for automatic container restart and log rotation.
+4. Replace the self-signed certificate with one issued by an enterprise CA to eliminate manual trust on mobile devices.
+
+---
+
+## 12. One-Click Command Quick Reference
+
+### H100 Side (Assuming Files Are Already Uploaded)
+
+```bash
+cd /data/minicpmo/deploy_pkg
+
+docker load -i minicpmo-backend_latest.tar
+docker load -i minicpmo-frontend_latest.tar
+
+mkdir -p /data/minicpmo/runtime/certs
+cp docker-compose.yml /data/minicpmo/runtime/
+cp certs/server.* /data/minicpmo/runtime/certs/
+
+cd /data/minicpmo/runtime
+export MODEL_PATH=/data/models/MiniCPM-o-4_5
+export CERTS_PATH=./certs
+export BACKEND_PORT=32550
+if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+ COMPOSE_CMD="docker-compose"
+else
+ echo "Compose not found. Please install docker-compose or the docker compose plugin first." && exit 1
+fi
+
+$COMPOSE_CMD -f docker-compose.yml up -d
+```
+
+### Local Computer (Open Tunnel)
+
+```bash
+ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST
+```
+
+PowerShell version:
+
+```powershell
+ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST"
+```
+
+Open on local computer: `http://127.0.0.1:3000` (or `https://127.0.0.1:3443`)
+
+### Mobile (Relayed Through Laptop)
+
+```bash
+# Bind all interfaces on the laptop
+ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST
+```
+
+Open on mobile browser: `https://<laptop-LAN-IP>:3443` (e.g. `https://192.168.1.100:3443`)
\ No newline at end of file
diff --git a/deploy/DEPLOY_WSL2_TO_H100_ZH.md b/deploy/DEPLOY_WSL2_TO_H100_ZH.md
new file mode 100644
index 00000000..54bf01fc
--- /dev/null
+++ b/deploy/DEPLOY_WSL2_TO_H100_ZH.md
@@ -0,0 +1,783 @@
+# MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问)
+
+> 目标:你在本地 Windows系统PC + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和Android系统手机上测试全双工视频通话。
+
+**你的环境速查:**
+
+| 项目 | 值 |
+| --- | --- |
+| 服务器 SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`(端口可能动态变化) |
+| GPU | NVIDIA H100(驱动 550.90.12) |
+| CUDA | 12.4(与 Dockerfile 基础镜像 `cuda:12.4.1` 完全匹配) |
+| 本地 | Win10 + WSL2 Ubuntu |
+
+**每次执行前先设置 SSH 变量(只改这里即可):**
+
+```bash
+export SSH_HOST=
+export SSH_PORT=
+export SSH_USER=
+```
+
+PowerShell 等价写法(Windows 终端直接用):
+
+```powershell
+$env:SSH_HOST = ""
+$env:SSH_PORT = ""
+$env:SSH_USER = ""
+```
+
+## PowerShell 日常三命令速查(推荐)
+
+```powershell
+# 1) 端口变化时先更新 SSH 参数
+Set-MiniCPMSSH -Port "" -User ""
+
+# 2) 启动手机模式(开隧道 + 打印可访问 URL)
+Start-MiniCPMMobile
+
+# 3) 结束隧道
+Stop-MiniCPMMobile
+```
+
+端口变化后的快速恢复:
+
+```powershell
+Set-MiniCPMSSH -Port "" -User ""
+Restart-MiniCPMMobile
+```
+
+PowerShell 中引用变量时,`ssh/scp` 建议写成:
+
+```powershell
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/deploy_pkg/"
+```
+
+可选:定义一个一键函数(以后只改端口即可)
+
+```powershell
+function Set-MiniCPMSSH {
+ param(
+ [Parameter(Mandatory = $true)]
+ [string]$Port,
+ [string]$Host = "",
+ [string]$User = ""
+ )
+
+ $env:SSH_HOST = $Host
+ $env:SSH_PORT = $Port
+ $env:SSH_USER = $User
+
+ Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER"
+}
+```
+
+使用示例:
+
+```powershell
+Set-MiniCPMSSH -Port "" -User ""
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+```
+
+可选:定义一键开隧道函数(本机/手机两种模式)
+
+```powershell
+function Open-MiniCPMTunnel {
+ param(
+ [ValidateSet("local", "mobile")]
+ [string]$Mode = "local"
+ )
+
+ if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+ throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER"
+ }
+
+ if ($Mode -eq "local") {
+ ssh -N -p $env:SSH_PORT `
+ -L 3000:127.0.0.1:3000 `
+ -L 3443:127.0.0.1:3443 `
+ -L 32550:127.0.0.1:32550 `
+ "$env:SSH_USER@$env:SSH_HOST"
+ }
+ else {
+ ssh -N -p $env:SSH_PORT `
+ -L 0.0.0.0:3443:127.0.0.1:3443 `
+ "$env:SSH_USER@$env:SSH_HOST"
+ }
+}
+```
+
+使用示例:
+
+```powershell
+# 1) 设置动态 SSH 参数
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) 仅本机访问(浏览器打开 http://127.0.0.1:3000)
+Open-MiniCPMTunnel -Mode local
+
+# 3) 手机访问(同一 WiFi,用 https://笔记本局域网IP:3443)
+Open-MiniCPMTunnel -Mode mobile
+```
+
+可选:自动打印手机访问地址
+
+```powershell
+function Get-MiniCPMLanUrl {
+ param(
+ [int]$Port = 3443
+ )
+
+ $ipv4List = Get-NetIPAddress -AddressFamily IPv4 |
+ Where-Object {
+ $_.IPAddress -notlike '127.*' -and
+ $_.IPAddress -notlike '169.254.*' -and
+ $_.PrefixOrigin -ne 'WellKnown'
+ } |
+ Sort-Object -Property InterfaceMetric
+
+ if (-not $ipv4List) {
+ throw "未找到可用 IPv4 地址,请检查网卡/网络连接"
+ }
+
+ $ip = $ipv4List[0].IPAddress
+ $url = "https://$ip`:$Port"
+
+ Write-Host "[MiniCPM LAN URL] $url"
+ return $url
+}
+```
+
+使用示例:
+
+```powershell
+# 先开启手机模式隧道(在另一个终端窗口运行)
+Open-MiniCPMTunnel -Mode mobile
+
+# 当前窗口打印手机访问地址
+Get-MiniCPMLanUrl
+```
+
+可选:一键启动手机模式(开隧道 + 检查端口 + 打印 URL)
+
+```powershell
+function Start-MiniCPMMobile {
+ param(
+ [int]$Port = 3443
+ )
+
+ if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+ throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER"
+ }
+
+ $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST"
+
+ # 在新窗口开隧道,避免阻塞当前终端
+ $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru
+ $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id
+ $env:MINICPM_MOBILE_PORT = [string]$Port
+ Start-Sleep -Seconds 2
+
+ $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue
+ if (-not $listener) {
+ Write-Warning "未检测到本机 $Port 端口监听,请检查 SSH 是否连接成功。"
+ return
+ }
+
+ $url = Get-MiniCPMLanUrl -Port $Port
+ Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID"
+ Write-Host "[MiniCPM Mobile Ready] 手机浏览器访问: $url"
+}
+
+function Stop-MiniCPMMobile {
+ $pidText = $env:MINICPM_MOBILE_SSH_PID
+
+ if ($pidText) {
+ $pidValue = [int]$pidText
+ $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue
+ if ($proc) {
+ Stop-Process -Id $pidValue -Force
+ Write-Host "[MiniCPM Mobile Stopped] 已停止隧道进程 PID=$pidValue"
+ Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+ Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+ return
+ }
+ }
+
+ $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 }
+ $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue
+ if (-not $listeners) {
+ Write-Host "[MiniCPM Mobile] 未检测到监听端口 $port,无需停止。"
+ return
+ }
+
+ foreach ($item in $listeners) {
+ if ($item.OwningProcess -gt 0) {
+ try {
+ Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop
+ Write-Host "[MiniCPM Mobile Stopped] 已停止监听端口 $port 的进程 PID=$($item.OwningProcess)"
+ }
+ catch {
+ Write-Warning "停止 PID=$($item.OwningProcess) 失败:$($_.Exception.Message)"
+ }
+ }
+ }
+
+ Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+ Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+}
+
+function Restart-MiniCPMMobile {
+ param(
+ [int]$Port = 3443
+ )
+
+ Stop-MiniCPMMobile
+ Start-Sleep -Seconds 1
+ Start-MiniCPMMobile -Port $Port
+}
+```
+
+使用示例:
+
+```powershell
+# 1) 先设置动态 SSH 参数(端口变更时只改这里)
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) 一键启动手机模式并输出可访问地址
+Start-MiniCPMMobile
+
+# 3) 端口变化后,一键重启手机模式(可选)
+Restart-MiniCPMMobile
+
+# 4) 结束手机模式隧道
+Stop-MiniCPMMobile
+```
+
+---
+
+## 0. 目录与文件说明
+
+本指南使用了你仓库中新建的部署文件:
+
+- `deploy/Dockerfile.backend`:后端推理服务镜像(FastAPI + MiniCPM-o 4.5)
+- `deploy/Dockerfile.frontend`:前端镜像(Vue build + Nginx)
+- `deploy/nginx.docker.conf`:Nginx 反向代理到后端容器
+- `deploy/docker-compose.yml`:双容器编排(frontend + backend)
+- `deploy/requirements.backend.txt`:后端 Python 依赖清单
+- `deploy/gen_ssl_cert.sh`:自签名 SSL 证书生成脚本(手机端 HTTPS 必需)
+
+---
+
+## 1. 本地(WSL2)前置准备
+
+在 WSL2 Ubuntu 执行:
+
+```bash
+cd /MiniCPM-o
+
+# 1) 检查 Docker
+sudo docker --version
+sudo docker compose version
+
+# 2) 如果你当前用户不能直接用 docker,可先临时用 sudo docker
+# 或将用户加入 docker 组(重新登录后生效)
+# sudo usermod -aG docker $USER
+```
+
+> 说明:本地 1050Ti 不参与推理,本地只负责构建镜像,不需要本地 GPU。
+
+---
+
+## 2. 本地下载模型(用于上传到内网)
+
+推荐在本地(有网环境)下载 HuggingFace 模型,再打包上传。
+
+### 2.1 安装下载工具
+
+```bash
+python3 -m pip install -U huggingface_hub
+```
+
+### 2.2 下载 MiniCPM-o 4.5
+
+```bash
+mkdir -p /MiniCPM-o/models
+python3 - << 'PY'
+from huggingface_hub import snapshot_download
+snapshot_download(
+ repo_id='openbmb/MiniCPM-o-4_5',
+ local_dir='/MiniCPM-o/models/MiniCPM-o-4_5',
+ local_dir_use_symlinks=False,
+ resume_download=True
+)
+PY
+```
+
+下载后检查体积和关键文件:
+
+```bash
+du -sh /MiniCPM-o/models/MiniCPM-o-4_5
+ls -lh /MiniCPM-o/models/MiniCPM-o-4_5 | head
+```
+
+---
+
+## 3. 在 WSL2 构建两个镜像
+
+在仓库根目录执行:
+
+```bash
+cd /MiniCPM-o
+
+# 后端镜像
+docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest .
+
+# 前端镜像
+docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest .
+```
+
+验证镜像存在:
+
+```bash
+docker images | grep minicpmo
+```
+
+---
+
+## 4. 导出镜像 + 生成 SSL 证书
+
+### 4.1 导出镜像为 tar
+
+```bash
+mkdir -p /deploy_pkg
+
+docker save -o /deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest
+docker save -o /deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest
+
+# 打包 compose 与 nginx 配置
+cp deploy/docker-compose.yml /deploy_pkg/
+cp deploy/nginx.docker.conf /deploy_pkg/
+```
+
+可选:压缩减少传输体积
+
+```bash
+cd /deploy_pkg
+gzip -1 minicpmo-backend_latest.tar
+gzip -1 minicpmo-frontend_latest.tar
+```
+
+### 4.2 生成自签名 SSL 证书(手机端 HTTPS 必需)
+
+```bash
+cd /MiniCPM-o
+bash deploy/gen_ssl_cert.sh /deploy_pkg/certs
+```
+
+这会在 `/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。
+
+---
+
+## 5. 上传到内网服务器
+
+你已经通过公司内网认证,且端口可能动态变化,请使用上面定义的 SSH 变量。
+
+### 5.1 上传镜像包和配置文件
+
+```bash
+# 先在服务器上创建目标目录
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /deploy_pkg"
+
+# 上传镜像 tar 包
+scp -P $SSH_PORT -o ServerAliveInterval=60 \
+ /deploy_pkg/minicpmo-backend_latest.tar.gz \
+ /deploy_pkg/minicpmo-frontend_latest.tar.gz \
+ /deploy_pkg/docker-compose.yml \
+ /deploy_pkg/nginx.docker.conf \
+ $SSH_USER@$SSH_HOST:/deploy_pkg/
+```
+
+### 5.2 上传模型权重
+
+```bash
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /models"
+
+scp -P $SSH_PORT -r -o ServerAliveInterval=60 \
+ /MiniCPM-o/models/MiniCPM-o-4_5 \
+ $SSH_USER@$SSH_HOST:/models/
+```
+
+### 5.3 上传 SSL 证书(手机端访问需要)
+
+```bash
+scp -P $SSH_PORT -r /deploy_pkg/certs \
+ $SSH_USER@$SSH_HOST:/deploy_pkg/
+```
+
+> 如果端口变更,只需要修改 `SSH_PORT` 变量并重试命令。
+
+---
+
+## 6. H100 服务器准备(一次性)
+
+通过已建立的隧道登录服务器:
+
+```bash
+ssh -p $SSH_PORT $SSH_USER@$SSH_HOST
+```
+
+检查环境:
+
+```bash
+# 确认 NVIDIA 驱动(你已确认: 550.90.12, CUDA 12.4 ✓)
+nvidia-smi
+
+# 检查 Docker
+docker --version
+docker compose version
+```
+
+### 6.1 安装 NVIDIA Container Toolkit(若未安装)
+
+如果 `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` 失败,需要安装 toolkit。
+
+安装后重启 Docker:
+
+```bash
+sudo systemctl restart docker
+```
+
+再验证:
+
+```bash
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+```
+
+---
+
+## 7. H100 服务器加载镜像与启动服务
+
+在服务器上执行(通过 `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` 登录后):
+
+```bash
+cd /deploy_pkg
+
+# 若上传的是 .tar.gz,先解压
+gunzip -f minicpmo-backend_latest.tar.gz || true
+gunzip -f minicpmo-frontend_latest.tar.gz || true
+
+# 加载镜像
+docker load -i minicpmo-backend_latest.tar
+docker load -i minicpmo-frontend_latest.tar
+
+# 放置运行时文件
+mkdir -p /runtime/certs
+cp docker-compose.yml /runtime/
+cp certs/server.crt certs/server.key /runtime/certs/
+
+cd /runtime
+```
+
+### 7.1 设置模型路径并启动
+
+`docker-compose.yml` 里用了 `MODEL_PATH` 环境变量。你可以直接导出:
+
+```bash
+export MODEL_PATH=/models/MiniCPM-o-4_5
+export CERTS_PATH=./certs
+export BACKEND_PORT=32550
+
+# 兼容两种 Compose 命令:docker compose / docker-compose
+if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+ COMPOSE_CMD="docker-compose"
+else
+ echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1
+fi
+
+$COMPOSE_CMD -f docker-compose.yml up -d
+```
+
+如果两种 Compose 都不可用(`docker compose` / `docker-compose` 都不存在),可直接用 `docker run` 启动:
+
+```bash
+docker network create minicpmo-net || true
+docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true
+
+docker run -d \
+ --name minicpmo-backend \
+ --restart unless-stopped \
+ --gpus all \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \
+ -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \
+ --network minicpmo-net \
+ minicpmo-backend:latest
+
+docker run -d \
+ --name minicpmo-frontend \
+ --restart unless-stopped \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -p 3000:3000 \
+ -p 3443:3443 \
+ -v ${CERTS_PATH}:/etc/nginx/certs:ro \
+ --network minicpmo-net \
+ minicpmo-frontend:latest
+```
+
+如果出现 `Failed to Setup IP tables` 或 `No chain/target/match by that name`,可先绕过 bridge 网络,改用 `host` 网络启动:
+
+```bash
+docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true
+
+docker run -d \
+ --name minicpmo-backend \
+ --restart unless-stopped \
+ --gpus all \
+ --network host \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \
+ minicpmo-backend:latest
+
+docker run -d \
+ --name minicpmo-frontend \
+ --restart unless-stopped \
+ --network host \
+ --add-host model-backend:127.0.0.1 \
+ -e BACKEND_PORT=${BACKEND_PORT:-32550} \
+ -v ${CERTS_PATH}:/etc/nginx/certs:ro \
+ minicpmo-frontend:latest
+```
+
+查看状态:
+
+```bash
+if [ -z "$COMPOSE_CMD" ]; then
+ if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+ else
+ COMPOSE_CMD="docker-compose"
+ fi
+fi
+
+$COMPOSE_CMD -f docker-compose.yml ps
+docker logs -f minicpmo-backend
+```
+
+若使用 `docker run` 方案,查看状态命令:
+
+```bash
+docker ps --filter name=minicpmo
+docker logs -f minicpmo-backend
+```
+
+健康检查:
+
+```bash
+curl http://127.0.0.1:32550/api/v1/health
+```
+
+应返回:
+
+```json
+{"status":"OK"}
+```
+
+> 首次加载模型会较慢(几十秒到数分钟),日志出现模型初始化完成后再测试前端。
+
+---
+
+## 8. 本地电脑访问(SSH 端口转发)
+
+你已能连通 SSH 隧道,只需基于当前端口做服务转发。
+
+在本地 PowerShell 或 WSL 新开一个终端:
+
+```bash
+ssh -N -p $SSH_PORT \
+ -L 3000:127.0.0.1:3000 \
+ -L 3443:127.0.0.1:3443 \
+ -L 32550:127.0.0.1:32550 \
+ $SSH_USER@$SSH_HOST
+```
+
+保持该终端不断开。然后在本地浏览器访问:
+
+- 前端页面(HTTP):http://localhost:3000
+- 前端页面(HTTPS):https://localhost:3443(自签名证书,需点击"继续前往")
+- 后端健康检查:http://localhost:32550/api/v1/health
+
+> 浏览器会请求摄像头/麦克风权限,点击允许。本地用 `localhost` 访问时 HTTP 即可获取摄像头权限。
+
+---
+
+## 9. 手机端访问(全双工视频通话)
+
+### 9.1 问题与原理
+
+手机浏览器(Chrome/Safari)要调用摄像头和麦克风,**必须使用 HTTPS**(`localhost` 例外,但手机并非 localhost)。
+
+方案:**笔记本做中继** — 手机 → 笔记本 WiFi 局域网 IP → SSH 隧道 → 服务器。
+
+```text
+手机浏览器 ──WiFi──▶ 笔记本:3443 ──SSH隧道──▶ H100:3443 ──Nginx──▶ 后端:32550
+ (HTTPS) (绑定 0.0.0.0)
+```
+
+### 9.2 操作步骤
+
+#### Step 1:建立"全接口绑定"的 SSH 隧道
+
+```bash
+ssh -N -p $SSH_PORT \
+ -L 0.0.0.0:3443:127.0.0.1:3443 \
+ $SSH_USER@$SSH_HOST
+```
+
+> 关键区别:`0.0.0.0:3443` 让笔记本的所有网卡都监听 3443 端口,同一 WiFi 的手机才能连入。
+
+#### Step 2:查看笔记本局域网 IP
+
+PowerShell 中执行:
+
+```powershell
+ipconfig | Select-String "IPv4"
+```
+
+假设得到 `192.168.1.100`。
+
+#### Step 3:Windows 防火墙放行端口
+
+PowerShell(管理员)执行:
+
+```powershell
+New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow
+```
+
+#### Step 4:手机浏览器访问
+
+确保手机与笔记本连同一 WiFi,然后在手机浏览器输入:
+
+```text
+https://192.168.1.100:3443
+```
+
+- **首次访问**会提示"不安全连接"(自签名证书),选择 **「高级」→「继续前往」**
+- 接着浏览器会请求摄像头/麦克风权限,**允许**即可
+- 进入视频通话页面,开始全双工对话
+
+### 9.3 iOS Safari 注意事项
+
+iOS Safari 对自签名证书更严格。如果无法通过上述方式跳过:
+
+1. 在手机上用 Safari 打开 `https://192.168.1.100:3443/certs/server.crt`(若你配置了证书下载路径),下载安装证书
+2. 或者将 `server.crt` 通过 AirDrop / 微信发送到手机,在 **设置 → 通用 → 描述文件 → 安装**
+3. 再到 **设置 → 通用 → 关于本机 → 证书信任设置 → 启用完全信任**
+
+之后 Safari 访问 `https://192.168.1.100:3443` 即可正常使用。
+
+---
+
+## 10. 常见问题与排查
+
+### 10.1 前端能打开,但无法对话
+
+检查后端日志:
+
+```bash
+docker logs --tail 200 minicpmo-backend
+```
+
+重点看:
+
+- 模型路径是否存在:`/models/MiniCPM-o-4_5`
+- 显存是否足够(H100 通常充足)
+- 是否出现 `trust_remote_code` / 依赖版本错误
+
+### 10.2 容器内 GPU 不可见
+
+```bash
+docker exec -it minicpmo-backend nvidia-smi
+```
+
+若失败,优先检查 NVIDIA Container Toolkit 与 Docker daemon 配置。
+
+### 10.3 WebSocket / SSE 异常
+
+本项目已在 `nginx.docker.conf` 关闭缓冲并配置了 websocket upgrade。
+若仍异常,检查公司内网网关是否拦截长连接。
+
+### 10.4 模型启动太慢
+
+首次启动可能较慢;后续会快很多。可先看:
+
+```bash
+nvidia-smi
+docker logs -f minicpmo-backend
+```
+
+---
+
+## 11. 你下一步可以做的优化(可选)
+
+1. 将后端镜像改为“离线 wheel 安装模式”,彻底避免服务器 pip 联网需求。
+2. 使用私有镜像仓库(Harbor)替代 tar 包传输。
+3. 用 systemd 或 cron 做容器自动拉起与日志轮转。
+4. 替换自签名证书为企业 CA 签发的证书,手机端免手动信任。
+
+---
+
+## 12. 一键启动命令速查
+
+### H100 侧(假设文件已上传)
+
+```bash
+cd /deploy_pkg
+
+docker load -i minicpmo-backend_latest.tar
+docker load -i minicpmo-frontend_latest.tar
+
+mkdir -p /runtime/certs
+cp docker-compose.yml /runtime/
+cp certs/server.* /runtime/certs/
+
+cd /runtime
+export MODEL_PATH=/models/MiniCPM-o-4_5
+export CERTS_PATH=./certs
+export BACKEND_PORT=32550
+if docker compose version >/dev/null 2>&1; then
+ COMPOSE_CMD="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+ COMPOSE_CMD="docker-compose"
+else
+ echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1
+fi
+
+$COMPOSE_CMD -f docker-compose.yml up -d
+```
+
+### 本地电脑(开隧道)
+
+```bash
+ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST
+```
+
+PowerShell 版本:
+
+```powershell
+ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST"
+```
+
+本地电脑打开:http://localhost:3000(HTTP)或 https://localhost:3443(HTTPS)
+
+### 手机端(通过笔记本中转)
+
+```bash
+# 笔记本绑定所有网卡
+ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST
+```
+
+手机浏览器打开:`https://<笔记本局域网IP>:3443`(例如 `https://192.168.1.100:3443`)
diff --git a/deploy/Dockerfile.backend b/deploy/Dockerfile.backend
new file mode 100644
index 00000000..b4291181
--- /dev/null
+++ b/deploy/Dockerfile.backend
@@ -0,0 +1,71 @@
+# ============================================
+# MiniCPM-o 4.5 Backend Inference Service Dockerfile
+# Base image: NVIDIA CUDA 12.4 + Ubuntu 22.04
+# ============================================
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
+
+# Avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+
+# ---- System dependencies ----
+RUN apt-get update && apt-get install -y \
+ python3.10 \
+ python3.10-dev \
+ python3-pip \
+ ffmpeg \
+ libsndfile1 \
+ libsndfile1-dev \
+ git \
+ wget \
+ curl \
+ && rm -rf /var/lib/apt/lists/*
+
+# Set python3.10 as default
+RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
+ ln -sf /usr/bin/python3 /usr/bin/python && \
+ python3 -m pip install --upgrade pip setuptools wheel
+
+# ---- PyTorch (CUDA 12.4) ----
+RUN pip install --no-cache-dir \
+ "torch>=2.3.0,<=2.8.0" \
+ "torchaudio<=2.8.0" \
+ --index-url https://download.pytorch.org/whl/cu124
+
+# ---- MiniCPM-o core dependencies ----
+RUN pip install --no-cache-dir \
+ "transformers==4.51.0" \
+ accelerate \
+ "minicpmo-utils[all]>=1.0.5" \
+ librosa \
+ soundfile \
+ onnxruntime \
+ sentencepiece \
+ Pillow \
+ numpy
+
+# ---- Web service dependencies ----
+RUN pip install --no-cache-dir \
+ fastapi \
+ uvicorn \
+ aiofiles \
+ pydantic
+
+# ---- Working directory ----
+WORKDIR /app
+
+# ---- Copy backend code ----
+COPY web_demos/minicpm-o_2.6/model_server.py /app/
+COPY web_demos/minicpm-o_2.6/vad_utils.py /app/
+COPY web_demos/minicpm-o_2.6/silero_vad.onnx /app/
+
+# ---- Copy TTS reference audios ----
+COPY assets/ref_audios/ /app/assets/ref_audios/
+
+# ---- Expose port ----
+EXPOSE 32550
+
+# ---- Startup command ----
+# Model path is mounted to /models/MiniCPM-o-4_5 via volume
+ENV BACKEND_PORT=32550
+CMD ["sh", "-lc", "python3 model_server.py --model /models/MiniCPM-o-4_5 --port ${BACKEND_PORT}"]
diff --git a/deploy/Dockerfile.frontend b/deploy/Dockerfile.frontend
new file mode 100644
index 00000000..5fc22e8f
--- /dev/null
+++ b/deploy/Dockerfile.frontend
@@ -0,0 +1,36 @@
+# ============================================
+# MiniCPM-o 4.5 Frontend Web Service Dockerfile
+# Multi-stage build: Node.js build + Nginx deployment
+# ============================================
+
+# ---- Stage 1: Build Vue project ----
+FROM node:20-alpine AS build-stage
+
+WORKDIR /build
+COPY web_demos/minicpm-o_2.6/web_server/ /build/
+
+# Install pnpm and build
+# Generate placeholder certificate files (vite.config.js server.https is also parsed during build)
+RUN npm install -g pnpm && \
+ touch key.pem cert.pem && \
+ pnpm install && \
+ pnpm run build
+
+# ---- Stage 2: Nginx static service ----
+FROM nginx:alpine AS production-stage
+
+# envsubst is used to render nginx config template at container startup
+RUN apk add --no-cache gettext
+
+# Copy build artifacts
+COPY --from=build-stage /build/dist /usr/share/nginx/html
+
+# Copy custom nginx config template (Docker network version)
+COPY deploy/nginx.docker.conf /etc/nginx/nginx.conf.template
+
+# Render nginx config with BACKEND_PORT at startup
+ENV BACKEND_PORT=32550
+
+EXPOSE 3000 3443
+
+CMD ["sh", "-lc", "envsubst '$$BACKEND_PORT' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && nginx -g 'daemon off;'"]
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml
new file mode 100644
index 00000000..60141e28
--- /dev/null
+++ b/deploy/docker-compose.yml
@@ -0,0 +1,64 @@
+# ============================================
+# MiniCPM-o 4.5 Docker Compose Deployment Configuration
+# ============================================
+# Usage:
+# docker compose -f deploy/docker-compose.yml up -d
+#
+# Prerequisites:
+# 1. NVIDIA Container Toolkit is installed on the server
+# 2. Model weights are placed in the ${MODEL_PATH} directory
+# ============================================
+
+services:
+ # ---- Backend Inference Service (GPU) ----
+ model-backend:
+ image: minicpmo-backend:latest
+ container_name: minicpmo-backend
+ restart: unless-stopped
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+ count: 1
+ capabilities: [gpu]
+ volumes:
+ # Mount model weights directory (host path → container path)
+      - ${MODEL_PATH:-/models/MiniCPM-o-4_5}:/models/MiniCPM-o-4_5:ro
+ environment:
+ - BACKEND_PORT=${BACKEND_PORT:-32550}
+ ports:
+ - "${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550}"
+ # Note: BACKEND_PORT is the application listening port (default 32550),
+ # which is different from the external SSH temporary tunnel port.
+ healthcheck:
+ test: ["CMD-SHELL", "curl -f http://localhost:${BACKEND_PORT:-32550}/api/v1/health || exit 1"]
+ interval: 30s
+ timeout: 10s
+ retries: 5
+ start_period: 120s # Model loading may take a long time
+ networks:
+ - minicpmo-net
+
+ # ---- Frontend Web Service (Nginx) ----
+ web-frontend:
+ image: minicpmo-frontend:latest
+ container_name: minicpmo-frontend
+ restart: unless-stopped
+ ports:
+ - "3000:3000"
+ - "3443:3443" # HTTPS (for mobile access)
+ volumes:
+ # Mount SSL certificate directory
+      - ${CERTS_PATH:-./certs}:/etc/nginx/certs:ro
+ environment:
+ - BACKEND_PORT=${BACKEND_PORT:-32550}
+ depends_on:
+ model-backend:
+ condition: service_started
+ networks:
+ - minicpmo-net
+
+networks:
+ minicpmo-net:
+ driver: bridge
diff --git a/deploy/gen_ssl_cert.sh b/deploy/gen_ssl_cert.sh
new file mode 100644
index 00000000..ac2e5d3b
--- /dev/null
+++ b/deploy/gen_ssl_cert.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# ============================================
+# Generate self-signed SSL certificate (for Nginx HTTPS + mobile access)
+# Usage: bash deploy/gen_ssl_cert.sh [output directory]
+# ============================================
+set -e
+
+OUT_DIR="${1:-deploy/certs}"
+mkdir -p "$OUT_DIR"
+
+echo ">>> Generating self-signed SSL certificate to $OUT_DIR ..."
+openssl req -x509 -nodes -days 3650 \
+ -newkey rsa:2048 \
+ -keyout "$OUT_DIR/server.key" \
+ -out "$OUT_DIR/server.crt" \
+  -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=localhost" \
+  -addext "subjectAltName=IP:127.0.0.1,DNS:localhost"
+
+echo ">>> Certificate generated:"
+ls -lh "$OUT_DIR"/server.*
+echo ""
+echo ">>> Tip: After uploading the entire $OUT_DIR directory to the server,"
+echo " create a certs/ directory next to docker-compose.yml and put server.crt + server.key inside"
diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf
new file mode 100644
index 00000000..a5832d9c
--- /dev/null
+++ b/deploy/nginx.docker.conf
@@ -0,0 +1,122 @@
+user root;
+worker_processes auto;
+pid /run/nginx.pid;
+
+events {
+ worker_connections 768;
+}
+
+http {
+ # ---- Basic Settings ----
+ client_max_body_size 20M;
+ sendfile on;
+ tcp_nopush on;
+ tcp_nodelay on;
+ keepalive_timeout 65;
+ types_hash_max_size 2048;
+
+ include /etc/nginx/mime.types;
+ default_type application/octet-stream;
+
+ # ---- Logs ----
+ access_log /var/log/nginx/access.log;
+ error_log /var/log/nginx/error.log;
+
+ # ---- Gzip Compression ----
+ gzip on;
+
+ # ---- Virtual Host (HTTP, Local Access) ----
+ server {
+ listen 3000;
+ server_name _;
+
+ add_header Access-Control-Allow-Origin *;
+ add_header Access-Control-Allow-Headers X-Requested-With;
+ add_header Access-Control-Allow-Methods GET,POST,OPTIONS;
+
+ # Backend API requests → Forward to backend container (Docker service name: model-backend)
+ location /api/v1 {
+ proxy_pass http://model-backend:${BACKEND_PORT};
+ proxy_set_header Host $host;
+ proxy_set_header Connection "";
+ chunked_transfer_encoding off;
+ proxy_set_header X-Accel-Buffering off;
+ add_header X-Accel-Buffering off;
+ proxy_http_version 1.1;
+ # Disable buffering (required for SSE streaming responses)
+ proxy_buffering off;
+ proxy_cache off;
+ sendfile off;
+ tcp_nodelay on;
+ }
+
+ # WebSocket requests → Forward to backend container
+ location /ws {
+ proxy_pass http://model-backend:${BACKEND_PORT};
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection 'upgrade';
+ proxy_set_header Host $host;
+ proxy_cache_bypass $http_upgrade;
+ }
+
+ # Frontend static files
+ location / {
+ root /usr/share/nginx/html;
+ index index.html index.htm;
+ try_files $uri $uri/ /index.html;
+ }
+
+ location @router {
+ rewrite ^.*$ /index.html last;
+ }
+ }
+
+ # ---- Virtual Host (HTTPS, Mobile Access) ----
+ server {
+ listen 3443 ssl;
+ server_name localhost;
+
+ ssl_certificate /etc/nginx/certs/server.crt;
+ ssl_certificate_key /etc/nginx/certs/server.key;
+ ssl_protocols TLSv1.2 TLSv1.3;
+ ssl_ciphers HIGH:!aNULL:!MD5;
+
+ add_header Access-Control-Allow-Origin *;
+ add_header Access-Control-Allow-Headers X-Requested-With;
+ add_header Access-Control-Allow-Methods GET,POST,OPTIONS;
+
+ location /api/v1 {
+ proxy_pass http://model-backend:${BACKEND_PORT};
+ proxy_set_header Host $host;
+ proxy_set_header Connection "";
+ chunked_transfer_encoding off;
+ proxy_set_header X-Accel-Buffering off;
+ add_header X-Accel-Buffering off;
+ proxy_http_version 1.1;
+ proxy_buffering off;
+ proxy_cache off;
+ sendfile off;
+ tcp_nodelay on;
+ }
+
+ location /ws {
+ proxy_pass http://model-backend:${BACKEND_PORT};
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection 'upgrade';
+ proxy_set_header Host $host;
+ proxy_cache_bypass $http_upgrade;
+ }
+
+ location / {
+ root /usr/share/nginx/html;
+ index index.html index.htm;
+ try_files $uri $uri/ /index.html;
+ }
+
+ location @router {
+ rewrite ^.*$ /index.html last;
+ }
+ }
+}
diff --git a/deploy/requirements.backend.txt b/deploy/requirements.backend.txt
new file mode 100644
index 00000000..60648e1b
--- /dev/null
+++ b/deploy/requirements.backend.txt
@@ -0,0 +1,29 @@
+# ============================================
+# MiniCPM-o 4.5 Backend Python Requirements
+# For offline environments: pip download / pip install
+# ============================================
+
+# == PyTorch (CUDA 12.4) ==
+# Note: PyTorch should be downloaded separately from https://download.pytorch.org/whl/cu124
+# torch>=2.3.0,<=2.8.0
+# torchaudio<=2.8.0
+
+# == Core Model Dependencies ==
+transformers==4.51.0
+accelerate
+minicpmo-utils[all]>=1.0.5
+sentencepiece
+
+# == Audio/Video Processing ==
+librosa
+soundfile
+onnxruntime
+Pillow
+numpy
+
+# == Web Service ==
+fastapi
+uvicorn[standard]
+aiofiles
+pydantic
+httpx
diff --git a/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak
new file mode 100644
index 00000000..9a4285cf
Binary files /dev/null and b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak differ
diff --git a/web_demos/minicpm-o_2.6/miniCPM4.5.svg b/web_demos/minicpm-o_2.6/miniCPM4.5.svg
new file mode 100644
index 00000000..dbb24656
--- /dev/null
+++ b/web_demos/minicpm-o_2.6/miniCPM4.5.svg
@@ -0,0 +1,28 @@
+
+
\ No newline at end of file
diff --git a/web_demos/minicpm-o_2.6/model_server.py b/web_demos/minicpm-o_2.6/model_server.py
index d9e86bdb..7e551b2d 100644
--- a/web_demos/minicpm-o_2.6/model_server.py
+++ b/web_demos/minicpm-o_2.6/model_server.py
@@ -91,7 +91,7 @@ def __init__(self):
self.device='cuda:0'
self.minicpmo_model_path = args.model #"openbmb/MiniCPM-o-2_6"
- self.model_version = "2.6"
+ self.model_version = "4.5"
with torch.no_grad():
self.minicpmo_model = AutoModel.from_pretrained(self.minicpmo_model_path, trust_remote_code=True, torch_dtype=self.target_dtype, attn_implementation='sdpa')
self.minicpmo_tokenizer = AutoTokenizer.from_pretrained(self.minicpmo_model_path, trust_remote_code=True)
@@ -103,6 +103,10 @@ def __init__(self):
self.ref_path_default = "assets/ref_audios/default.wav"
self.ref_path_female = "assets/ref_audios/female_example.wav"
self.ref_path_male = "assets/ref_audios/male_example.wav"
+ self.tts_sample_rate = 24000 # 4.5 uses 24kHz (s3tokenizer)
+
+ # 4.5: init token2wav cache with default ref audio for streaming TTS
+ self._init_token2wav_with_ref(self.ref_path_default)
self.input_audio_id = 0
self.input_audio_vad_id = 0
@@ -119,7 +123,7 @@ def __init__(self):
self.msg_type = 1
self.speaking_time_stamp = 0
- self.cycle_wait_time = 12800/24000 + 0.15
+ self.cycle_wait_time = 25 * 0.04 + 0.15 # 4.5: 25 audio tokens/chunk, each ~0.04s
self.extra_wait_time = 2.5
self.server_wait = True
@@ -203,69 +207,77 @@ def no_active_stream(self):
return True
return False
+ def _init_token2wav_with_ref(self, ref_path):
+ """Initialize token2wav cache with a reference audio for streaming TTS (4.5 API)."""
+ try:
+ ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True)
+ with torch.no_grad():
+ self.minicpmo_model.init_token2wav_cache(ref_audio)
+ logger.info(f"init_token2wav_cache done with ref: {ref_path}")
+ except Exception as e:
+ logger.error(f"init_token2wav_cache failed: {e}")
+
def sys_prompt_init(self, msg_type):
if self.past_session_id == self.session_id:
return
logger.info("### sys_prompt_init ###")
logger.info(f'msg_type is {msg_type}')
- if msg_type <= 1: #audio
- audio_voice_clone_prompt = "Use the voice in the audio prompt to synthesize new content."
- audio_assistant_prompt = "You are a helpful assistant with the above voice style."
- ref_path = self.ref_path_default
-
- if self.customized_options is not None:
- audio_voice_clone_prompt = self.customized_options['voice_clone_prompt']
- audio_assistant_prompt = self.customized_options['assistant_prompt']
- if self.customized_options['use_audio_prompt'] == 1:
- ref_path = self.ref_path_default
- elif self.customized_options['use_audio_prompt'] == 2:
- ref_path = self.ref_path_female
- elif self.customized_options['use_audio_prompt'] == 3:
- ref_path = self.ref_path_male
-
- audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True)
- sys_msg = {'role': 'user', 'content': [audio_voice_clone_prompt + "\n", audio_prompt, "\n" + audio_assistant_prompt]}
- elif msg_type == 2: #video
- voice_clone_prompt="你是一个AI助手。你能接受视频,音频和文本输入并输出语音和文本。模仿输入音频中的声音特征。"
- assistant_prompt="作为助手,你将使用这种声音风格说话。"
+ # Determine ref audio path
+ ref_path = self.ref_path_default
+ language = "en"
+ if msg_type == 2: # video
ref_path = self.ref_path_video_default
-
- if self.customized_options is not None:
- voice_clone_prompt = self.customized_options['voice_clone_prompt']
- assistant_prompt = self.customized_options['assistant_prompt']
- if self.customized_options['use_audio_prompt'] == 1:
- ref_path = self.ref_path_default
- elif self.customized_options['use_audio_prompt'] == 2:
- ref_path = self.ref_path_female
- elif self.customized_options['use_audio_prompt'] == 3:
- ref_path = self.ref_path_male
-
- audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True)
- sys_msg = {'role': 'user', 'content': [voice_clone_prompt, audio_prompt, assistant_prompt]}
- # elif msg_type == 3: #user start
- # assistant_prompt="作为助手,你将使用这种声音风格说话。"
- # if self.customized_options is not None:
- # assistant_prompt = self.customized_options['assistant_prompt']
-
- # sys_msg = {'role': 'user', 'content': [assistant_prompt]}
-
+ language = "zh"
+
+ if self.customized_options is not None:
+ if self.customized_options.get('use_audio_prompt') == 1:
+ ref_path = self.ref_path_default
+ elif self.customized_options.get('use_audio_prompt') == 2:
+ ref_path = self.ref_path_female
+ elif self.customized_options.get('use_audio_prompt') == 3:
+ ref_path = self.ref_path_male
+
+ # 4.5 API: use model.get_sys_prompt() to build system message
+ ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True)
+ sys_msg = self.minicpmo_model.get_sys_prompt(
+ ref_audio=ref_audio,
+ mode="omni",
+ language=language,
+ )
+
+ # Re-init token2wav cache with the selected ref audio
+ self._init_token2wav_with_ref(ref_path)
+
self.msg_type = msg_type
msgs = [sys_msg]
- if self.customized_options is not None:
- if self.customized_options['use_audio_prompt'] > 0:
+
+ def safe_streaming_prefill(prompt_msgs):
+ try:
self.minicpmo_model.streaming_prefill(
session_id=str(self.session_id),
- msgs=msgs,
+ msgs=prompt_msgs,
tokenizer=self.minicpmo_tokenizer,
+ use_tts_template=True,
)
+ return True
+ except Exception as e:
+ logger.warning(f"streaming_prefill failed with audio prompt, fallback to text-only prompt: {e}")
+ fallback_msg = self.minicpmo_model.get_sys_prompt(ref_audio=None, mode="omni", language=language)
+ self.minicpmo_model.streaming_prefill(
+ session_id=str(self.session_id),
+ msgs=[fallback_msg],
+ tokenizer=self.minicpmo_tokenizer,
+ use_tts_template=True,
+ )
+ return False
+
+ if self.customized_options is not None:
+ if self.customized_options.get('use_audio_prompt', 0) > 0:
+ safe_streaming_prefill(msgs)
if msg_type == 0:
- self.minicpmo_model.streaming_prefill(
- session_id=str(self.session_id),
- msgs=msgs,
- tokenizer=self.minicpmo_tokenizer,
- )
+ safe_streaming_prefill(msgs)
self.savedir = os.path.join(f"./log_data/{args.port}/", str(time.time()))
if not os.path.exists(self.savedir):
@@ -297,7 +309,15 @@ def clear(self):
self.audio_input = []
self.image_prefill = None
- if self.minicpmo_model.llm_past_key_values[0][0].shape[2]>8192:
+ kv = self.minicpmo_model.llm_past_key_values
+ kv_len = 0
+ if kv is not None:
+ if hasattr(kv, 'get_seq_length'):
+ kv_len = kv.get_seq_length()
+ elif isinstance(kv, (list, tuple)) and len(kv) > 0:
+ if isinstance(kv[0], (list, tuple)) and len(kv[0]) > 0:
+ kv_len = kv[0][0].shape[2]
+ if kv_len > 8192:
self.session_id += 1 # to clear all kv cache
self.sys_prompt_flag = False
@@ -468,6 +488,8 @@ def prefill(self, audio, image, is_end):
msgs=msgs,
tokenizer=self.minicpmo_tokenizer,
max_slice_nums=slice_nums,
+ use_tts_template=True,
+ is_last_chunk=(is_end),
)
self.input_audio_id += 1
@@ -504,49 +526,69 @@ async def generate(self):
with open(input_audio_path, 'rb') as wav_file:
audio_stream = wav_file.read()
except FileNotFoundError:
- print(f"File {input_audio_path} not found.")
+ logger.warning(f"File {input_audio_path} not found.")
yield base64.b64encode(audio_stream).decode('utf-8'), "assistant:\n"
- print('=== gen start: ', time.time() - time_gen)
- first_time = True
- temp_time = time.time()
- temp_time1 = time.time()
+ logger.info(f'=== gen start: {time.time() - time_gen:.3f}s ===')
with torch.inference_mode():
if self.stop_response:
self.generate_end()
return
self.minicpmo_model.config.stream_input=True
- msg = {"role":"user", "content": self.cnts}
- msgs = [msg]
text = ''
self.speaking_time_stamp = time.time()
+ sr = self.tts_sample_rate # 4.5 fixed 24kHz
try:
- for r in self.minicpmo_model.streaming_generate(
+ for result in self.minicpmo_model.streaming_generate(
session_id=str(self.session_id),
tokenizer=self.minicpmo_tokenizer,
generate_audio=True,
- # enable_regenerate=True,
+ use_tts_template=True,
+ do_sample=True,
):
if self.stop_response:
self.generate_end()
return
- audio_np, sr, text = r["audio_wav"], r["sampling_rate"], r["text"]
-
- output_audio_path = self.savedir + f'/output_audio_log/output_audio_{self.output_audio_id}.wav'
- self.output_audio_id += 1
- soundfile.write(output_audio_path, audio_np, samplerate=sr)
- audio_stream = None
- try:
- with open(output_audio_path, 'rb') as wav_file:
- audio_stream = wav_file.read()
- except FileNotFoundError:
- print(f"File {output_audio_path} not found.")
- temp_time1 = time.time()
- print('text: ', text)
- yield base64.b64encode(audio_stream).decode('utf-8'), text
+ # 4.5 API: yields (waveform_chunk: Tensor, text_chunk: str)
+ # End signal: (None, None)
+ if isinstance(result, tuple):
+ waveform_chunk, text_chunk = result
+ else:
+ # fallback for unexpected format
+ logger.warning(f"Unexpected streaming_generate result type: {type(result)}")
+ continue
+
+ if waveform_chunk is None:
+ # generation complete signal
+ break
+
+ # Convert tensor to numpy, ensure 1D float32
+ if isinstance(waveform_chunk, torch.Tensor):
+ audio_np = waveform_chunk.cpu().float().numpy()
+ else:
+ audio_np = np.array(waveform_chunk, dtype=np.float32)
+ audio_np = audio_np.squeeze() # remove batch dims
+ if audio_np.ndim == 0 or audio_np.size == 0:
+ continue # skip empty chunks
+
+ # Resample from model's 24kHz to frontend's expected 16kHz
+ audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000)
+
+ if text_chunk:
+ text += text_chunk
+
+ # Encode audio chunk to WAV in memory (no disk I/O)
+ audio_buffer = io.BytesIO()
+ soundfile.write(audio_buffer, audio_np, samplerate=16000, format='WAV', subtype='PCM_16')
+ audio_stream = audio_buffer.getvalue()
+
+ # Send delta text (text_chunk), not accumulated text
+ yield base64.b64encode(audio_stream).decode('utf-8'), text_chunk if text_chunk else ''
self.speaking_time_stamp += self.cycle_wait_time
except Exception as e:
logger.error(f"Error happened during generation: {str(e)}")
+ import traceback
+ traceback.print_exc()
yield None, '\n'
except Exception as e:
@@ -582,8 +624,7 @@ def upload_customized_audio(self, audio_data, audio_fmt):
output_audio_path = self.savedir + f'/customized_audio.wav'
soundfile.write(output_audio_path, audio_np, sr)
self.customized_audio = output_audio_path
- logger.info(f"processed customized {audio_fmt} audio")
- print(audio_np.shape, type(audio_np), sr)
+ logger.info(f"processed customized {audio_fmt} audio, shape={audio_np.shape}, sr={sr}")
else:
logger.info(f"empty customized audio, use default value instead.")
self.customized_audio = None
@@ -734,14 +775,14 @@ async def websocket_stream(websocket: WebSocket,
async def generate_sse_response(request: Request, uid: Optional[str] = Header(None)):
global stream_manager
- print(f"uid: {uid}")
+ logger.info(f"uid: {uid}")
try:
# Wait for streaming to complete or timeout
while not stream_manager.is_streaming_complete.is_set():
# if stream_manager.is_timed_out():
# yield f"data: {json.dumps({'error': 'Stream timeout'})}\n\n"
# return
- # print(f"{uid} whille not stream_manager.is_streaming_complete.is_set(), asyncio.sleep(0.1)")
+
await asyncio.sleep(0.1)
logger.info("streaming complete\n")
@@ -912,7 +953,7 @@ async def init_options(request: Request, uid: Optional[str] = Header(None)):
ctype = content["type"]
raise HTTPException(status_code=400, detail=f"Invalid content type: {ctype}")
version = stream_manager.model_version
- print(version)
+ logger.info(f"Model version: {version}")
response = {
"id": uid,
"choices": {