diff --git a/.gitignore b/.gitignore index 988f840d..c6843c8e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.bk __pycache__ .DS_Store +models diff --git a/README.md b/README.md index d4d9c48e..9556bfcd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,12 @@ **A Gemini 2.5 Flash Level MLLM for Vision, Speech, and Full-Duplex Multimodal Live Streaming on Your Phone** + + +💡 **Tip**: If you find this repository's structure or content difficult to understand, visit [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) for a comprehensive detailed explanation. + +
+ [中文](./README_zh.md) | English diff --git a/README_zh.md b/README_zh.md index f9a8d1c1..255631a4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,6 +4,12 @@ **端侧可用的 Gemini 2.5 Flash 级视觉、语音、全双工多模态实时流式大模型** +
+ +💡 **提示**: 如果你觉得这个仓库的结构或内容难以理解,请访问 [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) 获取详细的解读。 + +
+
+
 中文 | [English](./README.md)
diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md
new file mode 100644
index 00000000..ba15d8d4
--- /dev/null
+++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md
@@ -0,0 +1,828 @@
+# MiniCPM-o 4.5 Offline Deployment Guide (Build Image in WSL2 → Upload to Intranet H100 Server → Local + Mobile Access)
+
+> Goal: Build a Docker image on your local Windows PC with WSL2, transfer the image and model to a company H100 server with no public internet access, start the service, and test full-duplex video calling in a local browser and on an Android phone.
+
+**Your Environment Quick Reference:**
+
+| Item | Value |
+| --- | --- |
+| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change dynamically) |
+| GPU | NVIDIA H100 (driver 550.90.12) |
+| CUDA | 12.4 (fully matches the Dockerfile base image `cuda:12.4.1`) |
+| Local | Win10 + WSL2 Ubuntu |
+
+**Set SSH variables before each run (only change here):**
+
+```bash
+export SSH_HOST=127.0.0.1
+export SSH_PORT=54062
+export SSH_USER=your_user
+```
+
+PowerShell equivalent (use directly in Windows Terminal):
+
+```powershell
+$env:SSH_HOST = "127.0.0.1"
+$env:SSH_PORT = "54062"
+$env:SSH_USER = "your_user"
+```
+
+## PowerShell Daily Three-Command Quick Reference (Recommended)
+
+```powershell
+# 1) Update SSH parameters when port changes
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Start mobile mode (open tunnel + print accessible URL)
+Start-MiniCPMMobile
+
+# 3) Stop tunnel
+Stop-MiniCPMMobile
+```
+
+Quick recovery after port change:
+
+```powershell
+Set-MiniCPMSSH -Port "<new_port>" -User "<user>"
+Restart-MiniCPMMobile
+```
+
+When referencing the variables in PowerShell, write `ssh`/`scp` as:
+
+```powershell
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/"
+```
+
+Optional: Define a one-click function (only change the port going forward)
+
+```powershell
+function Set-MiniCPMSSH {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Port,
+        # NOTE: the parameter must not be named $Host — $Host is a read-only
+        # PowerShell automatic variable and the function would fail at invocation.
+        [string]$TargetHost = "127.0.0.1",
+        [string]$User = "your_user"
+    )
+
+    $env:SSH_HOST = $TargetHost
+    $env:SSH_PORT = $Port
+    $env:SSH_USER = $User
+
+    Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER"
+}
+```
+
+Usage example:
+
+```powershell
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST"
+```
+
+Optional: Define a one-click tunnel function (local / mobile modes)
+
+```powershell
+function Open-MiniCPMTunnel {
+    param(
+        [ValidateSet("local", "mobile")]
+        [string]$Mode = "local"
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    if ($Mode -eq "local") {
+        ssh -N -p $env:SSH_PORT `
+            -L 3000:127.0.0.1:3000 `
+            -L 3443:127.0.0.1:3443 `
+            -L 32550:127.0.0.1:32550 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+    else {
+        ssh -N -p $env:SSH_PORT `
+            -L 0.0.0.0:3443:127.0.0.1:3443 `
+            "$env:SSH_USER@$env:SSH_HOST"
+    }
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) Local access only (open http://127.0.0.1:3000 in browser)
+Open-MiniCPMTunnel -Mode local
+
+# 3) Mobile access (same WiFi, use https://laptop_lan_ip:3443)
+Open-MiniCPMTunnel -Mode mobile
+```
+
+Optional: Auto-print mobile access URL
+
+```powershell
+function Get-MiniCPMLanUrl {
+    param(
+        [int]$Port = 3443
+    )
+
+    $ipv4List = Get-NetIPAddress -AddressFamily IPv4 |
+        Where-Object {
+            $_.IPAddress -notlike '127.*' -and
+            $_.IPAddress -notlike '169.254.*' -and
+            $_.PrefixOrigin -ne 'WellKnown'
+        } |
+        Sort-Object -Property InterfaceMetric
+
+    if (-not $ipv4List) {
+        throw "No usable IPv4 address found. Check the network adapter / connection."
+    }
+
+    $ip = $ipv4List[0].IPAddress
+    $url = "https://$ip`:$Port"
+
+    Write-Host "[MiniCPM LAN URL] $url"
+    return $url
+}
+```
+
+Usage example:
+
+```powershell
+# Start mobile mode tunnel first (run in another terminal window)
+Open-MiniCPMTunnel -Mode mobile
+
+# Print mobile access URL in the current window
+Get-MiniCPMLanUrl
+```
+
+Optional: One-click mobile mode startup (open tunnel + check port + print URL)
+
+```powershell
+function Start-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) {
+        throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER"
+    }
+
+    $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST"
+
+    # Open tunnel in a new window to avoid blocking the current terminal
+    $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru
+    $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id
+    $env:MINICPM_MOBILE_PORT = [string]$Port
+    Start-Sleep -Seconds 2
+
+    $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listener) {
+        Write-Warning "No listener detected on local port $Port. Please check whether SSH connected successfully."
+        return
+    }
+
+    $url = Get-MiniCPMLanUrl -Port $Port
+    Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID"
+    Write-Host "[MiniCPM Mobile Ready] Open on mobile browser: $url"
+}
+
+function Stop-MiniCPMMobile {
+    $pidText = $env:MINICPM_MOBILE_SSH_PID
+
+    if ($pidText) {
+        $pidValue = [int]$pidText
+        $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue
+        if ($proc) {
+            Stop-Process -Id $pidValue -Force
+            Write-Host "[MiniCPM Mobile Stopped] Tunnel process stopped PID=$pidValue"
+            Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+            Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+            return
+        }
+    }
+
+    $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 }
+    $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue
+    if (-not $listeners) {
+        Write-Host "[MiniCPM Mobile] No listener detected on port $port. Nothing to stop."
+        return
+    }
+
+    foreach ($item in $listeners) {
+        if ($item.OwningProcess -gt 0) {
+            try {
+                Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop
+                Write-Host "[MiniCPM Mobile Stopped] Stopped process listening on port $port PID=$($item.OwningProcess)"
+            }
+            catch {
+                Write-Warning "Failed to stop PID=$($item.OwningProcess): $($_.Exception.Message)"
+            }
+        }
+    }
+
+    Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue
+    Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue
+}
+
+function Restart-MiniCPMMobile {
+    param(
+        [int]$Port = 3443
+    )
+
+    Stop-MiniCPMMobile
+    Start-Sleep -Seconds 1
+    Start-MiniCPMMobile -Port $Port
+}
+```
+
+Usage example:
+
+```powershell
+# 1) Set dynamic SSH parameters first (only change here when port changes)
+Set-MiniCPMSSH -Port "54062" -User "your_user"
+
+# 2) One-click start mobile mode and output the accessible URL
+Start-MiniCPMMobile
+
+# 3) One-click restart mobile mode after port change (optional)
+Restart-MiniCPMMobile
+
+# 4) Stop mobile mode tunnel
+Stop-MiniCPMMobile
+```
+
+---
+
+## 0. 
Directory and File Overview + +This guide uses the newly created deployment files in your repository: + +- `deploy/Dockerfile.backend`: Backend inference service image (FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`: Frontend image (Vue build + Nginx) +- `deploy/nginx.docker.conf`: Nginx reverse proxy to backend container +- `deploy/docker-compose.yml`: Two-container orchestration (frontend + backend) +- `deploy/requirements.backend.txt`: Backend Python dependency list +- `deploy/gen_ssl_cert.sh`: Self-signed SSL certificate generation script (required for mobile HTTPS) + +--- + +## 1. Local (WSL2) Prerequisites + +Run in WSL2 Ubuntu: + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o + +# 1) Check Docker +sudo docker --version +sudo docker compose version + +# 2) If your current user cannot use docker directly, you can temporarily use sudo docker +# Or add the user to the docker group (takes effect after re-login) +# sudo usermod -aG docker $USER +``` + +> Note: The local 1050Ti does not participate in inference. The local machine is only responsible for building images and does not require a local GPU. + +--- + +## 2. Download the Model Locally (for Upload to Intranet) + +It is recommended to download the HuggingFace model locally (where internet is available), then package and upload it. + +### 2.1 Install Download Tool + +```bash +python3 -m pip install -U huggingface_hub +``` + +### 2.2 Download MiniCPM-o 4.5 + +```bash +mkdir -p /mnt/d/JiuTian/codes/MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` + +After downloading, check the size and key files: + +```bash +du -sh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head +``` + +--- + +## 3. 
Build Two Images in WSL2 + +Run from the repository root directory: + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o + +# Backend image +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . + +# Frontend image +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . +``` + +Verify the images exist: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. Export Images + Generate SSL Certificate + +### 4.1 Export Images as tar + +```bash +mkdir -p /mnt/d/JiuTian/deploy_pkg + +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# Package compose and nginx config +cp deploy/docker-compose.yml /mnt/d/JiuTian/deploy_pkg/ +cp deploy/nginx.docker.conf /mnt/d/JiuTian/deploy_pkg/ +``` + +Optional: Compress to reduce transfer size + +```bash +cd /mnt/d/JiuTian/deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 Generate Self-Signed SSL Certificate (Required for Mobile HTTPS) + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o +bash deploy/gen_ssl_cert.sh /mnt/d/JiuTian/deploy_pkg/certs +``` + +This will generate `server.crt` and `server.key` under `/mnt/d/JiuTian/deploy_pkg/certs/`. + +--- + +## 5. Upload to the Intranet Server + +You have already passed company intranet authentication, and the port may change dynamically. Please use the SSH variables defined above. 
+ +### 5.1 Upload Image Packages and Config Files + +```bash +# First create the target directory on the server +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg" + +# Upload image tar packages +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/docker-compose.yml \ + /mnt/d/JiuTian/deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +### 5.2 Upload Model Weights + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/data/models/ +``` + +### 5.3 Upload SSL Certificate (Required for Mobile Access) + +```bash +scp -P $SSH_PORT -r /mnt/d/JiuTian/deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +> If the port changes, simply update the `SSH_PORT` variable and retry the command. + +--- + +## 6. H100 Server Preparation (One-Time) + +Log in to the server through the established tunnel: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +Check the environment: + +```bash +# Confirm NVIDIA driver (already confirmed: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# Check Docker +docker --version +docker compose version +``` + +### 6.1 Install NVIDIA Container Toolkit (If Not Installed) + +If `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` fails, you need to install the toolkit. + +Restart Docker after installation: + +```bash +sudo systemctl restart docker +``` + +Verify again: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 7. 
Load Images and Start Services on the H100 Server + +Run on the server (after logging in via `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`): + +```bash +cd /data/minicpmo/deploy_pkg + +# If the uploaded files are .tar.gz, decompress first +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# Load images +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +# Place runtime files +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.crt certs/server.key /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +``` + +### 7.1 Set Model Path and Start + +`docker-compose.yml` uses the `MODEL_PATH` environment variable. You can export it directly: + +```bash +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + +# Compatible with both Compose commands: docker compose / docker-compose +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found. Please install docker-compose or the docker compose plugin first." 
&& exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +If neither Compose option is available (`docker compose` / `docker-compose` both absent), you can start directly with `docker run`: + +```bash +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest +``` + +If you encounter `Failed to Setup IP tables` or `No chain/target/match by that name`, you can bypass the bridge network and start with the `host` network instead: + +```bash +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + --add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest +``` + +Check status: + +```bash +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi + +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +If using the `docker run` approach, check status with: + +```bash +docker ps --filter name=minicpmo +docker logs -f minicpmo-backend +``` + 
+Health check: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +Expected response: + +```json +{"status":"OK"} +``` + +> The first model load may be slow (tens of seconds to a few minutes). Wait until the logs show model initialization complete before testing the frontend. + +--- + +## 8. Local Computer Access (SSH Port Forwarding) + +You can already connect via SSH tunnel — just forward the service ports using the current port. + +Open a new terminal in local PowerShell or WSL: + +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +Keep this terminal connected. Then open in a local browser: + +- Frontend (HTTP): +- Frontend (HTTPS): (self-signed cert, click "Continue" to proceed) +- Backend health check: + +> The browser will request camera/microphone permissions — click Allow. When accessing via `localhost`, HTTP is sufficient to obtain camera permissions. + +--- + +## 9. Mobile Access (Full-Duplex Video Calling) + +### 9.1 Problem and Principle + +Mobile browsers (Chrome/Safari) **must use HTTPS** to access the camera and microphone (`localhost` is an exception, but the phone is not localhost). + +Solution: **Use the laptop as a relay** — Phone → Laptop WiFi LAN IP → SSH tunnel → Server. + +```text +Mobile browser ──WiFi──▶ Laptop:3443 ──SSH tunnel──▶ H100:3443 ──Nginx──▶ Backend:32550 + (HTTPS) (bound to 0.0.0.0) +``` + +### 9.2 Steps + +#### Step 1: Establish an SSH Tunnel with All-Interface Binding + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> Key difference: `0.0.0.0:3443` makes all network interfaces on the laptop listen on port 3443, allowing phones on the same WiFi to connect. + +#### Step 2: Find the Laptop's LAN IP + +Run in PowerShell: + +```powershell +ipconfig | Select-String "IPv4" +``` + +Assume the result is `192.168.1.100`. 
+ +#### Step 3: Allow Port Through Windows Firewall + +Run in PowerShell (as Administrator): + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4: Access from Mobile Browser + +Make sure the phone and laptop are on the same WiFi, then enter in the mobile browser: + +```text +https://192.168.1.100:3443 +``` + +- **First visit** will show an "unsafe connection" warning (self-signed cert) — tap **"Advanced" → "Continue"** +- The browser will then request camera/microphone permissions — tap **Allow** +- Enter the video call page and start a full-duplex conversation + +### 9.3 iOS Safari Notes + +iOS Safari is stricter with self-signed certificates. If the above bypass doesn't work: + +1. Open `https://192.168.1.100:3443/certs/server.crt` in Safari on the phone (if you configured a cert download path), download and install the certificate +2. Or send `server.crt` to the phone via AirDrop / WeChat, then go to **Settings → General → Profile → Install** +3. Then go to **Settings → General → About → Certificate Trust Settings → Enable Full Trust** + +After that, Safari can access `https://192.168.1.100:3443` normally. + +--- + +## 10. Common Issues and Troubleshooting + +### 10.1 Frontend Opens, but Cannot Start a Conversation + +Check backend logs: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +Key things to look for: + +- Whether the model path exists: `/models/MiniCPM-o-4_5` +- Whether VRAM is sufficient (H100 usually has enough) +- Whether `trust_remote_code` or dependency version errors appear + +### 10.2 GPU Not Visible Inside Container + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +If it fails, check the NVIDIA Container Toolkit and Docker daemon configuration first. + +### 10.3 WebSocket / SSE Anomalies + +This project has already disabled buffering and configured WebSocket upgrade in `nginx.docker.conf`. 
+If issues persist, check whether the company's intranet gateway is blocking long-lived connections. + +### 10.4 Model Startup Is Too Slow + +The first startup may be slow; subsequent starts will be much faster. Check with: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. Optional Optimizations for Next Steps + +1. Switch the backend image to "offline wheel installation mode" to completely eliminate the need for pip internet access on the server. +2. Use a private image registry (Harbor) instead of tar package transfers. +3. Use systemd or cron for automatic container restart and log rotation. +4. Replace the self-signed certificate with one issued by an enterprise CA to eliminate manual trust on mobile devices. + +--- + +## 12. One-Click Command Quick Reference + +### H100 Side (Assuming Files Are Already Uploaded) + +```bash +cd /data/minicpmo/deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.* /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found. Please install docker-compose or the docker compose plugin first." 
&& exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### Local Computer (Open Tunnel) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell version: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +Open on local computer: + +### Mobile (Relayed Through Laptop) + +```bash +# Bind all interfaces on the laptop +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +Open on mobile browser: `https://:3443` \ No newline at end of file diff --git a/deploy/DEPLOY_WSL2_TO_H100_ZH.md b/deploy/DEPLOY_WSL2_TO_H100_ZH.md new file mode 100644 index 00000000..54bf01fc --- /dev/null +++ b/deploy/DEPLOY_WSL2_TO_H100_ZH.md @@ -0,0 +1,783 @@ +# MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问) + +> 目标:你在本地 Windows系统PC + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和Android系统手机上测试全双工视频通话。 + +**你的环境速查:** + +| 项目 | 值 | +| --- | --- | +| 服务器 SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`(端口可能动态变化) | +| GPU | NVIDIA H100(驱动 550.90.12) | +| CUDA | 12.4(与 Dockerfile 基础镜像 `cuda:12.4.1` 完全匹配) | +| 本地 | Win10 + WSL2 Ubuntu | + +**每次执行前先设置 SSH 变量(只改这里即可):** + +```bash +export SSH_HOST= +export SSH_PORT= +export SSH_USER= +``` + +PowerShell 等价写法(Windows 终端直接用): + +```powershell +$env:SSH_HOST = "" +$env:SSH_PORT = "" +$env:SSH_USER = "" +``` + +## PowerShell 日常三命令速查(推荐) + +```powershell +# 1) 端口变化时先更新 SSH 参数 +Set-MiniCPMSSH -Port "" -User "" + +# 2) 启动手机模式(开隧道 + 打印可访问 URL) +Start-MiniCPMMobile + +# 3) 结束隧道 +Stop-MiniCPMMobile +``` + +端口变化后的快速恢复: + +```powershell +Set-MiniCPMSSH -Port "" -User "" +Restart-MiniCPMMobile +``` + +PowerShell 中引用变量时,`ssh/scp` 建议写成: + +```powershell +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/deploy_pkg/" +``` + +可选:定义一个一键函数(以后只改端口即可) + +```powershell 
+function Set-MiniCPMSSH { + param( + [Parameter(Mandatory = $true)] + [string]$Port, + [string]$Host = "", + [string]$User = "" + ) + + $env:SSH_HOST = $Host + $env:SSH_PORT = $Port + $env:SSH_USER = $User + + Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER" +} +``` + +使用示例: + +```powershell +Set-MiniCPMSSH -Port "" -User "" +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +``` + +可选:定义一键开隧道函数(本机/手机两种模式) + +```powershell +function Open-MiniCPMTunnel { + param( + [ValidateSet("local", "mobile")] + [string]$Mode = "local" + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + if ($Mode -eq "local") { + ssh -N -p $env:SSH_PORT ` + -L 3000:127.0.0.1:3000 ` + -L 3443:127.0.0.1:3443 ` + -L 32550:127.0.0.1:32550 ` + "$env:SSH_USER@$env:SSH_HOST" + } + else { + ssh -N -p $env:SSH_PORT ` + -L 0.0.0.0:3443:127.0.0.1:3443 ` + "$env:SSH_USER@$env:SSH_HOST" + } +} +``` + +使用示例: + +```powershell +# 1) 设置动态 SSH 参数 +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 仅本机访问(浏览器打开 http://127.0.0.1:3000) +Open-MiniCPMTunnel -Mode local + +# 3) 手机访问(同一 WiFi,用 https://笔记本局域网IP:3443) +Open-MiniCPMTunnel -Mode mobile +``` + +可选:自动打印手机访问地址 + +```powershell +function Get-MiniCPMLanUrl { + param( + [int]$Port = 3443 + ) + + $ipv4List = Get-NetIPAddress -AddressFamily IPv4 | + Where-Object { + $_.IPAddress -notlike '127.*' -and + $_.IPAddress -notlike '169.254.*' -and + $_.PrefixOrigin -ne 'WellKnown' + } | + Sort-Object -Property InterfaceMetric + + if (-not $ipv4List) { + throw "未找到可用 IPv4 地址,请检查网卡/网络连接" + } + + $ip = $ipv4List[0].IPAddress + $url = "https://$ip`:$Port" + + Write-Host "[MiniCPM LAN URL] $url" + return $url +} +``` + +使用示例: + +```powershell +# 先开启手机模式隧道(在另一个终端窗口运行) +Open-MiniCPMTunnel -Mode mobile + +# 当前窗口打印手机访问地址 +Get-MiniCPMLanUrl +``` + +可选:一键启动手机模式(开隧道 + 检查端口 + 打印 URL) + +```powershell +function Start-MiniCPMMobile { + param( + 
[int]$Port = 3443 + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST" + + # 在新窗口开隧道,避免阻塞当前终端 + $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru + $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id + $env:MINICPM_MOBILE_PORT = [string]$Port + Start-Sleep -Seconds 2 + + $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue + if (-not $listener) { + Write-Warning "未检测到本机 $Port 端口监听,请检查 SSH 是否连接成功。" + return + } + + $url = Get-MiniCPMLanUrl -Port $Port + Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID" + Write-Host "[MiniCPM Mobile Ready] 手机浏览器访问: $url" +} + +function Stop-MiniCPMMobile { + $pidText = $env:MINICPM_MOBILE_SSH_PID + + if ($pidText) { + $pidValue = [int]$pidText + $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue + if ($proc) { + Stop-Process -Id $pidValue -Force + Write-Host "[MiniCPM Mobile Stopped] 已停止隧道进程 PID=$pidValue" + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue + return + } + } + + $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 } + $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue + if (-not $listeners) { + Write-Host "[MiniCPM Mobile] 未检测到监听端口 $port,无需停止。" + return + } + + foreach ($item in $listeners) { + if ($item.OwningProcess -gt 0) { + try { + Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop + Write-Host "[MiniCPM Mobile Stopped] 已停止监听端口 $port 的进程 PID=$($item.OwningProcess)" + } + catch { + Write-Warning "停止 PID=$($item.OwningProcess) 失败:$($_.Exception.Message)" + } + } + } + + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item 
Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue +} + +function Restart-MiniCPMMobile { + param( + [int]$Port = 3443 + ) + + Stop-MiniCPMMobile + Start-Sleep -Seconds 1 + Start-MiniCPMMobile -Port $Port +} +``` + +使用示例: + +```powershell +# 1) 先设置动态 SSH 参数(端口变更时只改这里) +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 一键启动手机模式并输出可访问地址 +Start-MiniCPMMobile + +# 3) 端口变化后,一键重启手机模式(可选) +Restart-MiniCPMMobile + +# 4) 结束手机模式隧道 +Stop-MiniCPMMobile +``` + +--- + +## 0. 目录与文件说明 + +本指南使用了你仓库中新建的部署文件: + +- `deploy/Dockerfile.backend`:后端推理服务镜像(FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`:前端镜像(Vue build + Nginx) +- `deploy/nginx.docker.conf`:Nginx 反向代理到后端容器 +- `deploy/docker-compose.yml`:双容器编排(frontend + backend) +- `deploy/requirements.backend.txt`:后端 Python 依赖清单 +- `deploy/gen_ssl_cert.sh`:自签名 SSL 证书生成脚本(手机端 HTTPS 必需) + +--- + +## 1. 本地(WSL2)前置准备 + +在 WSL2 Ubuntu 执行: + +```bash +cd /MiniCPM-o + +# 1) 检查 Docker +sudo docker --version +sudo docker compose version + +# 2) 如果你当前用户不能直接用 docker,可先临时用 sudo docker +# 或将用户加入 docker 组(重新登录后生效) +# sudo usermod -aG docker $USER +``` + +> 说明:本地 1050Ti 不参与推理,本地只负责构建镜像,不需要本地 GPU。 + +--- + +## 2. 本地下载模型(用于上传到内网) + +推荐在本地(有网环境)下载 HuggingFace 模型,再打包上传。 + +### 2.1 安装下载工具 + +```bash +python3 -m pip install -U huggingface_hub +``` + +### 2.2 下载 MiniCPM-o 4.5 + +```bash +mkdir -p /MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` + +下载后检查体积和关键文件: + +```bash +du -sh /MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /MiniCPM-o/models/MiniCPM-o-4_5 | head +``` + +--- + +## 3. 在 WSL2 构建两个镜像 + +在仓库根目录执行: + +```bash +cd /MiniCPM-o + +# 后端镜像 +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . + +# 前端镜像 +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . 
+``` + +验证镜像存在: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. 导出镜像 + 生成 SSL 证书 + +### 4.1 导出镜像为 tar + +```bash +mkdir -p /deploy_pkg + +docker save -o /deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# 打包 compose 与 nginx 配置 +cp deploy/docker-compose.yml /deploy_pkg/ +cp deploy/nginx.docker.conf /deploy_pkg/ +``` + +可选:压缩减少传输体积 + +```bash +cd /deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 生成自签名 SSL 证书(手机端 HTTPS 必需) + +```bash +cd /MiniCPM-o +bash deploy/gen_ssl_cert.sh /deploy_pkg/certs +``` + +这会在 `/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。 + +--- + +## 5. 上传到内网服务器 + +你已经通过公司内网认证,且端口可能动态变化,请使用上面定义的 SSH 变量。 + +### 5.1 上传镜像包和配置文件 + +```bash +# 先在服务器上创建目标目录 +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /deploy_pkg" + +# 上传镜像 tar 包 +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /deploy_pkg/minicpmo-backend_latest.tar.gz \ + /deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /deploy_pkg/docker-compose.yml \ + /deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ +``` + +### 5.2 上传模型权重 + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/models/ +``` + +### 5.3 上传 SSL 证书(手机端访问需要) + +```bash +scp -P $SSH_PORT -r /deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ +``` + +> 如果端口变更,只需要修改 `SSH_PORT` 变量并重试命令。 + +--- + +## 6. 
H100 服务器准备(一次性) + +通过已建立的隧道登录服务器: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +检查环境: + +```bash +# 确认 NVIDIA 驱动(你已确认: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# 检查 Docker +docker --version +docker compose version +``` + +### 6.1 安装 NVIDIA Container Toolkit(若未安装) + +如果 `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` 失败,需要安装 toolkit。 + +安装后重启 Docker: + +```bash +sudo systemctl restart docker +``` + +再验证: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 7. H100 服务器加载镜像与启动服务 + +在服务器上执行(通过 `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` 登录后): + +```bash +cd /deploy_pkg + +# 若上传的是 .tar.gz,先解压 +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# 加载镜像 +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +# 放置运行时文件(与第 5 节的上传目录 /deploy_pkg、第 12 节的 /runtime 保持一致) +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.crt certs/server.key /runtime/certs/ + +cd /runtime +``` + +### 7.1 设置模型路径并启动 + +`docker-compose.yml` 里用了 `MODEL_PATH` 环境变量。你可以直接导出: + +```bash +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + +# 兼容两种 Compose 命令:docker compose / docker-compose +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +如果两种 Compose 都不可用(`docker compose` / `docker-compose` 都不存在),可直接用 `docker run` 启动: + +```bash +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 
${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest +``` + +如果出现 `Failed to Setup IP tables` 或 `No chain/target/match by that name`,可先绕过 bridge 网络,改用 `host` 网络启动: + +```bash +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + --add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest +``` + +查看状态: + +```bash +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi + +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +若使用 `docker run` 方案,查看状态命令: + +```bash +docker ps --filter name=minicpmo +docker logs -f minicpmo-backend +``` + +健康检查: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +应返回: + +```json +{"status":"OK"} +``` + +> 首次加载模型会较慢(几十秒到数分钟),日志出现模型初始化完成后再测试前端。 + +--- + +## 8. 本地电脑访问(SSH 端口转发) + +你已能连通 SSH 隧道,只需基于当前端口做服务转发。 + +在本地 PowerShell 或 WSL 新开一个终端: + +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +保持该终端不断开。然后在本地浏览器访问: + +- 前端页面(HTTP): +- 前端页面(HTTPS):(自签名证书,需点击"继续前往") +- 后端健康检查: + +> 浏览器会请求摄像头/麦克风权限,点击允许。本地用 `localhost` 访问时 HTTP 即可获取摄像头权限。 + +--- + +## 9. 
手机端访问(全双工视频通话) + +### 9.1 问题与原理 + +手机浏览器(Chrome/Safari)要调用摄像头和麦克风,**必须使用 HTTPS**(`localhost` 例外,但手机并非 localhost)。 + +方案:**笔记本做中继** — 手机 → 笔记本 WiFi 局域网 IP → SSH 隧道 → 服务器。 + +```text +手机浏览器 ──WiFi──▶ 笔记本:3443 ──SSH隧道──▶ H100:3443 ──Nginx──▶ 后端:32550 + (HTTPS) (绑定 0.0.0.0) +``` + +### 9.2 操作步骤 + +#### Step 1:建立"全接口绑定"的 SSH 隧道 + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> 关键区别:`0.0.0.0:3443` 让笔记本的所有网卡都监听 3443 端口,同一 WiFi 的手机才能连入。 + +#### Step 2:查看笔记本局域网 IP + +PowerShell 中执行: + +```powershell +ipconfig | Select-String "IPv4" +``` + +假设得到 `192.168.1.100`。 + +#### Step 3:Windows 防火墙放行端口 + +PowerShell(管理员)执行: + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4:手机浏览器访问 + +确保手机与笔记本连同一 WiFi,然后在手机浏览器输入: + +```text +https://192.168.1.100:3443 +``` + +- **首次访问**会提示"不安全连接"(自签名证书),选择 **「高级」→「继续前往」** +- 接着浏览器会请求摄像头/麦克风权限,**允许**即可 +- 进入视频通话页面,开始全双工对话 + +### 9.3 iOS Safari 注意事项 + +iOS Safari 对自签名证书更严格。如果无法通过上述方式跳过: + +1. 在手机上用 Safari 打开 `https://192.168.1.100:3443/certs/server.crt`(若你配置了证书下载路径),下载安装证书 +2. 或者将 `server.crt` 通过 AirDrop / 微信发送到手机,在 **设置 → 通用 → 描述文件 → 安装** +3. 再到 **设置 → 通用 → 关于本机 → 证书信任设置 → 启用完全信任** + +之后 Safari 访问 `https://192.168.1.100:3443` 即可正常使用。 + +--- + +## 10. 常见问题与排查 + +### 10.1 前端能打开,但无法对话 + +检查后端日志: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +重点看: + +- 模型路径是否存在:`/models/MiniCPM-o-4_5` +- 显存是否足够(H100 通常充足) +- 是否出现 `trust_remote_code` / 依赖版本错误 + +### 10.2 容器内 GPU 不可见 + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +若失败,优先检查 NVIDIA Container Toolkit 与 Docker daemon 配置。 + +### 10.3 WebSocket / SSE 异常 + +本项目已在 `nginx.docker.conf` 关闭缓冲并配置了 websocket upgrade。 +若仍异常,检查公司内网网关是否拦截长连接。 + +### 10.4 模型启动太慢 + +首次启动可能较慢;后续会快很多。可先看: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. 你下一步可以做的优化(可选) + +1. 将后端镜像改为“离线 wheel 安装模式”,彻底避免服务器 pip 联网需求。 +2. 
使用私有镜像仓库(Harbor)替代 tar 包传输。 +3. 用 systemd 或 cron 做容器自动拉起与日志轮转。 +4. 替换自签名证书为企业 CA 签发的证书,手机端免手动信任。 + +--- + +## 12. 一键启动命令速查 + +### H100 侧(假设文件已上传) + +```bash +cd /deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.* /runtime/certs/ + +cd /runtime +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### 本地电脑(开隧道) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell 版本: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +本地电脑打开:`http://localhost:3000` + +### 手机端(通过笔记本中转) + +```bash +# 笔记本绑定所有网卡 +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +手机浏览器打开:`https://<笔记本局域网IP>:3443` diff --git a/deploy/Dockerfile.backend b/deploy/Dockerfile.backend new file mode 100644 index 00000000..b4291181 --- /dev/null +++ b/deploy/Dockerfile.backend @@ -0,0 +1,71 @@ +# ============================================ +# MiniCPM-o 4.5 Backend Inference Service Dockerfile +# Base image: NVIDIA CUDA 12.4 + Ubuntu 22.04 (matches server driver 550.x and the cu124 PyTorch wheels below) +# ============================================ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +# Avoid interactive prompts +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +# ---- System dependencies ---- +RUN apt-get update && apt-get install -y \ + python3.10 \ + python3.10-dev \ + python3-pip \ + ffmpeg \ + libsndfile1 \ + libsndfile1-dev \ + git \ + wget \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Set python3.10 as 
default +RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \ + ln -sf /usr/bin/python3 /usr/bin/python && \ + python3 -m pip install --upgrade pip setuptools wheel + +# ---- PyTorch (CUDA 12.4) ---- +RUN pip install --no-cache-dir \ + "torch>=2.3.0,<=2.8.0" \ + "torchaudio<=2.8.0" \ + --index-url https://download.pytorch.org/whl/cu124 + +# ---- MiniCPM-o core dependencies ---- +RUN pip install --no-cache-dir \ + "transformers==4.51.0" \ + accelerate \ + "minicpmo-utils[all]>=1.0.5" \ + librosa \ + soundfile \ + onnxruntime \ + sentencepiece \ + Pillow \ + numpy + +# ---- Web service dependencies ---- +RUN pip install --no-cache-dir \ + fastapi \ + uvicorn \ + aiofiles \ + pydantic + +# ---- Working directory ---- +WORKDIR /app + +# ---- Copy backend code ---- +COPY web_demos/minicpm-o_2.6/model_server.py /app/ +COPY web_demos/minicpm-o_2.6/vad_utils.py /app/ +COPY web_demos/minicpm-o_2.6/silero_vad.onnx /app/ + +# ---- Copy TTS reference audios ---- +COPY assets/ref_audios/ /app/assets/ref_audios/ + +# ---- Expose port ---- +EXPOSE 32550 + +# ---- Startup command ---- +# Model path is mounted to /models/MiniCPM-o-4_5 via volume +ENV BACKEND_PORT=32550 +CMD ["sh", "-lc", "python3 model_server.py --model /models/MiniCPM-o-4_5 --port ${BACKEND_PORT}"] diff --git a/deploy/Dockerfile.frontend b/deploy/Dockerfile.frontend new file mode 100644 index 00000000..5fc22e8f --- /dev/null +++ b/deploy/Dockerfile.frontend @@ -0,0 +1,36 @@ +# ============================================ +# MiniCPM-o 4.5 Frontend Web Service Dockerfile +# Multi-stage build: Node.js build + Nginx deployment +# ============================================ + +# ---- Stage 1: Build Vue project ---- +FROM node:20-alpine AS build-stage + +WORKDIR /build +COPY web_demos/minicpm-o_2.6/web_server/ /build/ + +# Install pnpm and build +# Generate placeholder certificate files (vite.config.js server.https is also parsed during build) +RUN npm install -g pnpm && \ + touch key.pem cert.pem && \ + pnpm install 
&& \ + pnpm run build + +# ---- Stage 2: Nginx static service ---- +FROM nginx:alpine AS production-stage + +# envsubst is used to render nginx config template at container startup +RUN apk add --no-cache gettext + +# Copy build artifacts +COPY --from=build-stage /build/dist /usr/share/nginx/html + +# Copy custom nginx config template (Docker network version) +COPY deploy/nginx.docker.conf /etc/nginx/nginx.conf.template + +# Render nginx config with BACKEND_PORT at startup +ENV BACKEND_PORT=32550 + +EXPOSE 3000 3443 + +CMD ["sh", "-lc", "envsubst '$$BACKEND_PORT' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && nginx -g 'daemon off;'"] diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml new file mode 100644 index 00000000..60141e28 --- /dev/null +++ b/deploy/docker-compose.yml @@ -0,0 +1,64 @@ +# ============================================ +# MiniCPM-o 4.5 Docker Compose Deployment Configuration +# ============================================ +# Usage: +# docker compose -f deploy/docker-compose.yml up -d +# +# Prerequisites: +# 1. NVIDIA Container Toolkit is installed on the server +# 2. Model weights are placed in the ${MODEL_PATH} directory +# ============================================ + +services: + # ---- Backend Inference Service (GPU) ---- + model-backend: + image: minicpmo-backend:latest + container_name: minicpmo-backend + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + volumes: + # Mount model weights directory (host path → container path) + - ${MODEL_PATH:-}:/models/MiniCPM-o-4_5:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + ports: + - "${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550}" + # Note: BACKEND_PORT is the application listening port (default 32550), + # which is different from the external SSH temporary tunnel port. 
+ healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:${BACKEND_PORT:-32550}/api/v1/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s # Model loading may take a long time + networks: + - minicpmo-net + + # ---- Frontend Web Service (Nginx) ---- + web-frontend: + image: minicpmo-frontend:latest + container_name: minicpmo-frontend + restart: unless-stopped + ports: + - "3000:3000" + - "3443:3443" # HTTPS (for mobile access) + volumes: + # Mount SSL certificate directory (defaults to ./certs next to this compose file) + - ${CERTS_PATH:-./certs}:/etc/nginx/certs:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + depends_on: + model-backend: + condition: service_started + networks: + - minicpmo-net + +networks: + minicpmo-net: + driver: bridge diff --git a/deploy/gen_ssl_cert.sh b/deploy/gen_ssl_cert.sh new file mode 100644 index 00000000..ac2e5d3b --- /dev/null +++ b/deploy/gen_ssl_cert.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# ============================================ +# Generate self-signed SSL certificate (for Nginx HTTPS + mobile access) +# Usage: bash deploy/gen_ssl_cert.sh [output directory] +# ============================================ +set -e + +OUT_DIR="${1:-deploy/certs}" +mkdir -p "$OUT_DIR" + +echo ">>> Generating self-signed SSL certificate to $OUT_DIR ..." 
+openssl req -x509 -nodes -days 3650 \ + -newkey rsa:2048 \ + -keyout "$OUT_DIR/server.key" \ + -out "$OUT_DIR/server.crt" \ + -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=localhost" \ + -addext "subjectAltName=DNS:localhost,IP:127.0.0.1" + +echo ">>> Certificate generated:" +ls -lh "$OUT_DIR"/server.* +echo "" +echo ">>> Tip: After uploading the entire $OUT_DIR directory to the server," +echo " create a certs/ directory next to docker-compose.yml and put server.crt + server.key inside" diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf new file mode 100644 index 00000000..a5832d9c --- /dev/null +++ b/deploy/nginx.docker.conf @@ -0,0 +1,122 @@ +user root; +worker_processes auto; +pid /run/nginx.pid; + +events { + worker_connections 768; +} + +http { + # ---- Basic Settings ---- + client_max_body_size 20M; + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # ---- Logs ---- + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + # ---- Gzip Compression ---- + gzip on; + + # ---- Virtual Host (HTTP, Local Access) ---- + server { + listen 3000; + server_name _; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + # Backend API requests → Forward to backend container (Docker service name: model-backend) + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + # Disable buffering (required for SSE streaming responses) + proxy_buffering off; + proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + # WebSocket requests → Forward to backend container + location /ws { + proxy_pass 
http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + # Frontend static files + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } + + # ---- Virtual Host (HTTPS, Mobile Access) ---- + server { + listen 3443 ssl; + server_name localhost; + + ssl_certificate /etc/nginx/certs/server.crt; + ssl_certificate_key /etc/nginx/certs/server.key; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + proxy_buffering off; + proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + location /ws { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } +} diff --git a/deploy/requirements.backend.txt b/deploy/requirements.backend.txt new file mode 100644 index 00000000..60648e1b --- /dev/null +++ b/deploy/requirements.backend.txt @@ -0,0 +1,29 @@ +# ============================================ +# MiniCPM-o 4.5 Backend Python Requirements +# For offline environments: pip download / pip install +# 
============================================ + +# == PyTorch (CUDA 12.4) == +# Note: PyTorch should be downloaded separately from https://download.pytorch.org/whl/cu124 +# torch>=2.3.0,<=2.8.0 +# torchaudio<=2.8.0 + +# == Core Model Dependencies == +transformers==4.51.0 +accelerate +minicpmo-utils[all]>=1.0.5 +sentencepiece + +# == Audio/Video Processing == +librosa +soundfile +onnxruntime +Pillow +numpy + +# == Web Service == +fastapi +uvicorn[standard] +aiofiles +pydantic +httpx diff --git a/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak new file mode 100644 index 00000000..9a4285cf Binary files /dev/null and b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak differ diff --git a/web_demos/minicpm-o_2.6/miniCPM4.5.svg b/web_demos/minicpm-o_2.6/miniCPM4.5.svg new file mode 100644 index 00000000..dbb24656 --- /dev/null +++ b/web_demos/minicpm-o_2.6/miniCPM4.5.svg @@ -0,0 +1,28 @@ + + + 编组 5 + + + + + + + + + + + + + + + + + + + 4.5 + 4.5 + + + + + \ No newline at end of file diff --git a/web_demos/minicpm-o_2.6/model_server.py b/web_demos/minicpm-o_2.6/model_server.py index d9e86bdb..7e551b2d 100644 --- a/web_demos/minicpm-o_2.6/model_server.py +++ b/web_demos/minicpm-o_2.6/model_server.py @@ -91,7 +91,7 @@ def __init__(self): self.device='cuda:0' self.minicpmo_model_path = args.model #"openbmb/MiniCPM-o-2_6" - self.model_version = "2.6" + self.model_version = "4.5" with torch.no_grad(): self.minicpmo_model = AutoModel.from_pretrained(self.minicpmo_model_path, trust_remote_code=True, torch_dtype=self.target_dtype, attn_implementation='sdpa') self.minicpmo_tokenizer = AutoTokenizer.from_pretrained(self.minicpmo_model_path, trust_remote_code=True) @@ -103,6 +103,10 @@ def __init__(self): self.ref_path_default = "assets/ref_audios/default.wav" self.ref_path_female = "assets/ref_audios/female_example.wav" self.ref_path_male = "assets/ref_audios/male_example.wav" + self.tts_sample_rate = 24000 # 4.5 uses 24kHz 
(s3tokenizer) + + # 4.5: init token2wav cache with default ref audio for streaming TTS + self._init_token2wav_with_ref(self.ref_path_default) self.input_audio_id = 0 self.input_audio_vad_id = 0 @@ -119,7 +123,7 @@ def __init__(self): self.msg_type = 1 self.speaking_time_stamp = 0 - self.cycle_wait_time = 12800/24000 + 0.15 + self.cycle_wait_time = 25 * 0.04 + 0.15 # 4.5: 25 audio tokens/chunk, each ~0.04s self.extra_wait_time = 2.5 self.server_wait = True @@ -203,69 +207,77 @@ def no_active_stream(self): return True return False + def _init_token2wav_with_ref(self, ref_path): + """Initialize token2wav cache with a reference audio for streaming TTS (4.5 API).""" + try: + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + with torch.no_grad(): + self.minicpmo_model.init_token2wav_cache(ref_audio) + logger.info(f"init_token2wav_cache done with ref: {ref_path}") + except Exception as e: + logger.error(f"init_token2wav_cache failed: {e}") + def sys_prompt_init(self, msg_type): if self.past_session_id == self.session_id: return logger.info("### sys_prompt_init ###") logger.info(f'msg_type is {msg_type}') - if msg_type <= 1: #audio - audio_voice_clone_prompt = "Use the voice in the audio prompt to synthesize new content." - audio_assistant_prompt = "You are a helpful assistant with the above voice style." 
- ref_path = self.ref_path_default - - if self.customized_options is not None: - audio_voice_clone_prompt = self.customized_options['voice_clone_prompt'] - audio_assistant_prompt = self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [audio_voice_clone_prompt + "\n", audio_prompt, "\n" + audio_assistant_prompt]} - elif msg_type == 2: #video - voice_clone_prompt="你是一个AI助手。你能接受视频,音频和文本输入并输出语音和文本。模仿输入音频中的声音特征。" - assistant_prompt="作为助手,你将使用这种声音风格说话。" + # Determine ref audio path + ref_path = self.ref_path_default + language = "en" + if msg_type == 2: # video ref_path = self.ref_path_video_default - - if self.customized_options is not None: - voice_clone_prompt = self.customized_options['voice_clone_prompt'] - assistant_prompt = self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [voice_clone_prompt, audio_prompt, assistant_prompt]} - # elif msg_type == 3: #user start - # assistant_prompt="作为助手,你将使用这种声音风格说话。" - # if self.customized_options is not None: - # assistant_prompt = self.customized_options['assistant_prompt'] - - # sys_msg = {'role': 'user', 'content': [assistant_prompt]} - + language = "zh" + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt') == 1: + ref_path = self.ref_path_default + elif 
self.customized_options.get('use_audio_prompt') == 2: + ref_path = self.ref_path_female + elif self.customized_options.get('use_audio_prompt') == 3: + ref_path = self.ref_path_male + + # 4.5 API: use model.get_sys_prompt() to build system message + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + sys_msg = self.minicpmo_model.get_sys_prompt( + ref_audio=ref_audio, + mode="omni", + language=language, + ) + + # Re-init token2wav cache with the selected ref audio + self._init_token2wav_with_ref(ref_path) + self.msg_type = msg_type msgs = [sys_msg] - if self.customized_options is not None: - if self.customized_options['use_audio_prompt'] > 0: + + def safe_streaming_prefill(prompt_msgs): + try: self.minicpmo_model.streaming_prefill( session_id=str(self.session_id), - msgs=msgs, + msgs=prompt_msgs, tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, ) + return True + except Exception as e: + logger.warning(f"streaming_prefill failed with audio prompt, fallback to text-only prompt: {e}") + fallback_msg = self.minicpmo_model.get_sys_prompt(ref_audio=None, mode="omni", language=language) + self.minicpmo_model.streaming_prefill( + session_id=str(self.session_id), + msgs=[fallback_msg], + tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, + ) + return False + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt', 0) > 0: + safe_streaming_prefill(msgs) if msg_type == 0: - self.minicpmo_model.streaming_prefill( - session_id=str(self.session_id), - msgs=msgs, - tokenizer=self.minicpmo_tokenizer, - ) + safe_streaming_prefill(msgs) self.savedir = os.path.join(f"./log_data/{args.port}/", str(time.time())) if not os.path.exists(self.savedir): @@ -297,7 +309,15 @@ def clear(self): self.audio_input = [] self.image_prefill = None - if self.minicpmo_model.llm_past_key_values[0][0].shape[2]>8192: + kv = self.minicpmo_model.llm_past_key_values + kv_len = 0 + if kv is not None: + if hasattr(kv, 'get_seq_length'): + kv_len 
= kv.get_seq_length() + elif isinstance(kv, (list, tuple)) and len(kv) > 0: + if isinstance(kv[0], (list, tuple)) and len(kv[0]) > 0: + kv_len = kv[0][0].shape[2] + if kv_len > 8192: self.session_id += 1 # to clear all kv cache self.sys_prompt_flag = False @@ -468,6 +488,8 @@ def prefill(self, audio, image, is_end): msgs=msgs, tokenizer=self.minicpmo_tokenizer, max_slice_nums=slice_nums, + use_tts_template=True, + is_last_chunk=(is_end), ) self.input_audio_id += 1 @@ -504,49 +526,69 @@ async def generate(self): with open(input_audio_path, 'rb') as wav_file: audio_stream = wav_file.read() except FileNotFoundError: - print(f"File {input_audio_path} not found.") + logger.warning(f"File {input_audio_path} not found.") yield base64.b64encode(audio_stream).decode('utf-8'), "assistant:\n" - print('=== gen start: ', time.time() - time_gen) - first_time = True - temp_time = time.time() - temp_time1 = time.time() + logger.info(f'=== gen start: {time.time() - time_gen:.3f}s ===') with torch.inference_mode(): if self.stop_response: self.generate_end() return self.minicpmo_model.config.stream_input=True - msg = {"role":"user", "content": self.cnts} - msgs = [msg] text = '' self.speaking_time_stamp = time.time() + sr = self.tts_sample_rate # 4.5 fixed 24kHz try: - for r in self.minicpmo_model.streaming_generate( + for result in self.minicpmo_model.streaming_generate( session_id=str(self.session_id), tokenizer=self.minicpmo_tokenizer, generate_audio=True, - # enable_regenerate=True, + use_tts_template=True, + do_sample=True, ): if self.stop_response: self.generate_end() return - audio_np, sr, text = r["audio_wav"], r["sampling_rate"], r["text"] - - output_audio_path = self.savedir + f'/output_audio_log/output_audio_{self.output_audio_id}.wav' - self.output_audio_id += 1 - soundfile.write(output_audio_path, audio_np, samplerate=sr) - audio_stream = None - try: - with open(output_audio_path, 'rb') as wav_file: - audio_stream = wav_file.read() - except FileNotFoundError: - 
print(f"File {output_audio_path} not found.") - temp_time1 = time.time() - print('text: ', text) - yield base64.b64encode(audio_stream).decode('utf-8'), text + # 4.5 API: yields (waveform_chunk: Tensor, text_chunk: str) + # End signal: (None, None) + if isinstance(result, tuple): + waveform_chunk, text_chunk = result + else: + # fallback for unexpected format + logger.warning(f"Unexpected streaming_generate result type: {type(result)}") + continue + + if waveform_chunk is None: + # generation complete signal + break + + # Convert tensor to numpy, ensure 1D float32 + if isinstance(waveform_chunk, torch.Tensor): + audio_np = waveform_chunk.cpu().float().numpy() + else: + audio_np = np.array(waveform_chunk, dtype=np.float32) + audio_np = audio_np.squeeze() # remove batch dims + if audio_np.ndim == 0 or audio_np.size == 0: + continue # skip empty chunks + + # Resample from model's 24kHz to frontend's expected 16kHz + audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000) + + if text_chunk: + text += text_chunk + + # Encode audio chunk to WAV in memory (no disk I/O) + audio_buffer = io.BytesIO() + soundfile.write(audio_buffer, audio_np, samplerate=16000, format='WAV', subtype='PCM_16') + audio_stream = audio_buffer.getvalue() + + # Send delta text (text_chunk), not accumulated text + yield base64.b64encode(audio_stream).decode('utf-8'), text_chunk if text_chunk else '' self.speaking_time_stamp += self.cycle_wait_time except Exception as e: logger.error(f"Error happened during generation: {str(e)}") + import traceback + traceback.print_exc() yield None, '\n' except Exception as e: @@ -582,8 +624,7 @@ def upload_customized_audio(self, audio_data, audio_fmt): output_audio_path = self.savedir + f'/customized_audio.wav' soundfile.write(output_audio_path, audio_np, sr) self.customized_audio = output_audio_path - logger.info(f"processed customized {audio_fmt} audio") - print(audio_np.shape, type(audio_np), sr) + logger.info(f"processed customized {audio_fmt} audio, 
shape={audio_np.shape}, sr={sr}") else: logger.info(f"empty customized audio, use default value instead.") self.customized_audio = None @@ -734,14 +775,14 @@ async def websocket_stream(websocket: WebSocket, async def generate_sse_response(request: Request, uid: Optional[str] = Header(None)): global stream_manager - print(f"uid: {uid}") + logger.info(f"uid: {uid}") try: # Wait for streaming to complete or timeout while not stream_manager.is_streaming_complete.is_set(): # if stream_manager.is_timed_out(): # yield f"data: {json.dumps({'error': 'Stream timeout'})}\n\n" # return - # print(f"{uid} whille not stream_manager.is_streaming_complete.is_set(), asyncio.sleep(0.1)") + await asyncio.sleep(0.1) logger.info("streaming complete\n") @@ -912,7 +953,7 @@ async def init_options(request: Request, uid: Optional[str] = Header(None)): ctype = content["type"] raise HTTPException(status_code=400, detail=f"Invalid content type: {ctype}") version = stream_manager.model_version - print(version) + logger.info(f"Model version: {version}") response = { "id": uid, "choices": {