diff --git a/.current-upstream-version b/.current-upstream-version new file mode 100644 index 000000000..acf24e50e --- /dev/null +++ b/.current-upstream-version @@ -0,0 +1 @@ +v0.3.47 diff --git a/.github/workflows/sync-build.yml b/.github/workflows/sync-build.yml new file mode 100644 index 000000000..66b59d72b --- /dev/null +++ b/.github/workflows/sync-build.yml @@ -0,0 +1,104 @@ +name: Sync upstream & build custom image + +on: + schedule: + - cron: '0 */6 * * *' # ogni 6 ore + workflow_dispatch: # trigger manuale + push: + branches: [main] # rebuild ad ogni push su main + +jobs: + sync-and-build: + runs-on: ubuntu-latest + steps: + - name: Checkout fork + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.PAT_TOKEN }} + + - name: Notify Telegram (start) + run: | + curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \ + -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \ + -d parse_mode="Markdown" \ + -d text="🚀 *OpenFang Build Started*%0A%0ATrigger: \`${{ github.event_name }}\`%0ACommit: \`${GITHUB_SHA::7}\`%0A[View run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})" + + - name: Fetch upstream tags + if: github.event_name != 'push' + run: | + git remote add upstream https://github.com/RightNow-AI/openfang.git || true + git fetch upstream --tags + + - name: Check for new release + id: check + if: github.event_name != 'push' + run: | + LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -1) + CURRENT=$(cat .current-upstream-version 2>/dev/null || echo "none") + echo "latest=$LATEST_TAG" >> "$GITHUB_OUTPUT" + echo "current=$CURRENT" >> "$GITHUB_OUTPUT" + if [ "$LATEST_TAG" != "$CURRENT" ]; then + echo "new_release=true" >> "$GITHUB_OUTPUT" + else + echo "new_release=false" >> "$GITHUB_OUTPUT" + fi + + - name: Rebase on latest tag + if: github.event_name != 'push' && steps.check.outputs.new_release == 'true' + run: | + git config user.name "github-actions" + git config 
user.email "actions@github.com" + GIT_SEQUENCE_EDITOR=true git rebase ${{ steps.check.outputs.latest }} || while git rebase --skip 2>/dev/null; do :; done + echo "${{ steps.check.outputs.latest }}" > .current-upstream-version + git add .current-upstream-version + git commit -m "chore: sync to upstream ${{ steps.check.outputs.latest }}" || true + git push --force + + - name: Set up Docker Buildx + if: github.event_name == 'push' || steps.check.outputs.new_release == 'true' + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + if: github.event_name == 'push' || steps.check.outputs.new_release == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + if: github.event_name == 'push' || steps.check.outputs.new_release == 'true' + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: | + fliva/openfang:latest + fliva/openfang:${{ steps.check.outputs.latest || 'custom' }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Update Docker Hub description + if: github.event_name == 'push' || steps.check.outputs.new_release == 'true' + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: fliva/openfang + readme-filepath: ./DOCKER_README.md + + - name: Notify Telegram (success) + if: success() && (github.event_name == 'push' || steps.check.outputs.new_release == 'true') + run: | + curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \ + -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \ + -d parse_mode="Markdown" \ + -d text="✅ *OpenFang Build OK*%0A%0ATag: \`${{ steps.check.outputs.latest || 'custom' }}\`%0ACommit: \`${GITHUB_SHA::7}\`%0A[View run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})" + + - name: Notify Telegram (failure) + if: failure() + 
run: | + curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \ + -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \ + -d parse_mode="Markdown" \ + -d text="❌ *OpenFang Build FAILED*%0A%0ACommit: \`${GITHUB_SHA::7}\`%0A[View run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})" diff --git a/Cargo.lock b/Cargo.lock index 73ba5baf3..6c7c32eb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3792,7 +3792,7 @@ dependencies = [ [[package]] name = "openfang-api" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "axum", @@ -3829,7 +3829,7 @@ dependencies = [ [[package]] name = "openfang-channels" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "axum", @@ -3861,7 +3861,7 @@ dependencies = [ [[package]] name = "openfang-cli" -version = "0.3.46" +version = "0.3.47" dependencies = [ "clap", "clap_complete", @@ -3888,7 +3888,7 @@ dependencies = [ [[package]] name = "openfang-desktop" -version = "0.3.46" +version = "0.3.47" dependencies = [ "axum", "open", @@ -3914,7 +3914,7 @@ dependencies = [ [[package]] name = "openfang-extensions" -version = "0.3.46" +version = "0.3.47" dependencies = [ "aes-gcm", "argon2", @@ -3942,7 +3942,7 @@ dependencies = [ [[package]] name = "openfang-hands" -version = "0.3.46" +version = "0.3.47" dependencies = [ "chrono", "dashmap", @@ -3959,7 +3959,7 @@ dependencies = [ [[package]] name = "openfang-kernel" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "chrono", @@ -3996,7 +3996,7 @@ dependencies = [ [[package]] name = "openfang-memory" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "chrono", @@ -4015,7 +4015,7 @@ dependencies = [ [[package]] name = "openfang-migrate" -version = "0.3.46" +version = "0.3.47" dependencies = [ "chrono", "dirs 6.0.0", @@ -4034,7 +4034,7 @@ dependencies = [ [[package]] name = "openfang-runtime" -version = "0.3.46" +version = "0.3.47" dependencies = [ "anyhow", 
"async-trait", @@ -4068,7 +4068,7 @@ dependencies = [ [[package]] name = "openfang-skills" -version = "0.3.46" +version = "0.3.47" dependencies = [ "chrono", "hex", @@ -4091,7 +4091,7 @@ dependencies = [ [[package]] name = "openfang-types" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "chrono", @@ -4110,7 +4110,7 @@ dependencies = [ [[package]] name = "openfang-wire" -version = "0.3.46" +version = "0.3.47" dependencies = [ "async-trait", "chrono", @@ -8773,7 +8773,7 @@ checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" [[package]] name = "xtask" -version = "0.3.46" +version = "0.3.47" [[package]] name = "yoke" diff --git a/DOCKER_README.md b/DOCKER_README.md new file mode 100644 index 000000000..d3e09a054 --- /dev/null +++ b/DOCKER_README.md @@ -0,0 +1,42 @@ +# OpenFang for Lazycat NAS + +Custom [OpenFang](https://github.com/RightNow-AI/openfang) Docker image optimized for deployment on Lazycat LCMD Microserver. + +**Automatically rebuilt on every new upstream release via GitHub Actions.** + +## What's included + +- **OpenFang Agent OS** — Rust-based autonomous AI agent daemon +- **Claude Code CLI** — Anthropic's CLI for Claude, as LLM provider +- **Node.js 22** — JavaScript runtime +- **Python 3** — Python runtime +- **Go** — via Homebrew +- **Homebrew** — package manager for additional tools +- **uv** — fast Python package manager +- **gh** — GitHub CLI +- **gog** — [Google Workspace CLI](https://gogcli.sh/) (Gmail, Calendar, Drive, Sheets, etc.) +- **ffmpeg** — multimedia processing +- **jq** — JSON processor +- **git, curl, wget** — standard utilities + +## Non-root execution + +The image uses `gosu` to drop root privileges to the `openfang` user at runtime. This is required because Claude Code's `--dangerously-skip-permissions` flag refuses to run as root. + +The `openfang` user has passwordless `sudo` access, so it can still install system packages when needed. 
+ +## Usage + +```bash +docker run -d \ + -p 4200:4200 \ + -v openfang-data:/data \ + -v openfang-home:/home/openfang \ + -e OPENFANG_HOME=/data \ + fliva/openfang:latest +``` + +## Source + +- **This fork**: [github.com/f-liva/openfang](https://github.com/f-liva/openfang) +- **Upstream**: [github.com/RightNow-AI/openfang](https://github.com/RightNow-AI/openfang) diff --git a/Dockerfile b/Dockerfile index d794943ed..b56fe096e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,11 +10,36 @@ COPY packages ./packages RUN cargo build --release --bin openfang FROM debian:bookworm-slim -RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y ca-certificates curl git ffmpeg python3 python3-pip chromium gosu sudo procps build-essential jq && rm -rf /var/lib/apt/lists/* +RUN ln -s /usr/bin/python3 /usr/bin/python +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +RUN (type -p wget >/dev/null || (apt-get update && apt-get install -y wget)) && \ + mkdir -p -m 755 /etc/apt/keyrings && \ + out=$(mktemp) && wget -qO "$out" https://cli.github.com/packages/githubcli-archive-keyring.gpg && \ + cat "$out" | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null && \ + chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \ + apt-get update && apt-get install -y gh && rm -rf /var/lib/apt/lists/* +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y nodejs && \ + npm install -g @anthropic-ai/claude-code @qwen-code/qwen-code && \ + rm -rf /var/lib/apt/lists/* +RUN useradd -m -s /bin/bash openfang && echo "openfang ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/openfang +USER openfang +RUN NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL 
https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +RUN eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)" && brew install steipete/tap/gogcli +USER root +RUN echo 'eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)"' >> /home/openfang/.bashrc && \ + echo 'eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)"' >> /root/.bashrc && \ + echo 'export PATH="/data/npm-global/bin:$PATH"' >> /home/openfang/.bashrc && \ + echo 'export PATH="/data/npm-global/bin:$PATH"' >> /root/.bashrc COPY --from=builder /build/target/release/openfang /usr/local/bin/ COPY --from=builder /build/agents /opt/openfang/agents +RUN mkdir -p /data && chown openfang:openfang /data +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh EXPOSE 4200 VOLUME /data ENV OPENFANG_HOME=/data -ENTRYPOINT ["openfang"] +ENTRYPOINT ["entrypoint.sh"] CMD ["start"] diff --git a/crates/openfang-api/src/routes.rs b/crates/openfang-api/src/routes.rs index d659d7169..2241e93f7 100644 --- a/crates/openfang-api/src/routes.rs +++ b/crates/openfang-api/src/routes.rs @@ -358,10 +358,45 @@ pub async fn send_message( } } + // Convert metadata from the gateway into a SenderContext for the kernel + let sender_context = req.metadata.as_ref().map(|meta| { + openfang_types::message::SenderContext { + channel: meta.get("channel").and_then(|v| v.as_str().map(String::from)), + sender_id: meta.get("sender").and_then(|v| v.as_str().map(String::from)), + sender_name: meta.get("sender_name").and_then(|v| v.as_str().map(String::from)), + } + }); + + // SECURITY: Check allowed_users for channel-based messages (WhatsApp, etc.) + // If allowed_users is empty, all senders are permitted (open mode). 
+ if let Some(ref ctx) = sender_context { + if let Some(ref sender_id) = ctx.sender_id { + if let Some(ref channel) = ctx.channel { + let channels_config = state.channels_config.read().await; + let blocked = match channel.as_str() { + "whatsapp" => channels_config.whatsapp.as_ref().map_or(false, |wa| { + !wa.allowed_users.is_empty() + && !wa.allowed_users.iter().any(|u| u == sender_id) + }), + _ => false, + }; + if blocked { + tracing::warn!( + "Rejected message from unlisted {channel} user {sender_id}" + ); + return ( + StatusCode::FORBIDDEN, + Json(serde_json::json!({"error": "Sender not in allowed_users list"})), + ); + } + } + } + } + let kernel_handle: Arc = state.kernel.clone() as Arc; match state .kernel - .send_message_with_handle(agent_id, &req.message, Some(kernel_handle)) + .send_message_with_handle_and_blocks(agent_id, &req.message, Some(kernel_handle), None, sender_context) .await { Ok(result) => { @@ -6852,6 +6887,7 @@ pub async fn set_provider_key( model: model_id, api_key_env: env_var.clone(), base_url: None, + vision_model: None, }; let mut guard = state .kernel diff --git a/crates/openfang-api/src/types.rs b/crates/openfang-api/src/types.rs index 80b71140a..b16e19199 100644 --- a/crates/openfang-api/src/types.rs +++ b/crates/openfang-api/src/types.rs @@ -42,6 +42,15 @@ pub struct MessageRequest { /// Optional file attachments (uploaded via /upload endpoint). #[serde(default)] pub attachments: Vec, + /// Optional channel metadata (sender identity, channel type). + /// + /// Used by external gateways (e.g. WhatsApp) to forward sender information + /// so the agent knows who is writing. Expected keys: + /// - `channel`: channel name (e.g. "whatsapp", "telegram") + /// - `sender`: platform-specific sender ID (e.g. phone number) + /// - `sender_name`: human-readable sender name + #[serde(default)] + pub metadata: Option>, } /// Response from sending a message. 
diff --git a/crates/openfang-api/static/css/layout.css b/crates/openfang-api/static/css/layout.css index 880e6ca3f..33919dac4 100644 --- a/crates/openfang-api/static/css/layout.css +++ b/crates/openfang-api/static/css/layout.css @@ -50,6 +50,11 @@ transition: opacity 0.2s, transform 0.2s; } +[data-theme="light"] .sidebar-logo img, +[data-theme="light"] .message-avatar img { + filter: invert(1); +} + .sidebar-logo img:hover { opacity: 1; transform: scale(1.05); diff --git a/crates/openfang-api/static/logo.png b/crates/openfang-api/static/logo.png index cfe72c0b5..7f900d403 100644 Binary files a/crates/openfang-api/static/logo.png and b/crates/openfang-api/static/logo.png differ diff --git a/crates/openfang-channels/src/bridge.rs b/crates/openfang-channels/src/bridge.rs index 3a8c3d68a..19d9598cf 100644 --- a/crates/openfang-channels/src/bridge.rs +++ b/crates/openfang-channels/src/bridge.rs @@ -434,6 +434,29 @@ async fn send_lifecycle_reaction( let _ = adapter.send_reaction(user, message_id, &reaction).await; } +/// Build a contextual prefix when the user is replying to a previous message. +/// +/// Returns `Some("[Replying to <sender>: <quoted>]\n\n")` if reply metadata exists. +fn build_reply_context(metadata: &std::collections::HashMap<String, serde_json::Value>) -> Option<String> { + // Need at least some quoted content + let quoted = metadata + .get("reply_to_text") + .and_then(|v| v.as_str()) + .or_else(|| metadata.get("reply_to_caption").and_then(|v| v.as_str())); + let quoted = quoted?; + let sender = metadata + .get("reply_to_sender") + .and_then(|v| v.as_str()) + .unwrap_or("Unknown"); + // Truncate very long quoted messages to keep context manageable + let truncated = if quoted.len() > 500 { + format!("{}…", &quoted[..500]) + } else { + quoted.to_string() + }; + Some(format!("[Replying to {sender}: {truncated}]\n\n")) +} + /// Dispatch a single incoming message — handles bot commands or routes to an agent. /// /// Applies per-channel policies (DM/group filtering, rate limiting, formatting, threading). 
@@ -528,25 +551,41 @@ async fn dispatch_message( return; } - // For images: download, base64 encode, and send as multimodal content blocks + // For images: build content blocks with the image URL for vision models. + // We pass the original URL rather than downloading + base64-encoding because + // many providers (DashScope/Qwen, OpenAI) prefer or require direct URLs. if let ChannelContent::Image { ref url, ref caption } = message.content { - let blocks = download_image_to_blocks(url, caption.as_deref()).await; - if blocks.iter().any(|b| matches!(b, ContentBlock::Image { .. })) { - // We have actual image data — send as structured blocks for vision - dispatch_with_blocks( - blocks, - message, - handle, - router, - adapter, - ct_str, - thread_id, - output_format, - ) - .await; - return; + let mut blocks = Vec::new(); + // Prepend reply context if this is a reply to a previous message + if let Some(reply_ctx) = build_reply_context(&message.metadata) { + blocks.push(ContentBlock::Text { + text: reply_ctx, + provider_metadata: None, + }); } - // Image download failed — fall through to text description below + if let Some(cap) = caption { + if !cap.is_empty() { + blocks.push(ContentBlock::Text { + text: cap.clone(), + provider_metadata: None, + }); + } + } + blocks.push(ContentBlock::ImageUrl { + url: url.clone(), + }); + dispatch_with_blocks( + blocks, + message, + handle, + router, + adapter, + ct_str, + thread_id, + output_format, + ) + .await; + return; } let text = match &message.content { @@ -573,6 +612,12 @@ async fn dispatch_message( } }; + // Prepend reply context if this is a reply to a previous message + let text = match build_reply_context(&message.metadata) { + Some(ctx) => format!("{ctx}{text}"), + None => text, + }; + // Check if it's a slash command embedded in text (e.g. 
"/agents") if text.starts_with('/') { let parts: Vec<&str> = text.splitn(2, ' ').collect(); @@ -756,14 +801,22 @@ async fn dispatch_message( let _ = adapter.send_typing(&message.sender).await; // Lifecycle reaction: ⏳ Queued → 🤔 Thinking → ✅ Done / ❌ Error + let lifecycle_reactions = overrides + .as_ref() + .map(|o| o.lifecycle_reactions) + .unwrap_or(true); let msg_id = &message.platform_message_id; - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Queued).await; - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Thinking).await; + if lifecycle_reactions { + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Queued).await; + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Thinking).await; + } // Send to agent and relay response match handle.send_message(agent_id, &text).await { Ok(response) => { - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Done).await; + if lifecycle_reactions { + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Done).await; + } send_response(adapter, &message.sender, response, thread_id, output_format).await; handle .record_delivery(agent_id, ct_str, &message.sender.platform_id, true, None, thread_id) @@ -982,20 +1035,31 @@ async fn dispatch_with_blocks( let _ = adapter.send_typing(&message.sender).await; // Lifecycle reaction: ⏳ Queued → 🤔 Thinking → ✅ Done / ❌ Error + let lifecycle_reactions = handle + .channel_overrides(ct_str) + .await + .map(|o| o.lifecycle_reactions) + .unwrap_or(true); let msg_id = &message.platform_message_id; - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Queued).await; - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Thinking).await; + if lifecycle_reactions { + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Queued).await; + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Thinking).await; + } match 
handle.send_message_with_blocks(agent_id, blocks).await { Ok(response) => { - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Done).await; + if lifecycle_reactions { + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Done).await; + } send_response(adapter, &message.sender, response, thread_id, output_format).await; handle .record_delivery(agent_id, ct_str, &message.sender.platform_id, true, None, thread_id) .await; } Err(e) => { - send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Error).await; + if lifecycle_reactions { + send_lifecycle_reaction(adapter, &message.sender, msg_id, AgentPhase::Error).await; + } warn!("Agent error for {agent_id}: {e}"); let err_msg = format!("Agent error: {e}"); send_response( diff --git a/crates/openfang-channels/src/telegram.rs b/crates/openfang-channels/src/telegram.rs index f03829f77..a4eb1baed 100644 --- a/crates/openfang-channels/src/telegram.rs +++ b/crates/openfang-channels/src/telegram.rs @@ -825,6 +825,33 @@ async fn parse_telegram_update( } } + // Extract reply_to_message context so agents can see what the user is replying to. 
+ if let Some(reply_msg) = message.get("reply_to_message") { + if let Some(reply_text) = reply_msg["text"].as_str() { + metadata.insert("reply_to_text".to_string(), serde_json::json!(reply_text)); + } + if let Some(reply_caption) = reply_msg["caption"].as_str() { + metadata.insert( + "reply_to_caption".to_string(), + serde_json::json!(reply_caption), + ); + } + if let Some(reply_id) = reply_msg["message_id"].as_i64() { + metadata.insert( + "reply_to_message_id".to_string(), + serde_json::json!(reply_id), + ); + } + // Sender of the quoted message + let reply_sender = reply_msg["from"]["first_name"] + .as_str() + .unwrap_or("Unknown"); + metadata.insert( + "reply_to_sender".to_string(), + serde_json::json!(reply_sender), + ); + } + Some(ChannelMessage { channel: ChannelType::Telegram, platform_message_id: message_id.to_string(), diff --git a/crates/openfang-channels/src/whatsapp.rs b/crates/openfang-channels/src/whatsapp.rs index 82ad5840d..b8508c3c3 100644 --- a/crates/openfang-channels/src/whatsapp.rs +++ b/crates/openfang-channels/src/whatsapp.rs @@ -162,8 +162,8 @@ impl WhatsAppAdapter { } /// Check if a phone number is allowed. - #[allow(dead_code)] - fn is_allowed(&self, phone: &str) -> bool { + /// Returns true if allowed_users is empty (open mode) or phone is in the list. + pub fn is_allowed(&self, phone: &str) -> bool { self.allowed_users.is_empty() || self.allowed_users.iter().any(|u| u == phone) } diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index 0b1b1cbe2..e0186ecce 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -1381,6 +1381,27 @@ impl OpenFangKernel { .await } + /// Send a message with channel sender context (from external gateways like WhatsApp). + /// + /// The sender context is injected into the agent's system prompt so the agent + /// can identify who is writing and apply appropriate privacy rules. 
+ pub async fn send_message_with_sender_context( + &self, + agent_id: AgentId, + message: &str, + sender_context: openfang_types::message::SenderContext, + kernel_handle: Option>, + ) -> KernelResult { + self.send_message_with_handle_and_blocks( + agent_id, + message, + kernel_handle, + None, + Some(sender_context), + ) + .await + } + /// Send a multimodal message (text + images) to an agent and get a response. /// /// Used by channel bridges when a user sends a photo — the image is downloaded, @@ -1396,7 +1417,7 @@ impl OpenFangKernel { .get() .and_then(|w| w.upgrade()) .map(|arc| arc as Arc); - self.send_message_with_handle_and_blocks(agent_id, message, handle, Some(blocks)) + self.send_message_with_handle_and_blocks(agent_id, message, handle, Some(blocks), None) .await } @@ -1407,7 +1428,7 @@ impl OpenFangKernel { message: &str, kernel_handle: Option>, ) -> KernelResult { - self.send_message_with_handle_and_blocks(agent_id, message, kernel_handle, None) + self.send_message_with_handle_and_blocks(agent_id, message, kernel_handle, None, None) .await } @@ -1417,6 +1438,9 @@ impl OpenFangKernel { /// multimodal content (text + images) instead of just a text string. This /// enables vision models to process images sent from channels like Telegram. /// + /// When `sender_context` is `Some`, the sender identity is injected into the + /// agent's system prompt so it can distinguish between its owner and other users. + /// /// Per-agent locking ensures that concurrent messages for the same agent /// are serialized (preventing session corruption), while messages for /// different agents run in parallel. @@ -1426,6 +1450,7 @@ impl OpenFangKernel { message: &str, kernel_handle: Option>, content_blocks: Option>, + sender_context: Option, ) -> KernelResult { // Acquire per-agent lock to serialize concurrent messages for the same agent. 
// This prevents session corruption when multiple messages arrive in quick @@ -1455,7 +1480,7 @@ impl OpenFangKernel { self.execute_python_agent(&entry, agent_id, message).await } else { // Default: LLM agent loop (builtin:chat or any unrecognized module) - self.execute_llm_agent(&entry, agent_id, message, kernel_handle, content_blocks) + self.execute_llm_agent(&entry, agent_id, message, kernel_handle, content_blocks, sender_context) .await }; @@ -1698,6 +1723,8 @@ impl OpenFangKernel { .and_then(|(s, _)| s), user_name, channel_type: None, + sender_id: None, + sender_name: None, is_subagent: manifest .metadata .get("is_subagent") @@ -2064,6 +2091,7 @@ impl OpenFangKernel { message: &str, kernel_handle: Option>, content_blocks: Option>, + sender_context: Option, ) -> KernelResult { // Check metering quota before starting self.metering @@ -2168,7 +2196,9 @@ impl OpenFangKernel { .ok() .and_then(|(s, _)| s), user_name, - channel_type: None, + channel_type: sender_context.as_ref().and_then(|sc| sc.channel.clone()), + sender_id: sender_context.as_ref().and_then(|sc| sc.sender_id.clone()), + sender_name: sender_context.as_ref().and_then(|sc| sc.sender_name.clone()), is_subagent: manifest .metadata .get("is_subagent") @@ -2262,6 +2292,55 @@ impl OpenFangKernel { } } + // Vision model selection for image content. + // Priority: 1) explicit vision_model from config (forced override) + // 2) current agent model if it supports vision (no swap needed) + // 3) error — no vision capability available + if let Some(ref blocks) = content_blocks { + let has_images = blocks.iter().any(|b| { + matches!( + b, + openfang_types::message::ContentBlock::Image { .. } + | openfang_types::message::ContentBlock::ImageUrl { .. 
} + ) + }); + if has_images { + if let Some(ref vision_model) = self.config.default_model.vision_model { + // Explicit vision_model configured — always use it + info!( + agent = %manifest.name, + current_model = %manifest.model.model, + vision_model = %vision_model, + "Swapping to configured vision model for image content" + ); + manifest.model.model = vision_model.clone(); + manifest.model.provider = self.config.default_model.provider.clone(); + } else { + // No vision_model forced — check if current model handles vision + let current_supports_vision = self + .model_catalog + .read() + .ok() + .and_then(|cat| cat.find_model(&manifest.model.model).map(|m| m.supports_vision)) + .unwrap_or(false); + + if current_supports_vision { + info!( + agent = %manifest.name, + model = %manifest.model.model, + "Current model supports vision — no swap needed" + ); + } else { + warn!( + agent = %manifest.name, + model = %manifest.model.model, + "Image received but no vision_model configured and current model lacks vision support" + ); + } + } + } + } + let driver = self.resolve_driver(&manifest)?; // Look up model's actual context window from the catalog @@ -5002,7 +5081,7 @@ fn apply_budget_defaults( budget: &openfang_types::config::BudgetConfig, resources: &mut ResourceQuota, ) { - // Only override hourly if agent has unlimited (0.0) and global is set + // Only override hourly if agent has unlimited default (0.0) and global is set if budget.max_hourly_usd > 0.0 && resources.max_cost_per_hour_usd == 0.0 { resources.max_cost_per_hour_usd = budget.max_hourly_usd; } diff --git a/crates/openfang-memory/src/session.rs b/crates/openfang-memory/src/session.rs index 74862c372..a7d1a83aa 100644 --- a/crates/openfang-memory/src/session.rs +++ b/crates/openfang-memory/src/session.rs @@ -584,6 +584,9 @@ impl SessionStore { ContentBlock::Image { media_type, .. 
} => { text_parts.push(format!("[image: {media_type}]")); } + ContentBlock::ImageUrl { ref url } => { + text_parts.push(format!("[image: {url}]")); + } ContentBlock::Thinking { thinking } => { text_parts.push(format!( "[thinking: {}]", diff --git a/crates/openfang-runtime/src/compactor.rs b/crates/openfang-runtime/src/compactor.rs index 855705469..e9e246a86 100644 --- a/crates/openfang-runtime/src/compactor.rs +++ b/crates/openfang-runtime/src/compactor.rs @@ -399,6 +399,9 @@ fn build_conversation_text(messages: &[Message], config: &CompactionConfig) -> S ContentBlock::Image { media_type, .. } => { conversation_text.push_str(&format!("[Image: {media_type}]\n\n")); } + ContentBlock::ImageUrl { url } => { + conversation_text.push_str(&format!("[Image: {url}]\n\n")); + } ContentBlock::Thinking { .. } => {} ContentBlock::Unknown => {} } diff --git a/crates/openfang-runtime/src/drivers/anthropic.rs b/crates/openfang-runtime/src/drivers/anthropic.rs index 857774e26..4d79c2a81 100644 --- a/crates/openfang-runtime/src/drivers/anthropic.rs +++ b/crates/openfang-runtime/src/drivers/anthropic.rs @@ -573,6 +573,12 @@ fn convert_message(msg: &Message) -> ApiMessage { data: data.clone(), }, }), + ContentBlock::ImageUrl { url } => { + // Anthropic requires base64; pass as text description for now. + Some(ApiContentBlock::Text { + text: format!("[Image: {url}]"), + }) + } ContentBlock::ToolUse { id, name, input, .. 
} => Some(ApiContentBlock::ToolUse { id: id.clone(), name: name.clone(), diff --git a/crates/openfang-runtime/src/drivers/claude_code.rs b/crates/openfang-runtime/src/drivers/claude_code.rs index 1cdfe3b44..cb47940d9 100644 --- a/crates/openfang-runtime/src/drivers/claude_code.rs +++ b/crates/openfang-runtime/src/drivers/claude_code.rs @@ -7,8 +7,9 @@ use crate::llm_driver::{CompletionRequest, CompletionResponse, LlmDriver, LlmError, StreamEvent}; use async_trait::async_trait; -use openfang_types::message::{ContentBlock, Role, StopReason, TokenUsage}; +use openfang_types::message::{ContentBlock, MessageContent, Role, StopReason, TokenUsage}; use serde::Deserialize; +use std::path::PathBuf; use tokio::io::AsyncBufReadExt; use tracing::{debug, warn}; @@ -90,8 +91,12 @@ impl ClaudeCodeDriver { } /// Build a text prompt from the completion request messages. - fn build_prompt(request: &CompletionRequest) -> String { + /// + /// Image content blocks are referenced using Claude Code's `@path` syntax, + /// which tells the CLI to read the local file directly. + fn build_prompt(request: &CompletionRequest, image_files: &[PathBuf]) -> String { let mut parts = Vec::new(); + let mut img_idx = 0; if let Some(ref sys) = request.system { parts.push(format!("[System]\n{sys}")); @@ -103,15 +108,132 @@ impl ClaudeCodeDriver { Role::Assistant => "Assistant", Role::System => "System", }; - let text = msg.content.text_content(); - if !text.is_empty() { - parts.push(format!("[{role_label}]\n{text}")); + + let mut msg_parts = Vec::new(); + + match &msg.content { + MessageContent::Text(s) => { + if !s.is_empty() { + msg_parts.push(s.clone()); + } + } + MessageContent::Blocks(blocks) => { + for block in blocks { + match block { + ContentBlock::Text { text, .. } => { + if !text.is_empty() { + msg_parts.push(text.clone()); + } + } + ContentBlock::Image { .. } | ContentBlock::ImageUrl { .. 
} => { + if img_idx < image_files.len() { + let path = &image_files[img_idx]; + msg_parts.push(format!("@{}", path.display())); + img_idx += 1; + } + } + _ => {} + } + } + } + } + + if !msg_parts.is_empty() { + let combined = msg_parts.join("\n"); + parts.push(format!("[{role_label}]\n{combined}")); } } parts.join("\n\n") } + /// Extract image content blocks from messages and write them to temp files. + /// + /// Returns the list of temp file paths. The caller is responsible for + /// cleaning them up after the CLI finishes. + async fn extract_images_to_temp(request: &CompletionRequest) -> Vec { + use base64::Engine; + + let mut paths = Vec::new(); + let ts = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or(0); + + for msg in &request.messages { + if let MessageContent::Blocks(blocks) = &msg.content { + for (i, block) in blocks.iter().enumerate() { + match block { + ContentBlock::Image { media_type, data } => { + let ext = media_type + .strip_prefix("image/") + .unwrap_or("png"); + let path = PathBuf::from(format!( + "/tmp/openfang-img-{ts}-{i}.{ext}" + )); + if let Ok(bytes) = + base64::engine::general_purpose::STANDARD.decode(data) + { + if std::fs::write(&path, &bytes).is_ok() { + paths.push(path); + } + } + } + ContentBlock::ImageUrl { url } => { + // If it's a data URI, decode it; otherwise download + if let Some(rest) = url.strip_prefix("data:") { + // data:image/png;base64, + if let Some((meta, b64)) = rest.split_once(",") { + let ext = meta + .split(';') + .next() + .and_then(|m| m.strip_prefix("image/")) + .unwrap_or("png"); + let path = PathBuf::from(format!( + "/tmp/openfang-img-{ts}-{i}.{ext}" + )); + if let Ok(bytes) = + base64::engine::general_purpose::STANDARD.decode(b64) + { + if std::fs::write(&path, &bytes).is_ok() { + paths.push(path); + } + } + } + } else { + // HTTP(S) URL — try to download + let path = PathBuf::from(format!( + "/tmp/openfang-img-{ts}-{i}.jpg" + )); + match 
reqwest::get(url).await { + Ok(resp) => { + if let Ok(bytes) = resp.bytes().await { + if std::fs::write(&path, &bytes).is_ok() { + paths.push(path); + } + } + } + Err(e) => { + warn!(url = %url, error = %e, "Failed to download image for Claude CLI"); + } + } + } + } + _ => {} + } + } + } + } + paths + } + + /// Clean up temporary image files. + fn cleanup_temp_images(paths: &[PathBuf]) { + for p in paths { + let _ = std::fs::remove_file(p); + } + } + /// Map a model ID like "claude-code/opus" to CLI --model flag value. fn model_flag(model: &str) -> Option { let stripped = model @@ -125,6 +247,42 @@ impl ClaudeCodeDriver { } } + /// Build the CLI argument list for a completion request. + /// + /// Exposed as a testable method so unit tests can verify that + /// `--dangerously-skip-permissions`, `--model`, and output format flags + /// are set correctly. + fn build_args( + &self, + prompt: &str, + model_flag: Option<&str>, + streaming: bool, + ) -> Vec { + let mut args = vec![ + "-p".to_string(), + prompt.to_string(), + ]; + + if self.skip_permissions { + args.push("--dangerously-skip-permissions".to_string()); + } + + args.push("--output-format".to_string()); + if streaming { + args.push("stream-json".to_string()); + args.push("--verbose".to_string()); + } else { + args.push("json".to_string()); + } + + if let Some(model) = model_flag { + args.push("--model".to_string()); + args.push(model.to_string()); + } + + args + } + /// Apply security env filtering to a command. 
/// /// Instead of `env_clear()` (which breaks Node.js, NVM, SSL, proxies), @@ -196,22 +354,14 @@ impl LlmDriver for ClaudeCodeDriver { &self, request: CompletionRequest, ) -> Result { - let prompt = Self::build_prompt(&request); + let image_files = Self::extract_images_to_temp(&request).await; + let prompt = Self::build_prompt(&request, &image_files); let model_flag = Self::model_flag(&request.model); - let mut cmd = tokio::process::Command::new(&self.cli_path); - cmd.arg("-p") - .arg(&prompt) - .arg("--output-format") - .arg("json"); + let args = self.build_args(&prompt, model_flag.as_deref(), false); - if self.skip_permissions { - cmd.arg("--dangerously-skip-permissions"); - } - - if let Some(ref model) = model_flag { - cmd.arg("--model").arg(model); - } + let mut cmd = tokio::process::Command::new(&self.cli_path); + cmd.args(&args); Self::apply_env_filter(&mut cmd); @@ -230,6 +380,7 @@ impl LlmDriver for ClaudeCodeDriver { )))?; if !output.status.success() { + Self::cleanup_temp_images(&image_files); let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); let detail = if !stderr.is_empty() { &stderr } else { &stdout }; @@ -261,6 +412,7 @@ impl LlmDriver for ClaudeCodeDriver { }); } + Self::cleanup_temp_images(&image_files); let stdout = String::from_utf8_lossy(&output.stdout); // Try JSON parse first @@ -299,23 +451,14 @@ impl LlmDriver for ClaudeCodeDriver { request: CompletionRequest, tx: tokio::sync::mpsc::Sender, ) -> Result { - let prompt = Self::build_prompt(&request); + let image_files = Self::extract_images_to_temp(&request).await; + let prompt = Self::build_prompt(&request, &image_files); let model_flag = Self::model_flag(&request.model); - let mut cmd = tokio::process::Command::new(&self.cli_path); - cmd.arg("-p") - .arg(&prompt) - .arg("--output-format") - .arg("stream-json") - .arg("--verbose"); - - if self.skip_permissions { - 
cmd.arg("--dangerously-skip-permissions"); - } + let args = self.build_args(&prompt, model_flag.as_deref(), true); - if let Some(ref model) = model_flag { - cmd.arg("--model").arg(model); - } + let mut cmd = tokio::process::Command::new(&self.cli_path); + cmd.args(&args); Self::apply_env_filter(&mut cmd); @@ -412,6 +555,8 @@ impl LlmDriver for ClaudeCodeDriver { .await .map_err(|e| LlmError::Http(format!("Claude CLI wait failed: {e}")))?; + Self::cleanup_temp_images(&image_files); + if !status.success() { warn!(code = ?status.code(), "Claude CLI exited with error"); } @@ -486,7 +631,7 @@ mod tests { thinking: None, }; - let prompt = ClaudeCodeDriver::build_prompt(&request); + let prompt = ClaudeCodeDriver::build_prompt(&request, &[]); assert!(prompt.contains("[System]")); assert!(prompt.contains("You are helpful.")); assert!(prompt.contains("[User]")); @@ -538,6 +683,34 @@ mod tests { assert!(!driver.skip_permissions); } + #[test] + fn test_build_args_with_skip_permissions() { + let driver = ClaudeCodeDriver::new(None, true); + let args = driver.build_args("hello", Some("opus"), false); + assert!(args.contains(&"--dangerously-skip-permissions".to_string()), + "should contain --dangerously-skip-permissions when skip_permissions=true"); + assert!(args.contains(&"json".to_string())); + assert!(args.contains(&"--model".to_string())); + assert!(args.contains(&"opus".to_string())); + } + + #[test] + fn test_build_args_without_skip_permissions() { + let driver = ClaudeCodeDriver::new(None, false); + let args = driver.build_args("hello", Some("sonnet"), false); + assert!(!args.contains(&"--dangerously-skip-permissions".to_string()), + "should NOT contain --dangerously-skip-permissions when skip_permissions=false"); + } + + #[test] + fn test_build_args_streaming() { + let driver = ClaudeCodeDriver::new(None, true); + let args = driver.build_args("hello", None, true); + assert!(args.contains(&"stream-json".to_string())); + assert!(args.contains(&"--verbose".to_string())); + 
assert!(!args.contains(&"--model".to_string()), "no model flag when model_flag is None"); + } + #[test] fn test_sensitive_env_list_coverage() { // Ensure all major provider keys are in the strip list diff --git a/crates/openfang-runtime/src/drivers/gemini.rs b/crates/openfang-runtime/src/drivers/gemini.rs index f70a9efe0..651b6088e 100644 --- a/crates/openfang-runtime/src/drivers/gemini.rs +++ b/crates/openfang-runtime/src/drivers/gemini.rs @@ -291,6 +291,14 @@ fn convert_messages( }, }); } + ContentBlock::ImageUrl { url } => { + // Gemini supports fileData for URL-based images; + // fall back to a text description if not supported. + parts.push(GeminiPart::Text { + text: format!("[Image: {url}]"), + thought_signature: None, + }); + } ContentBlock::ToolResult { content, tool_name, .. } => { diff --git a/crates/openfang-runtime/src/drivers/mod.rs b/crates/openfang-runtime/src/drivers/mod.rs index a6fd2f65f..113d9bca7 100644 --- a/crates/openfang-runtime/src/drivers/mod.rs +++ b/crates/openfang-runtime/src/drivers/mod.rs @@ -7,6 +7,7 @@ pub mod anthropic; pub mod claude_code; pub mod copilot; +pub mod qwen_code; pub mod fallback; pub mod gemini; pub mod openai; @@ -150,6 +151,11 @@ fn provider_defaults(provider: &str) -> Option { api_key_env: "", key_required: false, }), + "qwen-code" => Some(ProviderDefaults { + base_url: "", + api_key_env: "", + key_required: false, + }), "moonshot" | "kimi" | "kimi2" => Some(ProviderDefaults { base_url: MOONSHOT_BASE_URL, api_key_env: "MOONSHOT_API_KEY", @@ -309,6 +315,15 @@ pub fn create_driver(config: &DriverConfig) -> Result, LlmErr ))); } + // Qwen Code CLI — subprocess-based, no API key needed + if provider == "qwen-code" { + let cli_path = config.base_url.clone(); + return Ok(Arc::new(qwen_code::QwenCodeDriver::new( + cli_path, + config.skip_permissions, + ))); + } + // GitHub Copilot — wraps OpenAI-compatible driver with automatic token exchange. 
// The CopilotDriver exchanges the GitHub PAT for a Copilot API token on demand, // caches it, and refreshes when expired. @@ -411,7 +426,7 @@ pub fn create_driver(config: &DriverConfig) -> Result, LlmErr "Unknown provider '{}'. Supported: anthropic, gemini, openai, groq, openrouter, \ deepseek, together, mistral, fireworks, ollama, vllm, lmstudio, perplexity, \ cohere, ai21, cerebras, sambanova, huggingface, xai, replicate, github-copilot, \ - chutes, venice, codex, claude-code. Or set base_url for a custom OpenAI-compatible endpoint.", + chutes, venice, codex, claude-code, qwen-code. Or set base_url for a custom OpenAI-compatible endpoint.", provider ), }) @@ -486,6 +501,7 @@ pub fn known_providers() -> &'static [&'static str] { "venice", "codex", "claude-code", + "qwen-code", ] } @@ -587,7 +603,8 @@ mod tests { assert!(providers.contains(&"chutes")); assert!(providers.contains(&"codex")); assert!(providers.contains(&"claude-code")); - assert_eq!(providers.len(), 34); + assert!(providers.contains(&"qwen-code")); + assert_eq!(providers.len(), 35); } #[test] diff --git a/crates/openfang-runtime/src/drivers/openai.rs b/crates/openfang-runtime/src/drivers/openai.rs index 15a5a6657..c52e772cf 100644 --- a/crates/openfang-runtime/src/drivers/openai.rs +++ b/crates/openfang-runtime/src/drivers/openai.rs @@ -278,6 +278,13 @@ impl LlmDriver for OpenAIDriver { }, }); } + ContentBlock::ImageUrl { url } => { + parts.push(OaiContentPart::ImageUrl { + image_url: OaiImageUrl { + url: url.clone(), + }, + }); + } ContentBlock::Thinking { .. } => {} _ => {} } diff --git a/crates/openfang-runtime/src/drivers/qwen_code.rs b/crates/openfang-runtime/src/drivers/qwen_code.rs new file mode 100644 index 000000000..3734ebef9 --- /dev/null +++ b/crates/openfang-runtime/src/drivers/qwen_code.rs @@ -0,0 +1,652 @@ +//! Qwen Code CLI backend driver. +//! +//! Spawns the `qwen` CLI (Qwen Code) as a subprocess in print mode (`-p`), +//! 
which is non-interactive and handles its own authentication. +//! This allows users with Qwen Code installed to use it as an LLM provider +//! without needing a separate API key. + +use crate::llm_driver::{CompletionRequest, CompletionResponse, LlmDriver, LlmError, StreamEvent}; +use async_trait::async_trait; +use openfang_types::message::{ContentBlock, Role, StopReason, TokenUsage}; +use serde::Deserialize; +use tokio::io::AsyncBufReadExt; +use tracing::{debug, warn}; + +/// Environment variable names (and suffixes) to strip from the subprocess +/// to prevent leaking API keys from other providers. +const SENSITIVE_ENV_EXACT: &[&str] = &[ + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "GEMINI_API_KEY", + "GOOGLE_API_KEY", + "GROQ_API_KEY", + "DEEPSEEK_API_KEY", + "MISTRAL_API_KEY", + "TOGETHER_API_KEY", + "FIREWORKS_API_KEY", + "OPENROUTER_API_KEY", + "PERPLEXITY_API_KEY", + "COHERE_API_KEY", + "AI21_API_KEY", + "CEREBRAS_API_KEY", + "SAMBANOVA_API_KEY", + "HUGGINGFACE_API_KEY", + "XAI_API_KEY", + "REPLICATE_API_TOKEN", + "BRAVE_API_KEY", + "TAVILY_API_KEY", + "ELEVENLABS_API_KEY", +]; + +/// Suffixes that indicate a secret — remove any env var ending with these +/// unless it starts with `QWEN_`. +const SENSITIVE_SUFFIXES: &[&str] = &["_SECRET", "_TOKEN", "_PASSWORD"]; + +/// LLM driver that delegates to the Qwen Code CLI. +pub struct QwenCodeDriver { + cli_path: String, + skip_permissions: bool, +} + +impl QwenCodeDriver { + /// Create a new Qwen Code driver. + /// + /// `cli_path` overrides the CLI binary path; defaults to `"qwen"` on PATH. + /// `skip_permissions` adds `--yolo` to the spawned command so that the CLI + /// runs non-interactively (required for daemon mode). + pub fn new(cli_path: Option, skip_permissions: bool) -> Self { + if skip_permissions { + warn!( + "Qwen Code driver: --yolo enabled. \ + The CLI will not prompt for tool approvals. \ + OpenFang's own capability/RBAC system enforces access control." 
+ ); + } + + Self { + cli_path: cli_path + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "qwen".to_string()), + skip_permissions, + } + } + + /// Detect if the Qwen Code CLI is available on PATH. + pub fn detect() -> Option { + let output = std::process::Command::new("qwen") + .arg("--version") + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .ok()?; + + if output.status.success() { + Some(String::from_utf8_lossy(&output.stdout).trim().to_string()) + } else { + None + } + } + + /// Build a text prompt from the completion request messages. + fn build_prompt(request: &CompletionRequest) -> String { + let mut parts = Vec::new(); + + if let Some(ref sys) = request.system { + parts.push(format!("[System]\n{sys}")); + } + + for msg in &request.messages { + let role_label = match msg.role { + Role::User => "User", + Role::Assistant => "Assistant", + Role::System => "System", + }; + let text = msg.content.text_content(); + if !text.is_empty() { + parts.push(format!("[{role_label}]\n{text}")); + } + } + + parts.join("\n\n") + } + + /// Map a model ID like "qwen-code/qwen3-coder" to CLI --model flag value. + fn model_flag(model: &str) -> Option { + let stripped = model + .strip_prefix("qwen-code/") + .unwrap_or(model); + match stripped { + "qwen3-coder" => Some("qwen3-coder".to_string()), + "qwen-coder-plus" => Some("qwen-coder-plus".to_string()), + "qwq-32b" => Some("qwq-32b".to_string()), + _ => Some(stripped.to_string()), + } + } + + /// Build the CLI argument list for a completion request. + /// + /// Exposed as a testable method so unit tests can verify that `--yolo`, + /// `--model`, and output format flags are set correctly. 
+ fn build_args( + &self, + prompt: &str, + model_flag: Option<&str>, + streaming: bool, + ) -> Vec { + let mut args = vec![ + "-p".to_string(), + prompt.to_string(), + "--output-format".to_string(), + if streaming { + "stream-json".to_string() + } else { + "json".to_string() + }, + ]; + + if self.skip_permissions { + args.push("--yolo".to_string()); + } + + if let Some(model) = model_flag { + args.push("--model".to_string()); + args.push(model.to_string()); + } + + args + } + + /// Apply security env filtering to a command. + /// + /// Instead of `env_clear()` (which breaks Node.js, NVM, SSL, proxies), + /// we keep the full environment and only remove known sensitive API keys + /// from other LLM providers. + fn apply_env_filter(cmd: &mut tokio::process::Command) { + for key in SENSITIVE_ENV_EXACT { + cmd.env_remove(key); + } + // Remove any env var with a sensitive suffix, unless it's QWEN_* + for (key, _) in std::env::vars() { + if key.starts_with("QWEN_") { + continue; + } + let upper = key.to_uppercase(); + for suffix in SENSITIVE_SUFFIXES { + if upper.ends_with(suffix) { + cmd.env_remove(&key); + break; + } + } + } + } +} + +/// JSON output from `qwen -p --output-format json`. +/// +/// The CLI may return the response text in different fields depending on +/// version: `result`, `content`, or `text`. We try all three. +#[derive(Debug, Deserialize)] +struct QwenJsonOutput { + result: Option, + #[serde(default)] + content: Option, + #[serde(default)] + text: Option, + #[serde(default)] + usage: Option, + #[serde(default)] + #[allow(dead_code)] + cost_usd: Option, +} + +/// Usage stats from Qwen CLI JSON output. +#[derive(Debug, Deserialize, Default)] +struct QwenUsage { + #[serde(default)] + input_tokens: u64, + #[serde(default)] + output_tokens: u64, +} + +/// Stream JSON event from `qwen -p --output-format stream-json`. 
+#[derive(Debug, Deserialize)] +struct QwenStreamEvent { + #[serde(default)] + r#type: String, + #[serde(default)] + content: Option, + #[serde(default)] + result: Option, + #[serde(default)] + usage: Option, +} + +#[async_trait] +impl LlmDriver for QwenCodeDriver { + async fn complete( + &self, + request: CompletionRequest, + ) -> Result { + let prompt = Self::build_prompt(&request); + let model_flag = Self::model_flag(&request.model); + + let args = self.build_args(&prompt, model_flag.as_deref(), false); + + let mut cmd = tokio::process::Command::new(&self.cli_path); + cmd.args(&args); + + Self::apply_env_filter(&mut cmd); + + cmd.stdout(std::process::Stdio::piped()); + cmd.stderr(std::process::Stdio::piped()); + + debug!(cli = %self.cli_path, skip_permissions = self.skip_permissions, "Spawning Qwen Code CLI"); + + let output = cmd + .output() + .await + .map_err(|e| LlmError::Http(format!( + "Qwen Code CLI not found or failed to start ({}). \ + Install: npm install -g @qwen-code/qwen-code", + e + )))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + let detail = if !stderr.is_empty() { &stderr } else { &stdout }; + let code = output.status.code().unwrap_or(1); + + // Provide actionable error messages + let message = if detail.contains("not authenticated") + || detail.contains("auth") + || detail.contains("login") + || detail.contains("credentials") + { + format!( + "Qwen Code CLI is not authenticated. Run: qwen --auth-type qwen-oauth\nDetail: {detail}" + ) + } else if detail.contains("permission") + || detail.contains("--yolo") + { + format!( + "Qwen Code CLI requires permissions acceptance. 
\ + Run: qwen --yolo (once to accept)\nDetail: {detail}" + ) + } else { + format!("Qwen Code CLI exited with code {code}: {detail}") + }; + + return Err(LlmError::Api { + status: code as u16, + message, + }); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + + // Try JSON parse first + if let Ok(parsed) = serde_json::from_str::(&stdout) { + let text = parsed.result + .or(parsed.content) + .or(parsed.text) + .unwrap_or_default(); + let usage = parsed.usage.unwrap_or_default(); + return Ok(CompletionResponse { + content: vec![ContentBlock::Text { text: text.clone(), provider_metadata: None }], + stop_reason: StopReason::EndTurn, + tool_calls: Vec::new(), + usage: TokenUsage { + input_tokens: usage.input_tokens, + output_tokens: usage.output_tokens, + }, + }); + } + + // Fallback: treat entire stdout as plain text + let text = stdout.trim().to_string(); + Ok(CompletionResponse { + content: vec![ContentBlock::Text { text, provider_metadata: None }], + stop_reason: StopReason::EndTurn, + tool_calls: Vec::new(), + usage: TokenUsage { + input_tokens: 0, + output_tokens: 0, + }, + }) + } + + async fn stream( + &self, + request: CompletionRequest, + tx: tokio::sync::mpsc::Sender, + ) -> Result { + let prompt = Self::build_prompt(&request); + let model_flag = Self::model_flag(&request.model); + + let args = self.build_args(&prompt, model_flag.as_deref(), true); + + let mut cmd = tokio::process::Command::new(&self.cli_path); + cmd.args(&args); + + Self::apply_env_filter(&mut cmd); + + cmd.stdout(std::process::Stdio::piped()); + cmd.stderr(std::process::Stdio::piped()); + + debug!(cli = %self.cli_path, skip_permissions = self.skip_permissions, "Spawning Qwen Code CLI (streaming)"); + + let mut child = cmd + .spawn() + .map_err(|e| LlmError::Http(format!( + "Qwen Code CLI not found or failed to start ({}). 
\ + Install: npm install -g @qwen-code/qwen-code", + e + )))?; + + let stdout = child + .stdout + .take() + .ok_or_else(|| LlmError::Http("No stdout from qwen CLI".to_string()))?; + + let reader = tokio::io::BufReader::new(stdout); + let mut lines = reader.lines(); + + let mut full_text = String::new(); + let mut final_usage = TokenUsage { + input_tokens: 0, + output_tokens: 0, + }; + + while let Ok(Some(line)) = lines.next_line().await { + if line.trim().is_empty() { + continue; + } + + match serde_json::from_str::(&line) { + Ok(event) => { + match event.r#type.as_str() { + "content" | "text" | "assistant" | "content_block_delta" => { + if let Some(ref content) = event.content { + full_text.push_str(content); + let _ = tx + .send(StreamEvent::TextDelta { + text: content.clone(), + }) + .await; + } + } + "result" | "done" | "complete" => { + if let Some(ref result) = event.result { + if full_text.is_empty() { + full_text = result.clone(); + let _ = tx + .send(StreamEvent::TextDelta { + text: result.clone(), + }) + .await; + } + } + if let Some(usage) = event.usage { + final_usage = TokenUsage { + input_tokens: usage.input_tokens, + output_tokens: usage.output_tokens, + }; + } + } + _ => { + // Unknown event type — try content field as fallback + if let Some(ref content) = event.content { + full_text.push_str(content); + let _ = tx + .send(StreamEvent::TextDelta { + text: content.clone(), + }) + .await; + } + } + } + } + Err(e) => { + // Not valid JSON — treat as raw text + warn!(line = %line, error = %e, "Non-JSON line from Qwen CLI"); + full_text.push_str(&line); + let _ = tx + .send(StreamEvent::TextDelta { text: line }) + .await; + } + } + } + + // Wait for process to finish + let status = child + .wait() + .await + .map_err(|e| LlmError::Http(format!("Qwen CLI wait failed: {e}")))?; + + if !status.success() { + warn!(code = ?status.code(), "Qwen CLI exited with error"); + } + + let _ = tx + .send(StreamEvent::ContentComplete { + stop_reason: 
StopReason::EndTurn, + usage: final_usage, + }) + .await; + + Ok(CompletionResponse { + content: vec![ContentBlock::Text { text: full_text, provider_metadata: None }], + stop_reason: StopReason::EndTurn, + tool_calls: Vec::new(), + usage: final_usage, + }) + } +} + +/// Check if the Qwen Code CLI is available and authenticated. +pub fn qwen_code_available() -> bool { + QwenCodeDriver::detect().is_some() + || qwen_credentials_exist() +} + +/// Check if Qwen credentials exist. +/// +/// Qwen Code stores session/credentials in `~/.qwen/` directory. +fn qwen_credentials_exist() -> bool { + if let Some(home) = home_dir() { + let qwen_dir = home.join(".qwen"); + qwen_dir.exists() + } else { + false + } +} + +/// Cross-platform home directory. +fn home_dir() -> Option { + #[cfg(target_os = "windows")] + { + std::env::var("USERPROFILE").ok().map(std::path::PathBuf::from) + } + #[cfg(not(target_os = "windows"))] + { + std::env::var("HOME").ok().map(std::path::PathBuf::from) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_build_prompt_simple() { + use openfang_types::message::{Message, MessageContent}; + + let request = CompletionRequest { + model: "qwen-code/qwen3-coder".to_string(), + messages: vec![Message { + role: Role::User, + content: MessageContent::text("Hello"), + }], + tools: vec![], + max_tokens: 1024, + temperature: 0.7, + system: Some("You are helpful.".to_string()), + thinking: None, + }; + + let prompt = QwenCodeDriver::build_prompt(&request); + assert!(prompt.contains("[System]")); + assert!(prompt.contains("You are helpful.")); + assert!(prompt.contains("[User]")); + assert!(prompt.contains("Hello")); + } + + #[test] + fn test_build_prompt_multi_turn() { + use openfang_types::message::{Message, MessageContent}; + + let request = CompletionRequest { + model: "qwen-code/qwen3-coder".to_string(), + messages: vec![ + Message { + role: Role::User, + content: MessageContent::text("What is 2+2?"), + }, + Message { + role: Role::Assistant, 
+ content: MessageContent::text("4"), + }, + Message { + role: Role::User, + content: MessageContent::text("And 3+3?"), + }, + ], + tools: vec![], + max_tokens: 1024, + temperature: 0.7, + system: None, + thinking: None, + }; + + let prompt = QwenCodeDriver::build_prompt(&request); + assert!(prompt.contains("[User]\nWhat is 2+2?")); + assert!(prompt.contains("[Assistant]\n4")); + assert!(prompt.contains("[User]\nAnd 3+3?")); + } + + #[test] + fn test_model_flag_mapping() { + assert_eq!( + QwenCodeDriver::model_flag("qwen-code/qwen3-coder"), + Some("qwen3-coder".to_string()) + ); + assert_eq!( + QwenCodeDriver::model_flag("qwen-code/qwen-coder-plus"), + Some("qwen-coder-plus".to_string()) + ); + assert_eq!( + QwenCodeDriver::model_flag("qwen-code/qwq-32b"), + Some("qwq-32b".to_string()) + ); + assert_eq!( + QwenCodeDriver::model_flag("custom-model"), + Some("custom-model".to_string()) + ); + } + + #[test] + fn test_new_defaults_to_qwen() { + let driver = QwenCodeDriver::new(None, true); + assert_eq!(driver.cli_path, "qwen"); + assert!(driver.skip_permissions); + } + + #[test] + fn test_new_with_custom_path() { + let driver = QwenCodeDriver::new(Some("/usr/local/bin/qwen".to_string()), true); + assert_eq!(driver.cli_path, "/usr/local/bin/qwen"); + } + + #[test] + fn test_new_with_empty_path() { + let driver = QwenCodeDriver::new(Some(String::new()), true); + assert_eq!(driver.cli_path, "qwen"); + } + + #[test] + fn test_skip_permissions_disabled() { + let driver = QwenCodeDriver::new(None, false); + assert!(!driver.skip_permissions); + } + + #[test] + fn test_build_args_with_yolo() { + let driver = QwenCodeDriver::new(None, true); + let args = driver.build_args("hello", Some("qwen3-coder"), false); + assert!(args.contains(&"--yolo".to_string()), "should contain --yolo when skip_permissions=true"); + assert!(args.contains(&"json".to_string())); + assert!(args.contains(&"--model".to_string())); + assert!(args.contains(&"qwen3-coder".to_string())); + } + + #[test] + fn 
test_build_args_without_yolo() { + let driver = QwenCodeDriver::new(None, false); + let args = driver.build_args("hello", Some("qwen3-coder"), false); + assert!(!args.contains(&"--yolo".to_string()), "should NOT contain --yolo when skip_permissions=false"); + } + + #[test] + fn test_build_args_streaming() { + let driver = QwenCodeDriver::new(None, true); + let args = driver.build_args("hello", None, true); + assert!(args.contains(&"stream-json".to_string())); + assert!(!args.contains(&"json".to_string()) || args.contains(&"stream-json".to_string())); + assert!(!args.contains(&"--model".to_string()), "no model flag when model_flag is None"); + } + + #[test] + fn test_sensitive_env_list_coverage() { + assert!(SENSITIVE_ENV_EXACT.contains(&"OPENAI_API_KEY")); + assert!(SENSITIVE_ENV_EXACT.contains(&"ANTHROPIC_API_KEY")); + assert!(SENSITIVE_ENV_EXACT.contains(&"GEMINI_API_KEY")); + assert!(SENSITIVE_ENV_EXACT.contains(&"GROQ_API_KEY")); + assert!(SENSITIVE_ENV_EXACT.contains(&"DEEPSEEK_API_KEY")); + } + + #[test] + fn test_json_output_parsing() { + let json = r#"{"result":"Hello world","usage":{"input_tokens":10,"output_tokens":5}}"#; + let parsed: QwenJsonOutput = serde_json::from_str(json).unwrap(); + assert_eq!(parsed.result.unwrap(), "Hello world"); + assert_eq!(parsed.usage.unwrap().input_tokens, 10); + } + + #[test] + fn test_json_output_content_fallback() { + let json = r#"{"content":"Fallback text"}"#; + let parsed: QwenJsonOutput = serde_json::from_str(json).unwrap(); + assert!(parsed.result.is_none()); + assert_eq!(parsed.content.unwrap(), "Fallback text"); + } + + #[test] + fn test_stream_event_parsing() { + let json = r#"{"type":"content","content":"chunk"}"#; + let event: QwenStreamEvent = serde_json::from_str(json).unwrap(); + assert_eq!(event.r#type, "content"); + assert_eq!(event.content.unwrap(), "chunk"); + } + + #[test] + fn test_stream_event_result() { + let json = r#"{"type":"result","result":"final 
answer","usage":{"input_tokens":100,"output_tokens":50}}"#; + let event: QwenStreamEvent = serde_json::from_str(json).unwrap(); + assert_eq!(event.r#type, "result"); + assert_eq!(event.result.unwrap(), "final answer"); + let usage = event.usage.unwrap(); + assert_eq!(usage.input_tokens, 100); + assert_eq!(usage.output_tokens, 50); + } +} diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index 8822c460e..851123a07 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -56,15 +56,29 @@ impl ModelCatalog { /// Only checks presence — never reads or stores the actual secret. pub fn detect_auth(&mut self) { for provider in &mut self.providers { - // Claude Code is special: no API key needed, but we probe for CLI - // installation so the dashboard shows "Configured" vs "Not Installed". + // Claude Code: detect CLI installation + authentication if provider.id == "claude-code" { - provider.auth_status = - if crate::drivers::claude_code::claude_code_available() { - AuthStatus::Configured - } else { - AuthStatus::Missing - }; + let cli_installed = crate::drivers::claude_code::ClaudeCodeDriver::detect().is_some(); + if cli_installed && crate::drivers::claude_code::claude_code_available() { + provider.auth_status = AuthStatus::Configured; + } else if cli_installed { + provider.auth_status = AuthStatus::Missing; + } else { + provider.auth_status = AuthStatus::NotRequired; + } + continue; + } + + // Qwen Code: detect CLI installation + authentication + if provider.id == "qwen-code" { + let cli_installed = crate::drivers::qwen_code::QwenCodeDriver::detect().is_some(); + if cli_installed && crate::drivers::qwen_code::qwen_code_available() { + provider.auth_status = AuthStatus::Configured; + } else if cli_installed { + provider.auth_status = AuthStatus::Missing; + } else { + provider.auth_status = AuthStatus::NotRequired; + } continue; } @@ -83,7 +97,6 @@ impl ModelCatalog 
{ std::env::var("OPENAI_API_KEY").is_ok() || read_codex_credential().is_some() } - // claude-code is handled above (before key_required check) _ => false, }; @@ -755,6 +768,16 @@ fn builtin_providers() -> Vec { auth_status: AuthStatus::NotRequired, model_count: 0, }, + // ── Qwen Code CLI ───────────────────────────────────────── + ProviderInfo { + id: "qwen-code".into(), + display_name: "Qwen Code".into(), + api_key_env: String::new(), + base_url: String::new(), + key_required: false, + auth_status: AuthStatus::NotRequired, + model_count: 0, + }, ] } @@ -828,6 +851,11 @@ fn builtin_aliases() -> HashMap { ("claude-code-opus", "claude-code/opus"), ("claude-code-sonnet", "claude-code/sonnet"), ("claude-code-haiku", "claude-code/haiku"), + // Qwen Code aliases + ("qwen-code", "qwen-code/qwen3-coder"), + ("qwen-code-qwen3", "qwen-code/qwen3-coder"), + ("qwen-code-plus", "qwen-code/qwen-coder-plus"), + ("qwen-code-qwq", "qwen-code/qwq-32b"), ]; pairs .into_iter() @@ -3416,6 +3444,51 @@ fn builtin_models() -> Vec { aliases: vec!["claude-code-haiku".into()], }, // ══════════════════════════════════════════════════════════════ + // Qwen Code CLI (3) — subprocess-based + // ══════════════════════════════════════════════════════════════ + ModelCatalogEntry { + id: "qwen-code/qwen3-coder".into(), + display_name: "Qwen3 Coder (CLI)".into(), + provider: "qwen-code".into(), + tier: ModelTier::Smart, + context_window: 131_072, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: false, + supports_vision: false, + supports_streaming: true, + aliases: vec!["qwen-code".into(), "qwen-code-qwen3".into()], + }, + ModelCatalogEntry { + id: "qwen-code/qwen-coder-plus".into(), + display_name: "Qwen Coder Plus (CLI)".into(), + provider: "qwen-code".into(), + tier: ModelTier::Frontier, + context_window: 131_072, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: false, + supports_vision: false, + 
supports_streaming: true, + aliases: vec!["qwen-code-plus".into()], + }, + ModelCatalogEntry { + id: "qwen-code/qwq-32b".into(), + display_name: "QwQ 32B (CLI)".into(), + provider: "qwen-code".into(), + tier: ModelTier::Balanced, + context_window: 131_072, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: false, + supports_vision: false, + supports_streaming: true, + aliases: vec!["qwen-code-qwq".into()], + }, + // ══════════════════════════════════════════════════════════════ // Chutes.ai (5) // ══════════════════════════════════════════════════════════════ ModelCatalogEntry { @@ -3917,4 +3990,29 @@ mod tests { let entry = catalog.find_model("claude-code").unwrap(); assert_eq!(entry.id, "claude-code/sonnet"); } + + #[test] + fn test_qwen_code_provider() { + let catalog = ModelCatalog::new(); + let qc = catalog.get_provider("qwen-code").unwrap(); + assert_eq!(qc.display_name, "Qwen Code"); + assert!(!qc.key_required); + } + + #[test] + fn test_qwen_code_models() { + let catalog = ModelCatalog::new(); + let models = catalog.models_by_provider("qwen-code"); + assert_eq!(models.len(), 3); + assert!(models.iter().any(|m| m.id == "qwen-code/qwen3-coder")); + assert!(models.iter().any(|m| m.id == "qwen-code/qwen-coder-plus")); + assert!(models.iter().any(|m| m.id == "qwen-code/qwq-32b")); + } + + #[test] + fn test_qwen_code_aliases() { + let catalog = ModelCatalog::new(); + let entry = catalog.find_model("qwen-code").unwrap(); + assert_eq!(entry.id, "qwen-code/qwen3-coder"); + } } diff --git a/crates/openfang-runtime/src/prompt_builder.rs b/crates/openfang-runtime/src/prompt_builder.rs index e0a8bd2a6..91b8a7b3b 100644 --- a/crates/openfang-runtime/src/prompt_builder.rs +++ b/crates/openfang-runtime/src/prompt_builder.rs @@ -37,6 +37,10 @@ pub struct PromptContext { pub user_name: Option, /// Channel type (telegram, discord, web, etc.). pub channel_type: Option, + /// Platform-specific sender ID (e.g. 
phone number) — from channel gateway metadata. + pub sender_id: Option, + /// Human-readable sender display name — from channel gateway metadata. + pub sender_name: Option, /// Whether this agent was spawned as a subagent. pub is_subagent: bool, /// Whether this agent has autonomous config. @@ -144,7 +148,21 @@ pub fn build_system_prompt(ctx: &PromptContext) -> String { // Section 9 — Channel Awareness (skip for subagents) if !ctx.is_subagent { if let Some(ref channel) = ctx.channel_type { - sections.push(build_channel_section(channel)); + let mut section = build_channel_section(channel); + // Append sender identity when available (from channel gateway metadata) + if ctx.sender_id.is_some() || ctx.sender_name.is_some() { + section.push_str("\n\n### Current Message Sender\n"); + if let Some(ref name) = ctx.sender_name { + section.push_str(&format!("- **Name**: {name}\n")); + } + if let Some(ref id) = ctx.sender_id { + section.push_str(&format!("- **Platform ID**: {id}\n")); + } + section.push_str("\nIMPORTANT: Check this sender identity against your USER.md to determine \ + if this is your owner/master or someone else. 
If it is NOT your owner, \ + read and follow PRIVACY-RULES.md before responding."); + } + sections.push(section); } } diff --git a/crates/openfang-types/src/agent.rs b/crates/openfang-types/src/agent.rs index 5f002b778..f8d59fcfd 100644 --- a/crates/openfang-types/src/agent.rs +++ b/crates/openfang-types/src/agent.rs @@ -267,7 +267,7 @@ impl Default for ResourceQuota { max_tool_calls_per_minute: 60, max_llm_tokens_per_hour: 0, // unlimited by default max_network_bytes_per_hour: 100 * 1024 * 1024, // 100 MB - max_cost_per_hour_usd: 0.0, // unlimited by default + max_cost_per_hour_usd: 0.0, // unlimited max_cost_per_day_usd: 0.0, // unlimited max_cost_per_month_usd: 0.0, // unlimited } diff --git a/crates/openfang-types/src/config.rs b/crates/openfang-types/src/config.rs index 76c3a5107..3cd103d99 100644 --- a/crates/openfang-types/src/config.rs +++ b/crates/openfang-types/src/config.rs @@ -68,7 +68,7 @@ pub enum OutputFormat { } /// Per-channel behavior overrides. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] pub struct ChannelOverrides { /// Model override (uses agent's default if None). @@ -89,6 +89,27 @@ pub struct ChannelOverrides { pub usage_footer: Option, /// Typing indicator mode override. pub typing_mode: Option, + /// Enable lifecycle emoji reactions on messages (⏳→🤔→✅/❌). + /// Defaults to true; set to false to disable. + #[serde(default = "default_true")] + pub lifecycle_reactions: bool, +} + +impl Default for ChannelOverrides { + fn default() -> Self { + Self { + model: None, + system_prompt: None, + dm_policy: DmPolicy::default(), + group_policy: GroupPolicy::default(), + rate_limit_per_user: 0, + threading: false, + output_format: None, + usage_footer: None, + typing_mode: None, + lifecycle_reactions: true, + } + } } /// Controls what usage info appears in response footers. 
@@ -1393,6 +1414,10 @@ pub struct DefaultModelConfig { pub api_key_env: String, /// Optional base URL override. pub base_url: Option, + /// Optional vision-capable model for image messages. + /// When set, agents receiving images will automatically use this model + /// instead of the default (which may not support vision). + pub vision_model: Option, } impl Default for DefaultModelConfig { @@ -1402,6 +1427,7 @@ impl Default for DefaultModelConfig { model: "claude-sonnet-4-20250514".to_string(), api_key_env: "ANTHROPIC_API_KEY".to_string(), base_url: None, + vision_model: None, } } } @@ -3595,6 +3621,7 @@ mod tests { assert!(!ov.threading); assert!(ov.output_format.is_none()); assert!(ov.model.is_none()); + assert!(ov.lifecycle_reactions); } #[test] diff --git a/crates/openfang-types/src/message.rs b/crates/openfang-types/src/message.rs index 99be59571..ac8fcd9bd 100644 --- a/crates/openfang-types/src/message.rs +++ b/crates/openfang-types/src/message.rs @@ -2,6 +2,20 @@ use serde::{Deserialize, Serialize}; +/// Sender context forwarded from channel gateways (e.g. WhatsApp, Telegram). +/// +/// Carries the identity of the person who sent the message so the agent +/// can distinguish between its owner and other users. +#[derive(Debug, Clone, Default)] +pub struct SenderContext { + /// Channel name (e.g. "whatsapp", "telegram"). + pub channel: Option, + /// Platform-specific sender ID (e.g. phone number, Telegram user ID). + pub sender_id: Option, + /// Human-readable sender display name. + pub sender_name: Option, +} + /// A message in an LLM conversation. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { @@ -56,6 +70,12 @@ pub enum ContentBlock { /// Base64-encoded image data. data: String, }, + /// A URL-referenced image (for providers like DashScope that prefer URLs over base64). + #[serde(rename = "image_url")] + ImageUrl { + /// The URL of the image. + url: String, + }, /// A tool use request from the assistant. 
#[serde(rename = "tool_use")] ToolUse { @@ -144,6 +164,7 @@ impl MessageContent { ContentBlock::Thinking { thinking } => thinking.len(), ContentBlock::ToolUse { .. } | ContentBlock::Image { .. } + | ContentBlock::ImageUrl { .. } | ContentBlock::Unknown => 0, }) .sum(), diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 000000000..621b91e31 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Drop root privileges and run openfang as the openfang user +chown -R openfang:openfang /data 2>/dev/null +chown -R openfang:openfang /home/openfang 2>/dev/null +exec gosu openfang openfang "$@" diff --git a/packages/whatsapp-gateway/index.js b/packages/whatsapp-gateway/index.js index b6a00a747..7c9e973d7 100644 --- a/packages/whatsapp-gateway/index.js +++ b/packages/whatsapp-gateway/index.js @@ -20,6 +20,8 @@ let qrDataUrl = ''; // latest QR code as data:image/png;base64,... let connStatus = 'disconnected'; // disconnected | qr_ready | connected let qrExpired = false; let statusMessage = 'Not started'; +let reconnectAttempts = 0; // track consecutive reconnection attempts +const MAX_RECONNECT_DELAY = 60_000; // cap backoff at 60s // --------------------------------------------------------------------------- // Baileys connection @@ -84,6 +86,7 @@ async function startConnection() { statusMessage = 'Logged out. 
Generate a new QR code to reconnect.'; qrDataUrl = ''; sock = null; + reconnectAttempts = 0; // Remove auth store so next connect gets a fresh QR const fs = require('node:fs'); const path = require('node:path'); @@ -91,18 +94,17 @@ async function startConnection() { if (fs.existsSync(authPath)) { fs.rmSync(authPath, { recursive: true, force: true }); } - } else if (statusCode === DisconnectReason.restartRequired || - statusCode === DisconnectReason.timedOut) { - // Recoverable — reconnect automatically - console.log('[gateway] Reconnecting...'); - statusMessage = 'Reconnecting...'; - setTimeout(() => startConnection(), 2000); } else { - // QR expired or other non-recoverable close - qrExpired = true; + // All other close reasons are recoverable — reconnect with backoff. + // This includes: connectionClosed (428), connectionLost (408), + // connectionReplaced (440), restartRequired (515), timedOut (408), + // multideviceMismatch, and QR expiry. + reconnectAttempts++; + const delay = Math.min(2000 * Math.pow(1.5, reconnectAttempts - 1), MAX_RECONNECT_DELAY); + console.log(`[gateway] Reconnecting in ${Math.round(delay / 1000)}s (attempt ${reconnectAttempts})...`); connStatus = 'disconnected'; - statusMessage = 'QR code expired. 
Click "Generate New QR" to retry.'; - qrDataUrl = ''; + statusMessage = `Reconnecting (attempt ${reconnectAttempts})...`; + setTimeout(() => startConnection(), delay); } } @@ -110,6 +112,7 @@ async function startConnection() { connStatus = 'connected'; qrExpired = false; qrDataUrl = ''; + reconnectAttempts = 0; statusMessage = 'Connected to WhatsApp'; console.log('[gateway] Connected to WhatsApp!'); } @@ -332,11 +335,24 @@ const server = http.createServer(async (req, res) => { } }); -server.listen(PORT, '127.0.0.1', () => { +server.listen(PORT, '127.0.0.1', async () => { console.log(`[gateway] WhatsApp Web gateway listening on http://127.0.0.1:${PORT}`); console.log(`[gateway] OpenFang URL: ${OPENFANG_URL}`); console.log(`[gateway] Default agent: ${DEFAULT_AGENT}`); - console.log('[gateway] Waiting for POST /login/start to begin QR flow...'); + + // Auto-connect if auth credentials already exist (previous session) + const fs = require('node:fs'); + const authPath = require('node:path').join(__dirname, 'auth_store', 'creds.json'); + if (fs.existsSync(authPath)) { + console.log('[gateway] Found existing auth — auto-connecting...'); + try { + await startConnection(); + } catch (err) { + console.error('[gateway] Auto-connect failed:', err.message); + } + } else { + console.log('[gateway] No auth found — waiting for POST /login/start to begin QR flow...'); + } }); // Graceful shutdown