Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/openfang-api/src/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6852,6 +6852,7 @@ pub async fn set_provider_key(
model: model_id,
api_key_env: env_var.clone(),
base_url: None,
vision_model: None,
};
let mut guard = state
.kernel
Expand Down
43 changes: 26 additions & 17 deletions crates/openfang-channels/src/bridge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,25 +528,34 @@ async fn dispatch_message(
return;
}

// For images: download, base64 encode, and send as multimodal content blocks
// For images: build content blocks with the image URL for vision models.
// We pass the original URL rather than downloading + base64-encoding because
// many providers (DashScope/Qwen, OpenAI) prefer or require direct URLs.
if let ChannelContent::Image { ref url, ref caption } = message.content {
let blocks = download_image_to_blocks(url, caption.as_deref()).await;
if blocks.iter().any(|b| matches!(b, ContentBlock::Image { .. })) {
// We have actual image data — send as structured blocks for vision
dispatch_with_blocks(
blocks,
message,
handle,
router,
adapter,
ct_str,
thread_id,
output_format,
)
.await;
return;
let mut blocks = Vec::new();
if let Some(cap) = caption {
if !cap.is_empty() {
blocks.push(ContentBlock::Text {
text: cap.clone(),
provider_metadata: None,
});
}
}
// Image download failed — fall through to text description below
blocks.push(ContentBlock::ImageUrl {
url: url.clone(),
});
dispatch_with_blocks(
blocks,
message,
handle,
router,
adapter,
ct_str,
thread_id,
output_format,
)
.await;
return;
}

let text = match &message.content {
Expand Down
49 changes: 49 additions & 0 deletions crates/openfang-kernel/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2262,6 +2262,55 @@ impl OpenFangKernel {
}
}

// Vision model selection for image content.
// Priority: 1) explicit vision_model from config (forced override)
// 2) current agent model if it supports vision (no swap needed)
// 3) otherwise — log a warning and continue with the current model unchanged
if let Some(ref blocks) = content_blocks {
let has_images = blocks.iter().any(|b| {
matches!(
b,
openfang_types::message::ContentBlock::Image { .. }
| openfang_types::message::ContentBlock::ImageUrl { .. }
)
});
if has_images {
if let Some(ref vision_model) = self.config.default_model.vision_model {
// Explicit vision_model configured — always use it
info!(
agent = %manifest.name,
current_model = %manifest.model.model,
vision_model = %vision_model,
"Swapping to configured vision model for image content"
);
manifest.model.model = vision_model.clone();
manifest.model.provider = self.config.default_model.provider.clone();
} else {
// No vision_model forced — check if current model handles vision
let current_supports_vision = self
.model_catalog
.read()
.ok()
.and_then(|cat| cat.find_model(&manifest.model.model).map(|m| m.supports_vision))
.unwrap_or(false);

if current_supports_vision {
info!(
agent = %manifest.name,
model = %manifest.model.model,
"Current model supports vision — no swap needed"
);
} else {
warn!(
agent = %manifest.name,
model = %manifest.model.model,
"Image received but no vision_model configured and current model lacks vision support"
);
}
}
}
}

let driver = self.resolve_driver(&manifest)?;

// Look up model's actual context window from the catalog
Expand Down
3 changes: 3 additions & 0 deletions crates/openfang-memory/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,9 @@ impl SessionStore {
ContentBlock::Image { media_type, .. } => {
text_parts.push(format!("[image: {media_type}]"));
}
ContentBlock::ImageUrl { ref url } => {
text_parts.push(format!("[image: {url}]"));
}
ContentBlock::Thinking { thinking } => {
text_parts.push(format!(
"[thinking: {}]",
Expand Down
3 changes: 3 additions & 0 deletions crates/openfang-runtime/src/compactor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ fn build_conversation_text(messages: &[Message], config: &CompactionConfig) -> S
ContentBlock::Image { media_type, .. } => {
conversation_text.push_str(&format!("[Image: {media_type}]\n\n"));
}
ContentBlock::ImageUrl { url } => {
conversation_text.push_str(&format!("[Image: {url}]\n\n"));
}
ContentBlock::Thinking { .. } => {}
ContentBlock::Unknown => {}
}
Expand Down
6 changes: 6 additions & 0 deletions crates/openfang-runtime/src/drivers/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,12 @@ fn convert_message(msg: &Message) -> ApiMessage {
data: data.clone(),
},
}),
ContentBlock::ImageUrl { url } => {
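// Fallback for now: emit a text placeholder, so the model sees only the URL string, not the image.
// NOTE(review): rationale was that Anthropic requires base64 image data — confirm against current API docs.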
Some(ApiContentBlock::Text {
text: format!("[Image: {url}]"),
})
}
ContentBlock::ToolUse { id, name, input, .. } => Some(ApiContentBlock::ToolUse {
id: id.clone(),
name: name.clone(),
Expand Down
Loading