From 97e0f6b4872d444aed306f6d63a9915b626e0089 Mon Sep 17 00:00:00 2001 From: Dejan Stefanoski Date: Thu, 5 Feb 2026 12:57:35 +0100 Subject: [PATCH] fix: multi-host fallback integration and accurate health status Fixes: - Multi-host fallback now works in main chat loop - Accurate host health status display in /hosts command - Clear user feedback when fallback occurs or fails Changes: - internal/assistant/assistant.go: Add hostPool field, SetHostPool(), switchToFallbackProvider(), isHostRetryableError() - internal/provider/hostpool.go: Verify connectivity before marking healthy - cmd/repl.go: Wire HostPool to Assistant after initialization The HostPool was not connected to the Assistant, so automatic fallback never triggered during chat. Now, when the primary host fails, the system automatically switches to a healthy fallback host and notifies the user. Additionally, hosts were incorrectly marked healthy before actual connectivity verification. Now DetectModels() must succeed before a host is considered healthy. 
--- CHANGELOG.md | 26 ++++++++++++++-- cmd/repl.go | 3 ++ internal/assistant/assistant.go | 54 +++++++++++++++++++++++++++++++++ internal/provider/hostpool.go | 11 ++++--- 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2c723e..86f7a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.0.1] - 2026-02-05 + +### Fixed + +- **Multi-host fallback now works in main chat loop** - HostPool is properly wired to Assistant for automatic + fallback when the primary host becomes unavailable +- **Accurate host health status display** - `/hosts` now shows correct healthy/total count; hosts are only marked + healthy after connectivity verification via `DetectModels()` +- **Fallback notification** - Users now see clear feedback when fallback occurs: + "Primary host unavailable, switched to: {host}" +- **Fallback failure reporting** - When no fallback is available, the error is now properly reported to the user + +### Added + +- `isHostRetryableError()` helper for detecting connection errors that should trigger fallback +- `switchToFallbackProvider()` method in Assistant for seamless host switching +- `SetHostPool()` method to wire HostPool to Assistant + ## [2.0.0] - 2026-02-04 ### Added @@ -26,8 +44,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Version bump from 1.x to 2.x due to configuration schema changes -- Agent registry now supports initialization from host pool +- Version bumps from 1.x to 2.x due to configuration schema changes +- Agent registry now supports initialization from the host pool - TaskBridge supports both single-host and multi-host operation modes ### Fixed @@ -114,7 +132,9 @@ The project evolved through the following milestones before being open-sourced: - **v0.3.12** - File reference autocomplete, permissions system - **v0.3.8** - Native OpenAI 
function calling, security tools -[Unreleased]: https://github.com/tara-vision/taracode/compare/v2.0.0...HEAD +[Unreleased]: https://github.com/tara-vision/taracode/compare/v2.0.1...HEAD + +[2.0.1]: https://github.com/tara-vision/taracode/compare/v2.0.0...v2.0.1 [2.0.0]: https://github.com/tara-vision/taracode/compare/v1.0.3...v2.0.0 diff --git a/cmd/repl.go b/cmd/repl.go index c1415f8..c0f1b99 100644 --- a/cmd/repl.go +++ b/cmd/repl.go @@ -238,6 +238,9 @@ func startREPL() { ui.SuccessStyle.Render(ui.IconSuccess), hostPool.HealthyCount(), hostPool.HostCount()) + + // Wire HostPool to Assistant for automatic fallback (v2.0) + asst.SetHostPool(hostPool) } // Initialize TaskBridge for multi-agent orchestration diff --git a/internal/assistant/assistant.go b/internal/assistant/assistant.go index 70920ab..975f009 100644 --- a/internal/assistant/assistant.go +++ b/internal/assistant/assistant.go @@ -81,6 +81,22 @@ func isRetryable(err error) bool { strings.Contains(errMsg, "temporary failure") } +// isHostRetryableError checks if an error indicates a host connection failure +// that should trigger a fallback to another host (v2.0 multi-host support) +func isHostRetryableError(err error) bool { + if err == nil { + return false + } + errMsg := strings.ToLower(err.Error()) + return strings.Contains(errMsg, "connection refused") || + strings.Contains(errMsg, "host is down") || + strings.Contains(errMsg, "no such host") || + strings.Contains(errMsg, "i/o timeout") || + strings.Contains(errMsg, "dial tcp") || + strings.Contains(errMsg, "network is unreachable") || + strings.Contains(errMsg, "connection reset") +} + // withRetry executes fn with exponential backoff retry for transient errors func withRetry[T any](ctx gocontext.Context, operation string, fn func() (T, error)) (T, error) { var result T @@ -142,6 +158,9 @@ type Assistant struct { // Last AI response (for suggestion detection) lastResponse string + + // Multi-host fallback support (v2.0) + hostPool 
*provider.HostPool } // StreamFilter handles real-time filtering of think tags during streaming @@ -923,6 +942,30 @@ func (a *Assistant) GetProvider() provider.Provider { return a.provider } +// SetHostPool sets the host pool for multi-host fallback support (v2.0) +func (a *Assistant) SetHostPool(pool *provider.HostPool) { + a.hostPool = pool +} + +// switchToFallbackProvider attempts to switch to a healthy fallback host (v2.0) +// Returns the name of the host switched to, or an error if no fallback available +func (a *Assistant) switchToFallbackProvider() (string, error) { + if a.hostPool == nil { + return "", fmt.Errorf("no host pool configured") + } + + prov, hostName, err := a.hostPool.GetDefaultWithFallback() + if err != nil { + return "", err + } + + // Update provider and client + a.provider = prov + a.client = prov.CreateClient() + + return hostName, nil +} + // AddMCPToolDefinition adds an MCP tool definition for LLM function calling func (a *Assistant) AddMCPToolDefinition(tool openai.Tool) { a.toolDefs = append(a.toolDefs, tool) @@ -2220,6 +2263,17 @@ func (a *Assistant) processMessageStreamingWithImages(userMessage string, images stream, err = a.client.CreateChatCompletionStream(ctx, req) } + // If still failing with connection error, try host fallback (v2.0) + if err != nil && isHostRetryableError(err) && a.hostPool != nil { + if hostName, switchErr := a.switchToFallbackProvider(); switchErr == nil { + fmt.Printf("\n%s Primary host unavailable, switched to: %s\n", ui.IconWarning, hostName) + stream, err = a.client.CreateChatCompletionStream(ctx, req) + } else { + // Fallback also failed - log for visibility + fmt.Printf("\n%s Primary host unavailable, no fallback available: %v\n", ui.IconWarning, switchErr) + } + } + if err != nil { if thinkingSpinner != nil { thinkingSpinner.Stop() diff --git a/internal/provider/hostpool.go b/internal/provider/hostpool.go index f2613da..c650c45 100644 --- a/internal/provider/hostpool.go +++ 
b/internal/provider/hostpool.go @@ -69,14 +69,17 @@ func (p *HostPool) Connect(ctx context.Context, name string) error { } conn.Provider = prov - conn.MarkHealthy(latency) - // Detect available models + // Detect available models to verify connectivity models, err := prov.DetectModels(ctx) - if err == nil { - conn.Models = models + if err != nil { + conn.MarkUnavailable(err) + return fmt.Errorf("failed to verify host %q connectivity: %w", name, err) } + conn.Models = models + conn.MarkHealthy(latency) + return nil }