diff --git a/CHANGELOG.md b/CHANGELOG.md index b2c723e..86f7a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.0.1] - 2026-02-05 + +### Fixed + +- **Multi-host fallback now works in main chat loop** - HostPool is properly wired to Assistant for automatic + fallback when the primary host becomes unavailable +- **Accurate host health status display** - `/hosts` now shows correct healthy/total count; hosts are only marked + healthy after connectivity verification via `DetectModels()` +- **Fallback notification** - Users now see clear feedback when fallback occurs: + `"Primary host unavailable, switched to: <host>"` +- **Fallback failure reporting** - When no fallback is available, the error is now properly reported to the user + +### Added + +- `isHostRetryableError()` helper for detecting connection errors that should trigger fallback +- `switchToFallbackProvider()` method in Assistant for seamless host switching +- `SetHostPool()` method to wire HostPool to Assistant + ## [2.0.0] - 2026-02-04 ### Added @@ -26,8 +44,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Version bump from 1.x to 2.x due to configuration schema changes -- Agent registry now supports initialization from host pool +- Version bumps from 1.x to 2.x due to configuration schema changes +- Agent registry now supports initialization from the host pool - TaskBridge supports both single-host and multi-host operation modes ### Fixed @@ -114,7 +132,9 @@ The project evolved through the following milestones before being open-sourced: - **v0.3.12** - File reference autocomplete, permissions system - **v0.3.8** - Native OpenAI function calling, security tools -[Unreleased]: https://github.com/tara-vision/taracode/compare/v2.0.0...HEAD +[Unreleased]: https://github.com/tara-vision/taracode/compare/v2.0.1...HEAD + +[2.0.1]: 
// isHostRetryableError reports whether err looks like a host-level connection
// failure, i.e. one where retrying against a different host may succeed
// (v2.0 multi-host support).
//
// The decision is made purely on the error text: the message is lowercased
// and scanned for a fixed set of substrings. A nil error is never retryable.
func isHostRetryableError(err error) bool {
	if err == nil {
		return false
	}

	// Lowercase fragments that identify a connection-level failure. They are
	// compared against the lowercased message, so matching is case-insensitive.
	markers := [...]string{
		"connection refused",
		"host is down",
		"no such host",
		"i/o timeout",
		"dial tcp",
		"network is unreachable",
		"connection reset",
	}

	msg := strings.ToLower(err.Error())
	for _, marker := range markers {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
host pool for multi-host fallback support (v2.0) +func (a *Assistant) SetHostPool(pool *provider.HostPool) { + a.hostPool = pool +} + +// switchToFallbackProvider attempts to switch to a healthy fallback host (v2.0) +// Returns the name of the host switched to, or an error if no fallback available +func (a *Assistant) switchToFallbackProvider() (string, error) { + if a.hostPool == nil { + return "", fmt.Errorf("no host pool configured") + } + + prov, hostName, err := a.hostPool.GetDefaultWithFallback() + if err != nil { + return "", err + } + + // Update provider and client + a.provider = prov + a.client = prov.CreateClient() + + return hostName, nil +} + // AddMCPToolDefinition adds an MCP tool definition for LLM function calling func (a *Assistant) AddMCPToolDefinition(tool openai.Tool) { a.toolDefs = append(a.toolDefs, tool) @@ -2220,6 +2263,17 @@ func (a *Assistant) processMessageStreamingWithImages(userMessage string, images stream, err = a.client.CreateChatCompletionStream(ctx, req) } + // If still failing with connection error, try host fallback (v2.0) + if err != nil && isHostRetryableError(err) && a.hostPool != nil { + if hostName, switchErr := a.switchToFallbackProvider(); switchErr == nil { + fmt.Printf("\n%s Primary host unavailable, switched to: %s\n", ui.IconWarning, hostName) + stream, err = a.client.CreateChatCompletionStream(ctx, req) + } else { + // Fallback also failed - log for visibility + fmt.Printf("\n%s Primary host unavailable, no fallback available: %v\n", ui.IconWarning, switchErr) + } + } + if err != nil { if thinkingSpinner != nil { thinkingSpinner.Stop() diff --git a/internal/provider/hostpool.go b/internal/provider/hostpool.go index f2613da..c650c45 100644 --- a/internal/provider/hostpool.go +++ b/internal/provider/hostpool.go @@ -69,14 +69,17 @@ func (p *HostPool) Connect(ctx context.Context, name string) error { } conn.Provider = prov - conn.MarkHealthy(latency) - // Detect available models + // Detect available models to verify 
connectivity models, err := prov.DetectModels(ctx) - if err == nil { - conn.Models = models + if err != nil { + conn.MarkUnavailable(err) + return fmt.Errorf("failed to verify host %q connectivity: %w", name, err) } + conn.Models = models + conn.MarkHealthy(latency) + return nil }