diff --git a/fix-setup-waitdelay.md b/fix-setup-waitdelay.md new file mode 100644 index 00000000..c9cb0b6a --- /dev/null +++ b/fix-setup-waitdelay.md @@ -0,0 +1,64 @@ +# Fix: Treat `exec.ErrWaitDelay` as success in setup commands + +## Context + +When Kilroy runs setup commands for the hangar pipeline, `bin/setup` starts a background Postgres daemon via `pg_ctl start`. The daemon inherits the stdout/stderr pipes from its parent process. After `bin/setup` exits successfully (exit code 0), Go's `cmd.Wait()` waits for the pipes to close — but Postgres keeps them open. After 3 seconds (`WaitDelay`), Go forcefully closes the pipes and returns `exec.ErrWaitDelay`. Kilroy treats this as a failure, aborting the pipeline. + +The setup command itself succeeded. A grandchild daemon holding a pipe open is not an error. + +## Change + +**File:** `internal/attractor/engine/setup_commands.go` + +Add `"errors"` to the import block. + +Replace the error handling block (lines 52-70) with: + +```go +err := cmd.Run() +if errors.Is(err, exec.ErrWaitDelay) { + e.appendProgress(map[string]any{ + "event": "setup_command_ok", + "index": i, + "command": cmdStr, + "stdout": strings.TrimSpace(stdout.String()), + "warning": "child process held I/O pipes open past WaitDelay; treated as success", + }) +} else if err != nil { + e.appendProgress(map[string]any{ + "event": "setup_command_failed", + "index": i, + "command": cmdStr, + "error": err.Error(), + "stdout": strings.TrimSpace(stdout.String()), + "stderr": strings.TrimSpace(stderr.String()), + }) + return fmt.Errorf("setup command [%d] %q failed: %w", i, cmdStr, err) +} else { + e.appendProgress(map[string]any{ + "event": "setup_command_ok", + "index": i, + "command": cmdStr, + "stdout": strings.TrimSpace(stdout.String()), + }) +} +``` + +## Test + +**File:** `internal/attractor/engine/setup_commands_test.go` + +Add a test that runs a setup command which spawns a background process holding pipes open, and verify it succeeds rather than returning an error. For example: + +```go +func TestSetupCommands_BackgroundDaemonDoesNotFail(t *testing.T) { + // command that exits 0 but leaves a child holding stdout open + // e.g.: "sh -c 'sleep 60 &'" +} +``` + +## Verification + +1. Run existing tests: `go test ./internal/attractor/engine/ -run TestSetupCommands` +2. Run the new test +3. Re-run `/kilroy:run hangar` from the orchestration repo to confirm the pipeline gets past setup diff --git a/internal/attractor/engine/setup_commands.go b/internal/attractor/engine/setup_commands.go index 1c51836c..96ecd665 100644 --- a/internal/attractor/engine/setup_commands.go +++ b/internal/attractor/engine/setup_commands.go @@ -3,6 +3,7 @@ package engine import ( "bytes" "context" + "errors" "fmt" "os/exec" "strings" @@ -50,7 +51,15 @@ func (e *Engine) executeSetupCommands(ctx context.Context) error { cmd.Stderr = &stderr err := cmd.Run() - if err != nil { + if errors.Is(err, exec.ErrWaitDelay) { + e.appendProgress(map[string]any{ + "event": "setup_command_ok", + "index": i, + "command": cmdStr, + "stdout": strings.TrimSpace(stdout.String()), + "warning": "child process held I/O pipes open past WaitDelay; treated as success", + }) + } else if err != nil { e.appendProgress(map[string]any{ "event": "setup_command_failed", "index": i, @@ -60,14 +69,14 @@ func (e *Engine) executeSetupCommands(ctx context.Context) error { "stderr": strings.TrimSpace(stderr.String()), }) return fmt.Errorf("setup command [%d] %q failed: %w", i, cmdStr, err) + } else { + e.appendProgress(map[string]any{ + "event": "setup_command_ok", + "index": i, + "command": cmdStr, + "stdout": strings.TrimSpace(stdout.String()), + }) } - - e.appendProgress(map[string]any{ - "event": "setup_command_ok", - "index": i, - "command": cmdStr, - "stdout": strings.TrimSpace(stdout.String()), - }) } return nil diff --git a/internal/attractor/engine/setup_commands_test.go b/internal/attractor/engine/setup_commands_test.go index ada524e0..f513b97b 100644 --- a/internal/attractor/engine/setup_commands_test.go +++ b/internal/attractor/engine/setup_commands_test.go @@ -168,6 +168,34 @@ func TestSetupCommands_FailsOnError_StopsEarly(t *testing.T) { } } +func TestSetupCommands_BackgroundDaemonDoesNotFail(t *testing.T) { + worktree := t.TempDir() + logsRoot := t.TempDir() + + // This command exits 0 immediately but leaves a background child holding + // stdout open, which triggers exec.ErrWaitDelay after WaitDelay expires. + // The engine should treat this as success, not failure. + e := &Engine{ + LogsRoot: logsRoot, + WorktreeDir: worktree, + Options: RunOptions{RunID: "test-bg-daemon"}, + RunConfig: &RunConfigFile{ + Setup: struct { + Commands []string `json:"commands,omitempty" yaml:"commands,omitempty"` + TimeoutMS int `json:"timeout_ms,omitempty" yaml:"timeout_ms,omitempty"` + }{ + Commands: []string{"sh -c 'sleep 60 & echo started'"}, + TimeoutMS: 30000, + }, + }, + } + + err := e.executeSetupCommands(context.Background()) + if err != nil { + t.Fatalf("expected success when background daemon holds pipes open, got: %v", err) + } +} + func TestSetupCommands_Timeout(t *testing.T) { worktree := t.TempDir() logsRoot := t.TempDir()