Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cmd/urunc/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ import (
"errors"
"fmt"
"os"
"time"

"github.com/sirupsen/logrus"
"github.com/urfave/cli/v3"
m "github.com/urunc-dev/urunc/internal/metrics"
"github.com/urunc-dev/urunc/pkg/unikontainers"
)

const execveErrorCheckTimeout = 100 * time.Millisecond

var startCommand = &cli.Command{
Name: "start",
Usage: "executes the user defined process in a created container",
Expand Down Expand Up @@ -98,10 +101,10 @@ func startUnikontainer(cmd *cli.Command) error {
}
metrics.Capture(m.TS13)

// wait ContainerStarted message on start.sock from reexec process
err = unikontainer.AwaitMsg(unikontainers.StartSuccess)
// Wait for success, then briefly check for error (catches Execve failures)
err = unikontainer.AwaitMsgWithErrorCheck(unikontainers.StartSuccess, unikontainers.StartErr, execveErrorCheckTimeout)
if err != nil {
err = fmt.Errorf("failed to get message from successful start from reexec: %w", err)
err = fmt.Errorf("failed to start container: %w", err)
return err
}

Expand Down
55 changes: 55 additions & 0 deletions pkg/unikontainers/ipc.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,58 @@ func AwaitMessage(listener *net.UnixListener, expectedMessage IPCMessage) error
}
return nil
}

// AwaitMessageWithErrorCheck waits for a success message, then briefly waits for
// a potential error message to catch Execve failures after StartSuccess is sent.
func AwaitMessageWithErrorCheck(listener *net.UnixListener, successMsg, errorMsg IPCMessage, errorCheckTimeout time.Duration) error {
conn, err := listener.AcceptUnix()
if err != nil {
return err
}
defer func() {
closeErr := conn.Close()
if closeErr != nil {
logrus.WithError(closeErr).Error("failed to close connection")
}
}()

bufSize := len(successMsg)
if len(errorMsg) > bufSize {
bufSize = len(errorMsg)
}

buf := make([]byte, bufSize)
n, err := conn.Read(buf)
if err != nil {
return fmt.Errorf("failed to read from socket: %w", err)
}
msg := string(buf[0:n])

if msg == string(errorMsg) {
return fmt.Errorf("container execution failed: received error signal from reexec")
}

if msg != string(successMsg) {
return fmt.Errorf("received unexpected message: %s", msg)
}

// Wait briefly for potential error after success (catches Execve failures)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How exactly does this work? Which process will write in case of execve failures?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reexec process is responsible for writing the error.

When syscall.Exec() fails and returns, the reexec process sends StartErr over the socket before exiting:

err = unikontainer.Exec(metrics)
if err != nil {
    unikontainer.SendMessage(unikontainers.StartErr)
}

The parent process (urunc start) does not detect the failure directly; it only observes the error via the socket message sent by the reexec process.

if errorCheckTimeout > 0 {
if err = conn.SetReadDeadline(time.Now().Add(errorCheckTimeout)); err != nil {
logrus.WithError(err).Warn("failed to set read deadline for error check")
return nil
}

errBuf := make([]byte, len(errorMsg))
n, readErr := conn.Read(errBuf)
if readErr == nil && n > 0 {
errMsg := string(errBuf[0:n])
if errMsg == string(errorMsg) {
return fmt.Errorf("VMM execution failed after signaling start")
}
logrus.WithField("message", errMsg).Warn("received unexpected message after success")
}
}

return nil
}
16 changes: 9 additions & 7 deletions pkg/unikontainers/unikontainers.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"strings"
"sync"
"syscall"
"time"

"github.com/urunc-dev/urunc/pkg/network"
"github.com/urunc-dev/urunc/pkg/unikontainers/hypervisors"
Expand Down Expand Up @@ -493,12 +494,8 @@ func (u *Unikontainer) Exec(metrics m.Writer) error {

uniklog.Debug("calling vmm execve")
metrics.Capture(m.TS18)
// metrics.Wait()
// TODO: We set the state to running and notify urunc Start that the monitor
// started, but we might encounter issues with the monitor execution. We need
// to revisit this and check if a failed monitor execution affects this approach.
// If it affects then we need to re-design the whole spawning of the monitor.
// Notify urunc start
// Notify urunc start. If Execve fails after this, urunc start will detect it
// via AwaitMsgWithErrorCheck which waits briefly for a potential StartErr.
err = u.SendMessage(StartSuccess)
if err != nil {
return err
Expand Down Expand Up @@ -1102,11 +1099,16 @@ func (u *Unikontainer) DestroyConn(isReexec bool) error {
return nil
}

// AwaitMessage waits for a specific message in the listener of unikontainer instance
// AwaitMsg waits for a specific message in the listener of unikontainer instance
func (u *Unikontainer) AwaitMsg(msg IPCMessage) error {
return AwaitMessage(u.Listener, msg)
}

// AwaitMsgWithErrorCheck waits for success, then checks for a follow-up error message
func (u *Unikontainer) AwaitMsgWithErrorCheck(successMsg, errorMsg IPCMessage, errorTimeout time.Duration) error {
return AwaitMessageWithErrorCheck(u.Listener, successMsg, errorMsg, errorTimeout)
}

// SendMessage sends message over the active connection
func (u *Unikontainer) SendMessage(message IPCMessage) error {
conn := u.Conn
Expand Down