diff --git a/cmd/rootlesskit/main.go b/cmd/rootlesskit/main.go index c1e060f7..be8d3418 100644 --- a/cmd/rootlesskit/main.go +++ b/cmd/rootlesskit/main.go @@ -170,6 +170,11 @@ See https://rootlesscontaine.rs/getting-started/common/ . Aliases: []string{"p"}, Usage: "publish ports. e.g. \"127.0.0.1:8080:80/tcp\"", }, CategoryPort), + Categorize(&cli.BoolFlag{ + Name: "userns", + Usage: "create a User namespace (Usually this should be always set to true)", + Value: true, + }, CategoryProcess), Categorize(&cli.BoolFlag{ Name: "pidns", Usage: "create a PID namespace", @@ -325,6 +330,7 @@ func createParentOpt(clicontext *cli.Context) (parent.Opt, error) { PipeFDEnvKey: pipeFDEnvKey, StateDirEnvKey: stateDirEnvKey, ChildUseActivationEnvKey: childUseActivationEnvKey, + NoCreateUserNS: !clicontext.Bool("userns"), CreatePIDNS: clicontext.Bool("pidns"), CreateCgroupNS: clicontext.Bool("cgroupns"), CreateUTSNS: clicontext.Bool("utsns"), @@ -603,6 +609,7 @@ func createChildOpt(clicontext *cli.Context) (child.Opt, error) { DetachNetNS: detachNetNS, Propagation: clicontext.String("propagation"), EvacuateCgroup2: clicontext.String("evacuate-cgroup2") != "", + NoCreateUserNS: !clicontext.Bool("userns"), } switch reaperStr := clicontext.String("reaper"); reaperStr { case "auto": diff --git a/pkg/child/child.go b/pkg/child/child.go index 41f880a4..d094b101 100644 --- a/pkg/child/child.go +++ b/pkg/child/child.go @@ -284,6 +284,7 @@ type Opt struct { Propagation string // mount propagation type Reaper bool EvacuateCgroup2 bool // needs to correspond to parent.Opt.EvacuateCgroup2 is set + NoCreateUserNS bool } // statPIDNS is from https://github.com/containerd/containerd/blob/v1.7.2/services/introspection/pidns_linux.go#L25-L36 @@ -378,14 +379,11 @@ func Child(opt Opt) error { msg *messages.Message err error ) - if ok, err := hasCaps(); err != nil { - return err - } else if !ok { + if opt.NoCreateUserNS { msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentHello{})) if err != nil { return err } - msgChildHello := &messages.Message{ U: messages.U{ ChildHello: &messages.ChildHello{}, @@ -394,14 +392,32 @@ func Child(opt Opt) error { if err := messages.Send(pipe2W, msgChildHello); err != nil { return err } - - msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentInitIdmapCompleted{})) - if err != nil { + } else { + if ok, err := hasCaps(); err != nil { return err - } + } else if !ok { + msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentHello{})) + if err != nil { + return err + } - if err := gainCaps(); err != nil { - return fmt.Errorf("failed to gain the caps inside the user namespace: %w", err) + msgChildHello := &messages.Message{ + U: messages.U{ + ChildHello: &messages.ChildHello{}, + }, + } + if err := messages.Send(pipe2W, msgChildHello); err != nil { + return err + } + + msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentInitIdmapCompleted{})) + if err != nil { + return err + } + + if err := gainCaps(); err != nil { + return fmt.Errorf("failed to gain the caps inside the user namespace: %w", err) + } } } @@ -583,8 +599,12 @@ func NewNetNsWithPathWithoutEnter(p string) error { if err := os.WriteFile(p, nil, 0400); err != nil { return err } - // this is hard (not impossible though) to reimplement in Go: https://github.com/cloudflare/slirpnetstack/commit/d7766a8a77f0093d3cb7a94bd0ccbe3f67d411ba - cmd := exec.Command("unshare", "-n", "mount", "--bind", "/proc/self/ns/net", p) + // this is hard (not impossible though) to reimplement in Go without re-execing: https://github.com/cloudflare/slirpnetstack/commit/d7766a8a77f0093d3cb7a94bd0ccbe3f67d411ba + selfExe, err := os.Executable() + if err != nil { + return err + } + cmd := exec.Command(selfExe, "--userns=false", "--net=none", "--", "mount", "--bind", "/proc/self/ns/net", p) out, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("failed to execute %v: %w (out=%q)", cmd.Args, err, string(out)) diff --git a/pkg/network/lxcusernic/lxcusernic.go b/pkg/network/lxcusernic/lxcusernic.go index bf9746af..2b4033c0 100644 --- a/pkg/network/lxcusernic/lxcusernic.go +++ b/pkg/network/lxcusernic/lxcusernic.go @@ -20,6 +20,7 @@ import ( "github.com/rootless-containers/rootlesskit/v2/pkg/common" "github.com/rootless-containers/rootlesskit/v2/pkg/messages" "github.com/rootless-containers/rootlesskit/v2/pkg/network" + "github.com/rootless-containers/rootlesskit/v2/pkg/network/parentutils" "github.com/sirupsen/logrus" ) @@ -70,6 +71,14 @@ func (d *parentDriver) MTU() int { } func (d *parentDriver) ConfigureNetwork(childPID int, stateDir, detachedNetNSPath string) (*messages.ParentInitNetworkDriverCompleted, func() error, error) { + sameUserNSAsCurrent, err := parentutils.SameUserNSAsCurrent(childPID) + if err != nil { + return nil, nil, err + } + if sameUserNSAsCurrent { + return nil, nil, fmt.Errorf("driver %q needs userns", DriverName) + } + if detachedNetNSPath != "" { cmd := exec.Command("nsenter", "-t", strconv.Itoa(childPID), "-n"+detachedNetNSPath, "--no-fork", "-m", "-U", "--preserve-credentials", "sleep", "infinity") cmd.SysProcAttr = &syscall.SysProcAttr{ diff --git a/pkg/network/none/none.go b/pkg/network/none/none.go index 844fa146..dcc6300b 100644 --- a/pkg/network/none/none.go +++ b/pkg/network/none/none.go @@ -3,14 +3,12 @@ package none import ( "context" "os" - "os/exec" - "strconv" - "syscall" "github.com/rootless-containers/rootlesskit/v2/pkg/api" "github.com/rootless-containers/rootlesskit/v2/pkg/common" "github.com/rootless-containers/rootlesskit/v2/pkg/messages" "github.com/rootless-containers/rootlesskit/v2/pkg/network" + "github.com/rootless-containers/rootlesskit/v2/pkg/network/parentutils" ) func NewParentDriver() (network.ParentDriver, error) { @@ -35,21 +33,15 @@ func (d *parentDriver) Info(ctx context.Context) (*api.NetworkDriverInfo, error) func (d *parentDriver) ConfigureNetwork(childPID int, stateDir, detachedNetNSPath string) (*messages.ParentInitNetworkDriverCompleted, func() error, error) { var cleanups []func() error - if detachedNetNSPath != "" { - cmd := exec.Command("nsenter", "-t", strconv.Itoa(childPID), "-n"+detachedNetNSPath, "-m", "-U", "--no-fork", "--preserve-credentials", "sleep", "infinity") - cmd.SysProcAttr = &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGKILL, - } - err := cmd.Start() - if err != nil { - return nil, nil, err - } - childPID = cmd.Process.Pid + sameUserNSAsCurrent, err := parentutils.SameUserNSAsCurrent(childPID) + if err != nil { + return nil, nil, err } + userns := !sameUserNSAsCurrent cmds := [][]string{ - []string{"nsenter", "-t", strconv.Itoa(childPID), "-n", "-m", "-U", "--no-fork", "--preserve-credentials", "ip", "address", "add", "127.0.0.1/8", "dev", "lo"}, - []string{"nsenter", "-t", strconv.Itoa(childPID), "-n", "-m", "-U", "--no-fork", "--preserve-credentials", "ip", "link", "set", "lo", "up"}, + parentutils.NSEnter(childPID, detachedNetNSPath, userns, []string{"ip", "address", "add", "127.0.0.1/8", "dev", "lo"}), + parentutils.NSEnter(childPID, detachedNetNSPath, userns, []string{"ip", "link", "set", "lo", "up"}), } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return nil, nil, err diff --git a/pkg/network/parentutils/parentutils.go b/pkg/network/parentutils/parentutils.go index 34ab6ecb..178dae2d 100644 --- a/pkg/network/parentutils/parentutils.go +++ b/pkg/network/parentutils/parentutils.go @@ -3,15 +3,21 @@ package parentutils import ( "fmt" "os" + "path/filepath" "strconv" "github.com/rootless-containers/rootlesskit/v2/pkg/common" ) func PrepareTap(childPID int, childNetNsPath string, tap string) error { + sameUserNSAsCurrent, err := SameUserNSAsCurrent(childPID) + if err != nil { + return err + } + userns := !sameUserNSAsCurrent cmds := [][]string{ - nsenter(childPID, childNetNsPath, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}), - nsenter(childPID, childNetNsPath, []string{"ip", "link", "set", tap, "up"}), + NSEnter(childPID, childNetNsPath, userns, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}), + NSEnter(childPID, childNetNsPath, userns, []string{"ip", "link", "set", tap, "up"}), } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return fmt.Errorf("executing %v: %w", cmds, err) @@ -19,14 +25,34 @@ func PrepareTap(childPID int, childNetNsPath string, tap string) error { return nil } -func nsenter(childPID int, childNetNsPath string, cmd []string) []string { +func NSEnter(childPID int, childNetNsPath string, userns bool, cmd []string) []string { fullCmd := []string{"nsenter", "-t", strconv.Itoa(childPID)} if childNetNsPath != "" { fullCmd = append(fullCmd, "-n"+childNetNsPath) } else { fullCmd = append(fullCmd, "-n") } - fullCmd = append(fullCmd, []string{"-m", "-U", "--preserve-credentials"}...) + fullCmd = append(fullCmd, "-m") + if userns { + fullCmd = append(fullCmd, []string{"-U", "--preserve-credentials"}...) + } fullCmd = append(fullCmd, cmd...) return fullCmd } + +func SameNS(pid [2]int, nsName string) (bool, error) { + var links [2]string + for i := 0; i < 2; i++ { + p := filepath.Join("/proc", strconv.Itoa(pid[i]), "ns", filepath.Clean(nsName)) + var err error + links[i], err = os.Readlink(p) + if err != nil { + return false, err + } + } + return links[0] == links[1], nil +} + +func SameUserNSAsCurrent(pid int) (bool, error) { + return SameNS([2]int{os.Getpid(), pid}, "user") +} diff --git a/pkg/parent/parent.go b/pkg/parent/parent.go index 733d603e..669de2bc 100644 --- a/pkg/parent/parent.go +++ b/pkg/parent/parent.go @@ -36,6 +36,7 @@ type Opt struct { NetworkDriver network.ParentDriver // nil for HostNetwork PortDriver port.ParentDriver // nil for --port-driver=none PublishPorts []port.Spec + NoCreateUserNS bool CreatePIDNS bool CreateCgroupNS bool CreateUTSNS bool @@ -78,7 +79,7 @@ func checkPreflight(opt Opt) error { return fmt.Errorf("state dir is inaccessible: %w", err) } - if os.Geteuid() == 0 { + if os.Geteuid() == 0 && !opt.NoCreateUserNS { logrus.Warn("Running RootlessKit as the root user is unsupported.") } @@ -176,7 +177,11 @@ func Parent(opt Opt) error { cmd := exec.Command("/proc/self/exe", os.Args[1:]...) cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, - Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + Cloneflags: syscall.CLONE_NEWNS, + } + + if !opt.NoCreateUserNS { + cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER } if opt.NetworkDriver != nil { @@ -228,19 +233,21 @@ func Parent(opt Opt) error { return err } - if err := setupUIDGIDMap(cmd.Process.Pid, opt.SubidSource); err != nil { - return fmt.Errorf("failed to setup UID/GID map: %w", err) - } - msgParentInitIdmapCompleted := &messages.Message{ - U: messages.U{ - ParentInitIdmapCompleted: &messages.ParentInitIdmapCompleted{}, - }, - } - if err := messages.Send(pipeW, msgParentInitIdmapCompleted); err != nil { - return err - } - if _, err := messages.WaitFor(pipe2R, messages.Name(messages.ChildInitUserNSCompleted{})); err != nil { - return err + if !opt.NoCreateUserNS { + if err := setupUIDGIDMap(cmd.Process.Pid, opt.SubidSource); err != nil { + return fmt.Errorf("failed to setup UID/GID map: %w", err) + } + msgParentInitIdmapCompleted := &messages.Message{ + U: messages.U{ + ParentInitIdmapCompleted: &messages.ParentInitIdmapCompleted{}, + }, + } + if err := messages.Send(pipeW, msgParentInitIdmapCompleted); err != nil { + return err + } + if _, err := messages.WaitFor(pipe2R, messages.Name(messages.ChildInitUserNSCompleted{})); err != nil { + return err + } } sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid)