From 629f6d329abc1a9727e25b2cc2a852396077144a Mon Sep 17 00:00:00 2001 From: lifubang Date: Wed, 30 Oct 2024 00:15:00 +0800 Subject: [PATCH 1/2] tests: join more than one container namespaces Signed-off-by: lifubang --- tests/integration/run.bats | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/integration/run.bats b/tests/integration/run.bats index c6e30709402..3822e88b628 100644 --- a/tests/integration/run.bats +++ b/tests/integration/run.bats @@ -208,6 +208,68 @@ function teardown() { grep -E '^boottime\s+1337\s+3141519$' <<<"$output" } +# issue: https://github.com/opencontainers/runc/issues/4390 +@test "runc run [joining more than one container namespaces]" { + requires timens root + + update_config '.process.args = ["sleep", "infinity"]' + + if [ $EUID -eq 0 ]; then + update_config '.linux.namespaces += [{"type": "user"}] + | .linux.uidMappings = [{"containerID": 0, "hostID": 100000, "size": 100}] + | .linux.gidMappings = [{"containerID": 0, "hostID": 200000, "size": 200}]' + remap_rootfs + fi + runc run -d --console-socket "$CONSOLE_SOCKET" target_ctr1 + [ "$status" -eq 0 ] + pid1="$(__runc state target_ctr1 | jq .pid)" + + update_config '.linux.namespaces += [{"type": "time"}] + | .linux.timeOffsets = { + "monotonic": { "secs": 7881, "nanosecs": 2718281 }, + "boottime": { "secs": 1337, "nanosecs": 3141519 } + }' + runc run -d --console-socket "$CONSOLE_SOCKET" target_ctr2 + [ "$status" -eq 0 ] + pid2="$(__runc state target_ctr2 | jq .pid)" + + update_config '.linux.namespaces |= map_values(.path = if .type == "user" then "/proc/'"$pid1"'/ns/user" + elif .type == "pid" then "/proc/'"$pid1"'/ns/pid" + elif .type == "time" then "/proc/'"$pid2"'/ns/time" + else "" end)' + # Remove the userns and timens configuration (they cannot be changed). 
+ update_config '.linux |= (del(.uidMappings) | del(.gidMappings) | del(.timeOffsets))' + runc run -d --console-socket "$CONSOLE_SOCKET" attached_ctr + [ "$status" -eq 0 ] + + # Make sure there are two sleep processes in our container. + runc exec attached_ctr ps aux + [ "$status" -eq 0 ] + run -0 grep "sleep infinity" <<<"$output" + [ "${#lines[@]}" -eq 2 ] + + # ... that the userns mappings are the same... + runc exec attached_ctr cat /proc/self/uid_map + [ "$status" -eq 0 ] + if [ $EUID -eq 0 ]; then + grep -E '^\s+0\s+100000\s+100$' <<<"$output" + else + grep -E '^\s+0\s+'$EUID'\s+1$' <<<"$output" + fi + runc exec attached_ctr cat /proc/self/gid_map + [ "$status" -eq 0 ] + if [ $EUID -eq 0 ]; then + grep -E '^\s+0\s+200000\s+200$' <<<"$output" + else + grep -E '^\s+0\s+'$EUID'\s+1$' <<<"$output" + fi + + # ... as well as the timens offsets. + runc exec attached_ctr cat /proc/self/timens_offsets + grep -E '^monotonic\s+7881\s+2718281$' <<<"$output" + grep -E '^boottime\s+1337\s+3141519$' <<<"$output" +} + @test "runc run [execve error]" { cat <rootfs/run.sh #!/mmnnttbb foo bar From 8e79d03170a7be6827a07b083ef25d319acdc0f8 Mon Sep 17 00:00:00 2001 From: lifubang Date: Wed, 30 Oct 2024 23:33:45 +0800 Subject: [PATCH 2/2] libct: fix user ns join order for rootful container If we are rootless and there are userns-owned namespaces, we need to be in the userns in order to have the necessary permissions to do setns. This is what 2cd9c31 fixed. However, for a rootful container, joining the userns first may leave us unable to join the remaining namespaces, because the userns we have just joined may have no permissions over them. For rootful containers we therefore join the userns last. 
Signed-off-by: lifubang --- libcontainer/container_linux.go | 10 +++++++--- libcontainer/nsenter/nsexec.c | 7 ++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c02116177ad..e66ca53c43c 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -959,9 +959,13 @@ func (c *Container) currentOCIState() (*specs.State, error) { // orderNamespacePaths sorts namespace paths into a list of paths that we // can setns in order. -func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { +func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]string, rootless bool) ([]string, error) { paths := []string{} - for _, ns := range configs.NamespaceTypes() { + nsTypes := configs.NamespaceTypes() + if !rootless { + nsTypes = append(nsTypes[1:], nsTypes[0]) + } + for _, ns := range nsTypes { // Remove namespaces that we don't need to join. if !c.config.Namespaces.Contains(ns) { @@ -1037,7 +1041,7 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa // write custom namespace paths if len(nsMaps) > 0 { - nsPaths, err := c.orderNamespacePaths(nsMaps) + nsPaths, err := c.orderNamespacePaths(nsMaps, c.config.RootlessEUID) if err != nil { return nil, err } diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 565b2ca2030..fdb083af51a 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -492,9 +492,10 @@ void join_namespaces(char *nslist) /* * The ordering in which we join namespaces is important. We should - * always join the user namespace *first*. This is all guaranteed - * from the container_linux.go side of this, so we're just going to - * follow the order given to us. + * always join the user namespace *first* for rootless container, but + * join the user namespace *last* for rootful container. 
This is all + * guaranteed from the container_linux.go side of this, so we're just + * going to follow the order given to us. */ for (i = 0; i < num; i++) {