From fc158be70eeaeddd8837f73010e9f8556735d421 Mon Sep 17 00:00:00 2001 From: Rodny Molina Date: Mon, 3 Nov 2025 02:59:11 +0000 Subject: [PATCH 1/4] test: Fix CoreDNS loop detection and Flannel compatibility for K8s 1.32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes: - Pin Flannel to v0.25.1 for compatibility with K8s 1.32 and containerd - Add coredns_fix_loop() function to prevent DNS forwarding loops - Configure CoreDNS to use external DNS (8.8.8.8) instead of /etc/resolv.conf - Automatically apply CoreDNS fix (its configmap) during cluster initialization The above fixes the CoreDNS loop issue that occurs in Kubernetes-in-Docker setups. Here's the explanation: 1. CoreDNS Configuration: By default, CoreDNS (the DNS server for Kubernetes) is configured to forward DNS queries it can't resolve to the nameservers listed in /etc/resolv.conf 2. Container Nesting: In our setup, we have: - Host linux machine - K8s node container (running inside a priv container with Docker+Sysbox) - Pods inside the K8s node (running inside the K8s node container) 3. The Loop: Inside the K8s node container, /etc/resolv.conf points back to 127.0.0.1 (localhost) or to the container's own IP address. This creates a circular reference: - Pod needs to resolve DNS → asks CoreDNS - CoreDNS can't resolve → forwards to /etc/resolv.conf - /etc/resolv.conf points to 127.0.0.1 → goes back to CoreDNS - Infinite loop detected! 4. 
CoreDNS Loop Detection: CoreDNS has a "loop" plugin that detects this circular forwarding and crashes the pod with a FATAL error to prevent infinite loops: ```[FATAL] plugin/loop: Loop (127.0.0.1:41329 -> :53) detected``` The Fix is to replace the CoreDNS forwarding target from /etc/resolv.conf to external DNS servers (8.8.8.8, 8.8.4.4): Signed-off-by: Rodny Molina --- tests/kind/kind-custom-net.bats | 2 +- tests/scr/kindbox | 32 ++++++++++++++++++++++++++++++++ tests/scr/testSysbox | 1 - 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/kind/kind-custom-net.bats b/tests/kind/kind-custom-net.bats index 3b05eea2..a4259103 100644 --- a/tests/kind/kind-custom-net.bats +++ b/tests/kind/kind-custom-net.bats @@ -28,7 +28,7 @@ export num_workers=2 export KUBECONFIG=${HOME}/.kube/${cluster}-config # Cluster's node image. -export k8s_version="v1.21.12" +export k8s_version="v1.32.9" export node_image="${CTR_IMG_REPO}/k8s-node-test:${k8s_version}" function teardown() { diff --git a/tests/scr/kindbox b/tests/scr/kindbox index 438d6f66..ef5389eb 100755 --- a/tests/scr/kindbox +++ b/tests/scr/kindbox @@ -173,6 +173,29 @@ function flannel_unconfig() { fi } +function coredns_fix_loop() { + local node=$1 + local output + + # Fix CoreDNS loop detection issue by forwarding to external DNS (8.8.8.8) instead + # of /etc/resolv.conf. This is specific to running Kubernetes inside containers (like + # our kindbox setup) because the container's /etc/resolv.conf doesn't point to real + # external nameservers; instead, it points to Docker's internal DNS or localhost, which + # eventually loops back to CoreDNS itself. + output=$(sh -c "docker exec ${node} sh -c 'kubectl get configmap coredns -n kube-system -o yaml | sed \"s|forward . /etc/resolv.conf|forward . 8.8.8.8 8.8.4.4|g\" | kubectl apply -f -'" 2>&1) + if [[ $? -ne 0 ]]; then + echo "$output" + return 1 + fi + + # Restart CoreDNS pods to apply the new configuration. 
+ output=$(sh -c "docker exec ${node} sh -c 'kubectl delete pods -n kube-system -l k8s-app=kube-dns'" 2>&1) + if [[ $? -ne 0 ]]; then + echo "$output" + return 1 + fi +} + function weave_config() { local node=$1 local output @@ -339,6 +362,15 @@ function k8s_master_init() { return 1 fi + [[ $VERBOSE ]] && printf " - Fixing CoreDNS loop detection on $node ...\n" + + # Required to fix DNS forwarding loop + output=$(coredns_fix_loop ${node}) + if [[ $? -ne 0 ]]; then + ERR="coredns fix failed on ${node}: ${output}" + return 1 + fi + [[ $VERBOSE ]] && printf " - Waiting for $node to be ready ...\n" output=$(wait_for_node_ready ${node}) diff --git a/tests/scr/testSysbox b/tests/scr/testSysbox index 52f28d78..d7729d8f 100755 --- a/tests/scr/testSysbox +++ b/tests/scr/testSysbox @@ -365,7 +365,6 @@ function main() { test_with_docker_userns_remap test_sysbox_config test_sysbox_lifecycle - fi } From b8e5f6c940deca77acc9cfe203368a445be4b66b Mon Sep 17 00:00:00 2001 From: Rodny Molina Date: Mon, 3 Nov 2025 03:22:57 +0000 Subject: [PATCH 2/4] test: Update KinD tests to point to recent k8s releases Signed-off-by: Rodny Molina --- tests/pods/k8s-in-pod.bats | 2 +- tests/pods/manifests/k8s-master-container.json | 2 +- tests/pods/manifests/k8s-worker-container.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pods/k8s-in-pod.bats b/tests/pods/k8s-in-pod.bats index 83b6b311..10947516 100644 --- a/tests/pods/k8s-in-pod.bats +++ b/tests/pods/k8s-in-pod.bats @@ -50,7 +50,7 @@ function teardown() { crictl exec $k8s_worker_syscont sh -c 'echo "127.0.0.1 localhost" > /etc/hosts' # Initialize the K8s master pod - crictl exec $k8s_master_syscont sh -c "kubeadm init --kubernetes-version=v1.21.12 --pod-network-cidr=10.244.0.0/16" + crictl exec $k8s_master_syscont sh -c "kubeadm init --kubernetes-version=v1.32.9 --pod-network-cidr=10.244.0.0/16" # Configure kubectl to talk to inner K8s cluster crictl_kubectl_config $k8s_master_syscont "inner-cluster" diff --git 
a/tests/pods/manifests/k8s-master-container.json b/tests/pods/manifests/k8s-master-container.json index e641e13d..e2ca6087 100644 --- a/tests/pods/manifests/k8s-master-container.json +++ b/tests/pods/manifests/k8s-master-container.json @@ -3,7 +3,7 @@ "name": "k8s-master" }, "image":{ - "image": "ghcr.io/nestybox/k8s-node-test:v1.21.12" + "image": "ghcr.io/nestybox/k8s-node-test:v1.32.9" }, "command": [ "/sbin/init" diff --git a/tests/pods/manifests/k8s-worker-container.json b/tests/pods/manifests/k8s-worker-container.json index 72a1975c..b7a5c1dd 100644 --- a/tests/pods/manifests/k8s-worker-container.json +++ b/tests/pods/manifests/k8s-worker-container.json @@ -3,7 +3,7 @@ "name": "k8s-worker" }, "image":{ - "image": "ghcr.io/nestybox/k8s-node-test:v1.21.12" + "image": "ghcr.io/nestybox/k8s-node-test:v1.32.9" }, "command": [ "/sbin/init" From baa5578eafc72ac12c2ca730f06a273b6e30c9c9 Mon Sep 17 00:00:00 2001 From: Rodny Molina Date: Mon, 3 Nov 2025 03:42:03 +0000 Subject: [PATCH 3/4] test: Reduce scope of the CI job Signed-off-by: Rodny Molina --- tests/scr/testSysbox | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/scr/testSysbox b/tests/scr/testSysbox index d7729d8f..af4c1508 100755 --- a/tests/scr/testSysbox +++ b/tests/scr/testSysbox @@ -89,7 +89,7 @@ function run_ci_tests() { printf "\nExecuting multi-arch tests ... \n" bats --tap tests/multi-arch printf "\nExecuting buildx + buildkit tests ... \n" - bats --tap tests/buildx/basic.bats + bats --tap tests/buildx/build.bats printf "\nExecuting xattr syscall tests ... \n" bats --tap tests/syscall/xattr printf "\nExecuting basic mount syscall-interception tests ... 
\n" @@ -358,11 +358,17 @@ function main() { fi test_with_idmapped_and_shiftfs - test_with_idmapped_only - test_with_shiftfs_only - test_with_rootfs_cloning - test_with_containerd_image_store - test_with_docker_userns_remap + + # Skip all these scenarios when running CI workflows to keep execution time + # under 1h. + if [ -z "$TEST_SYSBOX_CI" ]; then + test_with_idmapped_only + test_with_shiftfs_only + test_with_rootfs_cloning + test_with_containerd_image_store + test_with_docker_userns_remap + fi + test_sysbox_config test_sysbox_lifecycle fi From c2aeb17957754eea7a9ebc0eb05f7a6fb552c3c9 Mon Sep 17 00:00:00 2001 From: Rodny Molina Date: Mon, 3 Nov 2025 03:46:28 +0000 Subject: [PATCH 4/4] test: Add CI workflow for PR testing This provides automated validation of code changes before merge, using the same containerized test environment as our local development. Signed-off-by: Rodny Molina --- .github/workflows/test-pr.yml | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/test-pr.yml diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml new file mode 100644 index 00000000..7f3fe3fd --- /dev/null +++ b/.github/workflows/test-pr.yml @@ -0,0 +1,60 @@ +name: Sysbox Tests + +on: + pull_request: + branches: + - master + - main + paths-ignore: + - '**.md' + - 'docs/**' + - 'LICENSE' + - 'MAINTAINERS' + - 'OSS_DISCLOSURES.md' + +jobs: + test: + name: Run Sysbox Tests + runs-on: ubuntu-22.04 + timeout-minutes: 120 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Free up disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo docker system prune -af + df -h + + - name: Build test container image + run: | + make test-img + + - name: Prepare test volumes + run: | + sudo mkdir -p /var/tmp/sysbox-test-var-lib + sudo mkdir -p /var/tmp/sysbox-test-scratch + sudo 
mkdir -p /var/tmp/sysbox-test-run-sysbox + + - name: Run Sysbox CI tests in container + run: | + make test-sysbox-ci + + - name: Collect logs on failure + if: failure() + run: | + docker logs sysbox-test 2>&1 | tail -n 1000 || true + docker ps -a + + - name: Cleanup + if: always() + run: | + make test-cleanup || true + docker system prune -a -f || true