Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
05e4c78
nydus: support host-sharing
csegarragonz Jan 20, 2025
b265312
nydus: minor fix
csegarragonz Jan 22, 2025
5ae8036
nydus: change names in set mode
csegarragonz Jan 24, 2025
a3a029f
single entrypoint to set log level
csegarragonz Jan 31, 2025
7fc5a49
add method to stop containerd and nydus-sn containers
csegarragonz Jan 31, 2025
25236dc
nydus: install host-sharing as separate snapshotter
csegarragonz Jan 31, 2025
0b4028f
improve purge + add doc
csegarragonz Feb 3, 2025
b46a1a4
docs: update
csegarragonz Feb 3, 2025
668c4eb
docs: update
csegarragonz Feb 3, 2025
95e373f
gha: purge when setting snapshotter mode
csegarragonz Feb 6, 2025
eb324d0
nydus-snapshotter: better clean-up
csegarragonz Feb 6, 2025
e2a569d
ns: cleanup and debug
csegarragonz Feb 6, 2025
0d00170
gha: add sleep before purge
csegarragonz Feb 6, 2025
0a7ba15
gha: more debugging
csegarragonz Feb 6, 2025
a655fed
gha: restart vm-cache after change snapshotter mode
csegarragonz Feb 6, 2025
7c0a11c
gha: export variable before
csegarragonz Feb 6, 2025
294fa97
gha: fix
csegarragonz Feb 6, 2025
ae794d2
tools: update check-fork-hashes tool
csegarragonz Feb 7, 2025
b23cf7a
nydus-image: add support for hot-replacing and patching
csegarragonz Feb 7, 2025
337bccc
nydus-snapshotter: fix purge by waiting on metadata to be gc-ed
csegarragonz Feb 7, 2025
b2a5d08
check-hashes: run cargo fmt
csegarragonz Feb 7, 2025
05daf59
containerd: fix nit in bbolt install
csegarragonz Feb 7, 2025
1f1ebe6
containerd: fix bbolt clean-up
csegarragonz Feb 7, 2025
96647c7
gha: add debug logging
csegarragonz Feb 10, 2025
34d7e41
bbolt: more installation fix-ups
csegarragonz Feb 10, 2025
d1ef0ec
ns: fix typo
csegarragonz Feb 10, 2025
1d18319
gha: temporarily disable host-share tests with tdx
csegarragonz Feb 10, 2025
68cfe5e
gha: remove --debug
csegarragonz Feb 10, 2025
5222854
ns: fix deploy without --debug
csegarragonz Feb 11, 2025
e012412
gha: fix knative tests
csegarragonz Feb 11, 2025
00431d4
gha: run knative chaining in host-share
csegarragonz Feb 11, 2025
80494f6
gha: fixes
csegarragonz Feb 11, 2025
4cbbf14
gha: more fetch
csegarragonz Feb 11, 2025
f8ab3de
gha: tests passing
csegarragonz Feb 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 66 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,66 @@ jobs:
sleep 5
echo "Knative test succesful!"

- name: "Run nydus host-share test"
# Host-share mechanisms seem not to work with TDX
if: ${{ matrix.tee != 'tdx' }}
run: |
# Change the snapshotter mode and purge (necessary to clear
# containerd's content store)
./bin/inv_wrapper.sh nydus-snapshotter.set-mode host-share
sleep 2
./bin/inv_wrapper.sh nydus-snapshotter.purge

export SC2_RUNTIME_CLASS=qemu-${{ matrix.tee }}-sc2
export POD_LABEL="apps.sc2.io/name=helloworld-py"

# When updating the runtime we update all the config files, so we
# need to re-start the cache
sudo -E ./vm-cache/target/release/vm-cache restart

# ----- Python Test ----

echo "Running python test..."
envsubst < ./demo-apps/helloworld-py/deployment.yaml | ./bin/kubectl apply -f -

# Wait for pod to be ready
until [ "$(./bin/kubectl get pods -l ${POD_LABEL} -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}')" = "True" ]; do echo "Waiting for pod to be ready..."; sleep 2; done
sleep 1

# Get the pod's IP
service_ip=$(./bin/kubectl get services -o jsonpath='{.items[?(@.metadata.name=="coco-helloworld-py-node-port")].spec.clusterIP}')
[ "$(curl --retry 3 -X GET ${service_ip}:8080)" = "Hello World!" ]
envsubst < ./demo-apps/helloworld-py/deployment.yaml | ./bin/kubectl delete -f -

# Wait for pod to be deleted
./bin/kubectl wait --for=delete -l ${POD_LABEL} pod --timeout=30s

# Extra cautionary sleep
sleep 5
echo "Python test succesful!"

# ----- Knative Test ----
envsubst < ./demo-apps/helloworld-knative/service.yaml | ./bin/kubectl apply -f -
sleep 1

# Get the service URL
service_url=$(./bin/kubectl get ksvc helloworld-knative --output=custom-columns=URL:.status.url --no-headers)
[ "$(curl --retry 3 ${service_url})" = "Hello World!" ]

# Wait for pod to be deleted
envsubst < ./demo-apps/helloworld-knative/service.yaml | ./bin/kubectl delete -f -
./bin/kubectl wait --for=delete -l ${POD_LABEL} pod --timeout=60s

# Extra cautionary sleep
sleep 5
echo "Knative test succesful!"

# Change the snapshotter mode back again (and purge)
#
./bin/inv_wrapper.sh nydus-snapshotter.set-mode guest-pull
sleep 2
./bin/inv_wrapper.sh nydus-snapshotter.purge

- name: "Enable default-memory annotation"
run: |
for runtime_class in ${{ matrix.runtime_classes }}; do
Expand All @@ -182,8 +242,12 @@ jobs:

# After changing the annotation of the qemu-snp-sc2 runtime class we
# need to restart the VM cache
sudo -E ./vm-cache/target/release/vm-cache stop
sudo -E ./vm-cache/target/release/vm-cache background
sudo -E ./vm-cache/target/release/vm-cache restart

- name: "Fetch content (see #130)"
run: |
sudo ctr -n k8s.io content fetch -k sc2cr.io/system/knative-sidecar@sha256:79d5f6031f308cee209c4c32eeab9113b29a1ed4096c5d657504096734ca3b1d
sudo ctr -n k8s.io content fetch registry.k8s.io/pause:3.8

- name: "Run knative chaining demo"
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Installed binaries
bbolt
cosign
crictl
kubeadm
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ For further documentation, you may want to check these other documents:
* [CoCo Upgrade](./docs/upgrade_coco.md) - upgrade the current CoCo version.
* [Guest Components](./docs/guest_components.md) - instructions to patch components inside SC2 guests.
* [Host Kernel](./docs/host_kernel.md) - bump the kernel version in the host.
* [Image Pull](./docs/image_pull.md) - details on the image-pulling mechanisms supported in SC2.
* [K8s](./docs/k8s.md) - documentation about configuring a single-node Kubernetes cluster.
* [Kata](./docs/kata.md) - instructions to build our custom Kata fork and `initrd` images.
* [Key Broker Service](./docs/kbs.md) - docs on using and patching the KBS.
Expand Down
54 changes: 54 additions & 0 deletions docs/image_pull.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
## Image Pull

This document describes the different mechanisms to get a container image
inside a cVM in SC2. We _always_ assume that the integrity of container images
must be validated. We also consider the situation in which their confidentiality
must also be preserved.

### Guest Pull

The guest pull mechanism always pulls the container image inside the guest cVM.
This is the default mechanism in CoCo as it allows the most secure and simplest
deployment: users sign (and encrypt) container images locally, they upload
them to a container registry, pull them inside the cVM, and decrypt them inside
the cVM.

Albeit secure, this mechanism has high performance overheads as the image must
be pulled every single time, precluding any caching benefits.

To mitigate the performance overheads, we can convert the OCI image to a
Nydus image, which supports lazy loading of container data.

### Host Share

The host share mechanism mounts a container image from the host to the guest.
Given that the host is untrusted, this mechanism only works for images that
do not have confidentiality requirements. To maintain integrity, we mount
the image with `dm-verity`, and validate the `dm-verity` device as part of
attestation.

We choose to mount individual layers separately (rather than whole images),
but we should measure whether the former is actually better than the latter.

We could mount encrypted images from the host to the guest, but we would be
losing out on the de-duplication opportunities on the host.

### Usage

Each image pull mechanism is implemented as a different remote snapshotter
in containerd, all of them based on the [nydus-snapshotter](
https://github.com/containerd/nydus-snapshotter/) plus our modifications.

To switch between different image-pulling mechanisms, you only need to change
the snapshotter mode:

```bash
inv nydus-snapshotter.set-mode [guest-pull,host-share]
```

If you see any snapshotter related issues (either in the `containerd` or the
`nydus-snapshotter` journal logs), you can purge the snapshotters:

```bash
inv nydus-snapshotter.purge
```
23 changes: 18 additions & 5 deletions docs/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,27 @@ ctr -n k8s.io content fetch ${IMAGE_NAME}
the image name is the image tag appearing right before the error message in
the pod logs.

### Nydus snapshot corruption
### Rootfs Mount Issue

Sometimes, after hot-replacing the nydus-snapshotter, snapshots become corrupted,
and we can see the error below.
Sometimes, if we are mixing and matching different snapshotters, we may run
into the following error:

```
Failed to create pod sandbox: rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: failed to mount /run/kata-containers/shared/containers/0a583f0691d78e2036425f99bdac8e03302158320c1c55a5c6482cae7e729009/rootfs to /run/kata-containers/0a583f0691d78e2036425f99bdac8e03302158320c1c55a5c6482cae7e729009/rootfs, with error: ENOENT: No such file or directory
```

The only solution I found was to bump to a more up-to-date version of nydus.
This seemed to fix the issue.
This is because the pause image bundle has not been unpacked correctly. Note
that the pause image bundle is unpacked into the `/run/kata-containers/shared`
directory, and then mounted into the `/run/kata-containers/<cid>` one.

This usually happens when containerd believes that we already have the pause
image, so we do not need to pull it. This prevents the snapshotter from
generating the respective Kata virtual volumes.

As a rule of thumb, a good fix is to remove all images involved in the app
from the content store, and purge snapshotter caches:

```bash
sudo crictl rmi <hash>
inv nydus-snapshotter.purge
```
119 changes: 98 additions & 21 deletions tasks/containerd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
from os import stat
from os.path import join
from subprocess import run
from tasks.util.containerd import is_containerd_active, restart_containerd
from tasks.util.containerd import (
is_containerd_active,
restart_containerd,
wait_for_containerd_socket,
)
from tasks.util.docker import copy_from_ctr_image, is_ctr_running
from tasks.util.env import (
BIN_DIR,
CONF_FILES_DIR,
CONTAINERD_CONFIG_FILE,
CONTAINERD_CONFIG_ROOT,
Expand All @@ -14,7 +19,8 @@
print_dotted_line,
)
from tasks.util.toml import update_toml
from tasks.util.versions import CONTAINERD_VERSION
from tasks.util.versions import CONTAINERD_VERSION, GO_VERSION
from time import sleep

CONTAINERD_CTR_NAME = "containerd-workon"
CONTAINERD_IMAGE_TAG = (
Expand All @@ -31,23 +37,24 @@
CONTAINERD_HOST_BINPATH = "/usr/bin"


def do_build(debug=False):
docker_cmd = "docker build -t {} -f {} .".format(
def do_build(nocache=False):
docker_cmd = "docker build{} -t {} -f {} .".format(
" --no-cache" if nocache else "",
CONTAINERD_IMAGE_TAG,
join(PROJ_ROOT, "docker", "containerd.dockerfile"),
)
result = run(docker_cmd, shell=True, capture_output=True, cwd=PROJ_ROOT)
assert result.returncode == 0, print(result.stderr.decode("utf-8").strip())
if debug:
print(result.stdout.decode("utf-8").strip())
run(docker_cmd, shell=True, check=True, cwd=PROJ_ROOT)


@task
def build(ctx):
def build(ctx, nocache=False, push=False):
"""
Build the containerd fork for CoCo
"""
do_build(debug=True)
do_build(nocache=nocache)

if push:
run(f"docker push {CONTAINERD_IMAGE_TAG}", shell=True, check=True)


@task
Expand All @@ -73,19 +80,23 @@ def cli(ctx, mount_path=join(PROJ_ROOT, "..", "containerd")):


@task
def set_log_level(ctx, log_level):
def stop(ctx):
"""
Set containerd's log level, must be one in: info, debug
Stop the containerd work-on container
"""
allowed_log_levels = ["info", "debug"]
if log_level not in allowed_log_levels:
print(
"Unsupported log level '{}'. Must be one in: {}".format(
log_level, allowed_log_levels
)
)
return
result = run(
"docker rm -f {}".format(CONTAINERD_CTR_NAME),
shell=True,
check=True,
capture_output=True,
)
assert result.returncode == 0


def set_log_level(log_level):
"""
Set containerd's log level, must be one in: info, debug
"""
updated_toml_str = """
[debug]
level = "{log_level}"
Expand Down Expand Up @@ -158,7 +169,9 @@ def install(debug=False, clean=False):
# Populate the default config file for a clean start
run(f"sudo mkdir -p {CONTAINERD_CONFIG_ROOT}", shell=True, check=True)
if clean:
config_cmd = "containerd config default > {}".format(CONTAINERD_CONFIG_FILE)
config_cmd = "{}/containerd config default > {}".format(
host_base_path, CONTAINERD_CONFIG_FILE
)
config_cmd = "sudo bash -c '{}'".format(config_cmd)
run(config_cmd, shell=True, check=True)

Expand All @@ -169,4 +182,68 @@ def install(debug=False, clean=False):
if stat(CONTAINERD_CONFIG_FILE).st_size == 0:
raise RuntimeError("containerd config file is empty!")

# Wait for containerd to be ready
sleep(2)
while not is_containerd_active():
if debug:
print("Waiting for containerd to be active...")

sleep(2)

# Then make sure we can dial the socket
wait_for_containerd_socket()

print("Success!")


def install_bbolt(debug=False, clean=False):
    """
    Install the `bbolt` CLI into BIN_DIR by building it inside a throw-away
    Go container and copying the resulting binary out.

    :param debug: print the stdout of the intermediate docker commands.
    :param clean: currently unused; kept for signature consistency with the
        other install helpers in this file.
    :raises RuntimeError: if any of the docker steps fails (the temporary
        container is removed before raising).
    """
    print_dotted_line("Installing bbolt")

    # bbolt is used against containerd's metadata DB, so wait until we can
    # dial the containerd socket before doing anything
    wait_for_containerd_socket()

    tmp_ctr_name = "bbolt_install"

    def rm_container():
        # Remove the temporary build container
        result = run(f"docker rm -f {tmp_ctr_name}", shell=True, capture_output=True)
        assert result.returncode == 0

    # Clear any leftover container from a previous (failed) run
    if is_ctr_running(tmp_ctr_name):
        rm_container()

    def run_checked(cmd, error_msg):
        # Run one docker step; on failure print stderr, clean up the
        # temporary container, and raise. NOTE: the original code had a
        # trailing comma after each print() call, turning the statement
        # into a no-op tuple expression -- fixed here.
        result = run(cmd, shell=True, capture_output=True)
        if result.returncode != 0:
            print(result.stderr.decode("utf-8").strip())
            rm_container()
            raise RuntimeError(error_msg)
        if debug:
            print(result.stdout.decode("utf-8").strip())

    # Start a detached Go build container
    run_checked(
        f"docker run -d -it --name {tmp_ctr_name} golang:{GO_VERSION} bash",
        "Error running container",
    )

    # Build and install bbolt inside the container
    run_checked(
        f"docker exec {tmp_ctr_name} go install go.etcd.io/bbolt/cmd/bbolt@latest",
        "Error execing into container",
    )

    # Copy the built binary out of the container into our bin dir
    run_checked(
        f"docker cp {tmp_ctr_name}:/go/bin/bbolt {BIN_DIR}/bbolt",
        "Error cp-ing from container",
    )

    rm_container()

    print("Success!")
13 changes: 2 additions & 11 deletions tasks/kata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,10 @@ def stop(ctx):
stop_kata_workon_ctr()


@task
def set_log_level(ctx, log_level):
def set_log_level(log_level):
"""
Set kata's log level, must be one in: info, debug
"""
allowed_log_levels = ["info", "debug"]
if log_level not in allowed_log_levels:
print(
"Unsupported log level '{}'. Must be one in: {}".format(
log_level, allowed_log_levels
)
)
return

enable_debug = str(log_level == "debug").lower()

for runtime in KATA_RUNTIMES + SC2_RUNTIMES:
Expand Down Expand Up @@ -146,6 +136,7 @@ def hot_replace_shim(ctx, runtime="qemu-snp-sc2"):
),
),
sc2=runtime in SC2_RUNTIMES,
hot_replace=True,
)

restart_containerd()
Loading