Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
05e4c78
nydus: support host-sharing
csegarragonz Jan 20, 2025
b265312
nydus: minor fix
csegarragonz Jan 22, 2025
5ae8036
nydus: change names in set mode
csegarragonz Jan 24, 2025
a3a029f
single entrypoint to set log level
csegarragonz Jan 31, 2025
7fc5a49
add method to stop containerd and nydus-sn containers
csegarragonz Jan 31, 2025
25236dc
nydus: install host-sharing as separate snapshotter
csegarragonz Jan 31, 2025
0b4028f
improve purge + add doc
csegarragonz Feb 3, 2025
b46a1a4
docs: update
csegarragonz Feb 3, 2025
668c4eb
docs: update
csegarragonz Feb 3, 2025
95e373f
gha: purge when setting snapshotter mode
csegarragonz Feb 6, 2025
eb324d0
nydus-snapshotter: better clean-up
csegarragonz Feb 6, 2025
e2a569d
ns: cleanup and debug
csegarragonz Feb 6, 2025
0d00170
gha: add sleep before purge
csegarragonz Feb 6, 2025
0a7ba15
gha: more debugging
csegarragonz Feb 6, 2025
a655fed
gha: restart vm-cache after change snapshotter mode
csegarragonz Feb 6, 2025
7c0a11c
gha: export variable before
csegarragonz Feb 6, 2025
294fa97
gha: fix
csegarragonz Feb 6, 2025
ae794d2
tools: update check-fork-hashes tool
csegarragonz Feb 7, 2025
b23cf7a
nydus-image: add support for hot-replacing and patching
csegarragonz Feb 7, 2025
337bccc
nydus-snapshotter: fix purge by waiting on metadata to be gc-ed
csegarragonz Feb 7, 2025
b2a5d08
check-hashes: run cargo fmt
csegarragonz Feb 7, 2025
05daf59
containerd: fix nit in bbolt install
csegarragonz Feb 7, 2025
1f1ebe6
containerd: fix bbolt clean-up
csegarragonz Feb 7, 2025
96647c7
gha: add debug logging
csegarragonz Feb 10, 2025
34d7e41
bbolt: more installation fix-ups
csegarragonz Feb 10, 2025
d1ef0ec
ns: fix typo
csegarragonz Feb 10, 2025
1d18319
gha: temporarily disable host-share tests with tdx
csegarragonz Feb 10, 2025
68cfe5e
gha: remove --debug
csegarragonz Feb 10, 2025
5222854
ns: fix deploy without --debug
csegarragonz Feb 11, 2025
e012412
gha: fix knative tests
csegarragonz Feb 11, 2025
00431d4
gha: run knative chaining in host-share
csegarragonz Feb 11, 2025
80494f6
gha: fixes
csegarragonz Feb 11, 2025
4cbbf14
gha: more fetch
csegarragonz Feb 11, 2025
f8ab3de
gha: tests passing
csegarragonz Feb 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 66 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,66 @@ jobs:
sleep 5
echo "Knative test succesful!"

- name: "Run nydus host-share test"
# Host-share mechanisms seem not to work with TDX
if: ${{ matrix.tee != 'tdx' }}
run: |
# Change the snapshotter mode and purge (necessary to clear
# containerd's content store)
./bin/inv_wrapper.sh nydus-snapshotter.set-mode host-share
sleep 2
./bin/inv_wrapper.sh nydus-snapshotter.purge

export SC2_RUNTIME_CLASS=qemu-${{ matrix.tee }}-sc2
export POD_LABEL="apps.sc2.io/name=helloworld-py"

# When updating the runtime we update all the config files, so we
# need to re-start the cache
sudo -E ./vm-cache/target/release/vm-cache restart

# ----- Python Test ----

echo "Running python test..."
envsubst < ./demo-apps/helloworld-py/deployment.yaml | ./bin/kubectl apply -f -

# Wait for pod to be ready
until [ "$(./bin/kubectl get pods -l ${POD_LABEL} -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}')" = "True" ]; do echo "Waiting for pod to be ready..."; sleep 2; done
sleep 1

# Get the pod's IP
service_ip=$(./bin/kubectl get services -o jsonpath='{.items[?(@.metadata.name=="coco-helloworld-py-node-port")].spec.clusterIP}')
[ "$(curl --retry 3 -X GET ${service_ip}:8080)" = "Hello World!" ]
envsubst < ./demo-apps/helloworld-py/deployment.yaml | ./bin/kubectl delete -f -

# Wait for pod to be deleted
./bin/kubectl wait --for=delete -l ${POD_LABEL} pod --timeout=30s

# Extra cautionary sleep
sleep 5
echo "Python test succesful!"

# ----- Knative Test ----
envsubst < ./demo-apps/helloworld-knative/service.yaml | ./bin/kubectl apply -f -
sleep 1

# Get the service URL
service_url=$(./bin/kubectl get ksvc helloworld-knative --output=custom-columns=URL:.status.url --no-headers)
[ "$(curl --retry 3 ${service_url})" = "Hello World!" ]

# Wait for pod to be deleted
envsubst < ./demo-apps/helloworld-knative/service.yaml | ./bin/kubectl delete -f -
./bin/kubectl wait --for=delete -l ${POD_LABEL} pod --timeout=60s

# Extra cautionary sleep
sleep 5
echo "Knative test succesful!"

# Change the snapshotter mode back again (and purge)
#
./bin/inv_wrapper.sh nydus-snapshotter.set-mode guest-pull
sleep 2
./bin/inv_wrapper.sh nydus-snapshotter.purge

- name: "Enable default-memory annotation"
run: |
for runtime_class in ${{ matrix.runtime_classes }}; do
Expand All @@ -182,8 +242,12 @@ jobs:

# After changing the annotation of the qemu-snp-sc2 runtime class we
# need to restart the VM cache
sudo -E ./vm-cache/target/release/vm-cache stop
sudo -E ./vm-cache/target/release/vm-cache background
sudo -E ./vm-cache/target/release/vm-cache restart

- name: "Fetch content (see #130)"
run: |
sudo ctr -n k8s.io content fetch -k sc2cr.io/system/knative-sidecar@sha256:79d5f6031f308cee209c4c32eeab9113b29a1ed4096c5d657504096734ca3b1d
sudo ctr -n k8s.io content fetch registry.k8s.io/pause:3.8

- name: "Run knative chaining demo"
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Installed binaries
bbolt
cosign
crictl
kubeadm
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ For further documentation, you may want to check these other documents:
* [CoCo Upgrade](./docs/upgrade_coco.md) - upgrade the current CoCo version.
* [Guest Components](./docs/guest_components.md) - instructions to patch components inside SC2 guests.
* [Host Kernel](./docs/host_kernel.md) - bump the kernel version in the host.
* [Image Pull](./docs/image_pull.md) - details on the image-pulling mechanisms supported in SC2.
* [K8s](./docs/k8s.md) - documentation about configuring a single-node Kubernetes cluster.
* [Kata](./docs/kata.md) - instructions to build our custom Kata fork and `initrd` images.
* [Key Broker Service](./docs/kbs.md) - docs on using and patching the KBS.
Expand Down
54 changes: 54 additions & 0 deletions docs/image_pull.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
## Image Pull

This document describes the different mechanisms to get a container image
inside a cVM in SC2. We _always_ assume that the integrity of container images
must be validated. We also consider the situation in which their confidentiality
must also be preserved.

### Guest Pull

The guest pull mechanism always pulls the container image inside the guest cVM.
This is the default mechanism in CoCo as it allows the most secure and simplest
deployment: users sign (and encrypt) container images locally, they upload
them to a container registry, pull them inside the cVM, and decrypt them inside
the cVM.

Albeit secure, this mechanism has high performance overheads as the image must
be pulled every single time, precluding any caching benefits.

To mitigate the performance overheads, we can convert the OCI image to a
Nydus image, which supports lazy loading of container data.

### Host Share

The host share mechanism mounts a container image from the host to the guest.
Given that the host is untrusted, this mechanism only works for images that
do not have confidentiality requirements. To maintain integrity, we mount
the image with `dm-verity`, and validate the `dm-verity` device as part of
attestation.

We choose to mount individual layers separately (rather than whole images),
but we should measure whether the former is actually better than the latter.

We could mount encrypted images from the host to the guest, but we would be
losing out on the de-duplication opportunities on the host.

### Usage

Each image pull mechanism is implemented as a different remote snapshotter
in containerd, all of them based on the [nydus-snapshotter](
https://github.com/containerd/nydus-snapshotter/) plus our modifications.

To switch between different image-pulling mechanisms, you only need to change
the snapshotter mode:

```bash
inv nydus-snapshotter.set-mode [guest-pull,host-share]
```

If you see any snapshotter related issues (either in the `containerd` or the
`nydus-snapshotter` journal logs), you can purge the snapshotters:

```bash
inv nydus-snapshotter.purge
```
23 changes: 18 additions & 5 deletions docs/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,27 @@ ctr -n k8s.io content fetch ${IMAGE_NAME}
the image name is the image tag appearing right before the error message in
the pod logs.

### Nydus snapshot corruption
### Rootfs Mount Issue

Sometimes, after hot-replacing the nydus-snapshotter, snapshots become corrupted,
and we can see the error below.
Sometimes, if we are mixing and matching different snapshotters, we may run
into the following error:

```
Failed to create pod sandbox: rpc error: code = Unknown desc = failed to create containerd task: failed to create shim task: failed to mount /run/kata-containers/shared/containers/0a583f0691d78e2036425f99bdac8e03302158320c1c55a5c6482cae7e729009/rootfs to /run/kata-containers/0a583f0691d78e2036425f99bdac8e03302158320c1c55a5c6482cae7e729009/rootfs, with error: ENOENT: No such file or directory
```

The only solution I found was to bump to a more up-to-date version of nydus.
This seemed to fix the issue.
This is because the pause image bundle has not been unpacked correctly. Note
that the pause image bundle is unpacked into the `/run/kata-containers/shared`
directory, and then mounted into the `/run/kata-containers/<cid>` one.

This usually happens when containerd believes that we already have the pause
image, so we do not need to pull it. This prevents the snapshotter from
generating the respective Kata virtual volumes.

As a rule of thumb, a good fix is to remove all images involved in the app
from the content store, and purge snapshotter caches:

```bash
sudo crictl rmi <hash>
inv nydus-snapshotter.purge
```
119 changes: 98 additions & 21 deletions tasks/containerd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
from os import stat
from os.path import join
from subprocess import run
from tasks.util.containerd import is_containerd_active, restart_containerd
from tasks.util.containerd import (
is_containerd_active,
restart_containerd,
wait_for_containerd_socket,
)
from tasks.util.docker import copy_from_ctr_image, is_ctr_running
from tasks.util.env import (
BIN_DIR,
CONF_FILES_DIR,
CONTAINERD_CONFIG_FILE,
CONTAINERD_CONFIG_ROOT,
Expand All @@ -14,7 +19,8 @@
print_dotted_line,
)
from tasks.util.toml import update_toml
from tasks.util.versions import CONTAINERD_VERSION
from tasks.util.versions import CONTAINERD_VERSION, GO_VERSION
from time import sleep

CONTAINERD_CTR_NAME = "containerd-workon"
CONTAINERD_IMAGE_TAG = (
Expand All @@ -31,23 +37,24 @@
CONTAINERD_HOST_BINPATH = "/usr/bin"


def do_build(debug=False):
docker_cmd = "docker build -t {} -f {} .".format(
def do_build(nocache=False):
docker_cmd = "docker build{} -t {} -f {} .".format(
" --no-cache" if nocache else "",
CONTAINERD_IMAGE_TAG,
join(PROJ_ROOT, "docker", "containerd.dockerfile"),
)
result = run(docker_cmd, shell=True, capture_output=True, cwd=PROJ_ROOT)
assert result.returncode == 0, print(result.stderr.decode("utf-8").strip())
if debug:
print(result.stdout.decode("utf-8").strip())
run(docker_cmd, shell=True, check=True, cwd=PROJ_ROOT)


@task
def build(ctx):
def build(ctx, nocache=False, push=False):
"""
Build the containerd fork for CoCo
"""
do_build(debug=True)
do_build(nocache=nocache)

if push:
run(f"docker push {CONTAINERD_IMAGE_TAG}", shell=True, check=True)


@task
Expand All @@ -73,19 +80,23 @@ def cli(ctx, mount_path=join(PROJ_ROOT, "..", "containerd")):


@task
def set_log_level(ctx, log_level):
def stop(ctx):
"""
Set containerd's log level, must be one in: info, debug
Stop the containerd work-on container
"""
allowed_log_levels = ["info", "debug"]
if log_level not in allowed_log_levels:
print(
"Unsupported log level '{}'. Must be one in: {}".format(
log_level, allowed_log_levels
)
)
return
result = run(
"docker rm -f {}".format(CONTAINERD_CTR_NAME),
shell=True,
check=True,
capture_output=True,
)
assert result.returncode == 0


def set_log_level(log_level):
"""
Set containerd's log level, must be one in: info, debug
"""
updated_toml_str = """
[debug]
level = "{log_level}"
Expand Down Expand Up @@ -158,7 +169,9 @@ def install(debug=False, clean=False):
# Populate the default config file for a clean start
run(f"sudo mkdir -p {CONTAINERD_CONFIG_ROOT}", shell=True, check=True)
if clean:
config_cmd = "containerd config default > {}".format(CONTAINERD_CONFIG_FILE)
config_cmd = "{}/containerd config default > {}".format(
host_base_path, CONTAINERD_CONFIG_FILE
)
config_cmd = "sudo bash -c '{}'".format(config_cmd)
run(config_cmd, shell=True, check=True)

Expand All @@ -169,4 +182,68 @@ def install(debug=False, clean=False):
if stat(CONTAINERD_CONFIG_FILE).st_size == 0:
raise RuntimeError("containerd config file is empty!")

# Wait for containerd to be ready
sleep(2)
while not is_containerd_active():
if debug:
print("Waiting for containerd to be active...")

sleep(2)

# Then make sure we can dial the socket
wait_for_containerd_socket()

print("Success!")


def install_bbolt(debug=False, clean=False):
    """
    Install the `bbolt` CLI into BIN_DIR by building it inside a throw-away
    Go container and copying the resulting binary out.

    :param debug: print the stdout of the intermediate docker commands.
    :param clean: currently unused; kept for signature consistency with the
        other install helpers in this file.
    :raises RuntimeError: if any of the docker steps fails (the temporary
        container is removed before raising).
    """
    print_dotted_line("Installing bbolt")

    # bbolt is used against containerd's metadata DB, so wait until we can
    # dial the containerd socket before doing anything
    wait_for_containerd_socket()

    tmp_ctr_name = "bbolt_install"

    def rm_container():
        # Remove the temporary build container
        result = run(f"docker rm -f {tmp_ctr_name}", shell=True, capture_output=True)
        assert result.returncode == 0

    # Clear any leftover container from a previous (failed) run
    if is_ctr_running(tmp_ctr_name):
        rm_container()

    def run_checked(cmd, error_msg):
        # Run one docker step; on failure print stderr, clean up the
        # temporary container, and raise. NOTE: the original code had a
        # trailing comma after each print() call, turning the statement
        # into a no-op tuple expression -- fixed here.
        result = run(cmd, shell=True, capture_output=True)
        if result.returncode != 0:
            print(result.stderr.decode("utf-8").strip())
            rm_container()
            raise RuntimeError(error_msg)
        if debug:
            print(result.stdout.decode("utf-8").strip())

    # Start a detached Go build container
    run_checked(
        f"docker run -d -it --name {tmp_ctr_name} golang:{GO_VERSION} bash",
        "Error running container",
    )

    # Build and install bbolt inside the container
    run_checked(
        f"docker exec {tmp_ctr_name} go install go.etcd.io/bbolt/cmd/bbolt@latest",
        "Error execing into container",
    )

    # Copy the built binary out of the container into our bin dir
    run_checked(
        f"docker cp {tmp_ctr_name}:/go/bin/bbolt {BIN_DIR}/bbolt",
        "Error cp-ing from container",
    )

    rm_container()

    print("Success!")
13 changes: 2 additions & 11 deletions tasks/kata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,10 @@ def stop(ctx):
stop_kata_workon_ctr()


@task
def set_log_level(ctx, log_level):
def set_log_level(log_level):
"""
Set kata's log level, must be one in: info, debug
"""
allowed_log_levels = ["info", "debug"]
if log_level not in allowed_log_levels:
print(
"Unsupported log level '{}'. Must be one in: {}".format(
log_level, allowed_log_levels
)
)
return

enable_debug = str(log_level == "debug").lower()

for runtime in KATA_RUNTIMES + SC2_RUNTIMES:
Expand Down Expand Up @@ -146,6 +136,7 @@ def hot_replace_shim(ctx, runtime="qemu-snp-sc2"):
),
),
sc2=runtime in SC2_RUNTIMES,
hot_replace=True,
)

restart_containerd()
Loading