From 4e07dbdfaf5489a7a4caf9db2bb531bbf99aa562 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 1 Dec 2025 19:05:36 +0000 Subject: [PATCH] work around race in virtio serial console The ebpf-go CI has been plagued by a non-deterministic hang of unit tests. It affects all packages and manifests as a write to stdout getting stuck, followed by the test timing out. This triggers a goroutine dump, which in turn unblocks the stuck write to stdout. It's possible to reproduce this behaviour using the following commandline: taskset -c 0 vimto -smp cpus=2 -kernel ghcr.io/cilium/ci-kernels:6.15.3 \ exec -- sh -c 'seq 1 1000000 | while read i; do echo "line $i"; done' After a few seconds the output will freeze. Inspecting the stack of the executing program shows something like the following: [<0>] wait_port_writable+0x139/0x2d0 [<0>] port_fops_write+0x88/0x130 [<0>] vfs_write+0xf3/0x450 [<0>] ksys_write+0x6d/0xe0 [<0>] do_syscall_64+0x9e/0x1a0 [<0>] entry_SYSCALL_64_after_hwframe+0x77/0x7f 1 0x1 0x7ffdf4878c80 0x9 0x0 0x0 0x0 0x7ffdf4878c20 0x7f592daed77e As far as I can tell, it is critical that execution is restricted to a single CPU on the host side, while qemu presents two vCPUs to the VM. Passing ioeventfd=off to the serial console device works around this problem. See https://github.com/cilium/ebpf/issues/1734 for more details. --- qemu.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qemu.go b/qemu.go index 2876eac..0978532 100644 --- a/qemu.go +++ b/qemu.go @@ -560,7 +560,10 @@ func (vios *virtioSerialPorts) Cmdline() []string { args := []string{ // There seems to be an off by one error with max_ports. - "-device", fmt.Sprintf("virtio-serial,max_ports=%d", len(vios.Chardevs)+1), + // + // ioeventfd=off works around a weird race condition where writing to the serial + // console from inside the guest may get stuck waiting for a wakeup. 
+ "-device", fmt.Sprintf("virtio-serial,max_ports=%d,ioeventfd=off", len(vios.Chardevs)+1), } for dev, name := range vios.Chardevs { args = append(args,