diff --git a/pkg/network/network.go b/pkg/network/network.go index 6aa55e22..ceeb2b94 100644 --- a/pkg/network/network.go +++ b/pkg/network/network.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "regexp" "strings" "github.com/jackpal/gateway" @@ -389,3 +390,62 @@ func deleteTapDevice(device netlink.Link) error { } return nil } + +func cleanupOrphanTaps() error { + netlog.Debug("running cleanupOrphanTaps (carrier-state based)") + + // If there is no container interface (e.g. no eth0), do not attempt to create/delete taps. + // This avoids touching taps in netns that aren't ready or belong to other runtimes (ctr). + if _, err := netlink.LinkByName(DefaultInterface); err != nil { + netlog.Debug("no container interface found in namespace; skipping orphan TAP cleanup") + return nil + } + + // Per design: assume at-most-one unikernel per netns. No inter-process netns lock is used. + + handle, err := netlink.NewHandle() + if err != nil { + return fmt.Errorf("failed to get netlink handle: %w", err) + } + defer handle.Close() + + links, err := handle.LinkList() + if err != nil { + return fmt.Errorf("failed to list links: %w", err) + } + + tapRe := regexp.MustCompile(`^tap.*_urunc$`) + for _, link := range links { + attrs := link.Attrs() + if attrs == nil { + continue + } + name := attrs.Name + if !tapRe.MatchString(name) { + continue + } + + // The device is in a 'Zombie' state: Administrative status is UP, but + // Operational status is DOWN with NO-CARRIER. + // In the Linux TUN/TAP driver model, NO-CARRIER on an UP interface + // definitively proves that no userspace process holds the file descriptor + // for this device. + if (attrs.Flags&net.FlagRunning) != 0 || attrs.OperState == netlink.OperUp { + return fmt.Errorf("found tap %s with carrier/oper state UP: aborting cleanup (unikernel may be running)", name) + } + + netlog.Debugf("deleting orphan tap %s (no carrier)", name) + if err := deleteAllTCFilters(link); err != nil { + return fmt.Errorf("failed to delete tc filters for %s: %w", name, err) + } + if err := deleteAllQDiscs(link); err != nil { + return fmt.Errorf("failed to delete qdiscs for %s: %w", name, err) + } + if err := deleteTapDevice(link); err != nil { + return fmt.Errorf("failed to delete tap %s: %w", name, err) + } + netlog.Debugf("deleted orphan tap %s", name) + } + + return nil +} diff --git a/pkg/network/network_dynamic.go b/pkg/network/network_dynamic.go index f4264bbe..58990549 100644 --- a/pkg/network/network_dynamic.go +++ b/pkg/network/network_dynamic.go @@ -35,6 +35,11 @@ type DynamicNetwork struct { // for multiple unikernels in the same pod/network namespace. // See: https://github.com/urunc-dev/urunc/issues/13 func (n DynamicNetwork) NetworkSetup(uid uint32, gid uint32) (*UnikernelNetworkInfo, error) { + // Attempt to clean up orphan TAPs created by urunc in this netns + if err := cleanupOrphanTaps(); err != nil { + return nil, fmt.Errorf("cleanupOrphanTaps failed: %w", err) + } + tapIndex, err := getTapIndex() if err != nil { return nil, fmt.Errorf("getTapIndex failed: %w", err)