From db9443b3d72541d723f9cf9e26a7e646881a47a7 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 20 Jan 2025 17:58:01 +0100 Subject: [PATCH 01/15] Static CPU management policy alongside InPlacePodVerticalScaling --- pkg/kubelet/cm/cpumanager/cpu_assignment.go | 74 +- .../cm/cpumanager/cpu_assignment_test.go | 18 +- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 1 + pkg/kubelet/cm/cpumanager/policy_static.go | 221 ++- .../cm/cpumanager/policy_static_test.go | 714 ++++++- pkg/kubelet/types/constants.go | 7 + .../common/node/framework/podresize/resize.go | 62 +- test/e2e_node/cpu_manager_metrics_test.go | 6 +- test/e2e_node/cpu_manager_test.go | 863 +++++++- test/e2e_node/pod_resize_test.go | 1737 +++++++++++++++++ test/e2e_node/util.go | 4 +- 11 files changed, 3636 insertions(+), 71 deletions(-) create mode 100644 test/e2e_node/pod_resize_test.go diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index bff55a600d7b4..fc22bbb9318ba 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -298,7 +298,7 @@ type cpuAccumulator struct { availableCPUSorter availableCPUSorter } -func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) *cpuAccumulator { +func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) *cpuAccumulator { acc := &cpuAccumulator{ logger: logger, topo: topo, @@ -307,6 +307,43 @@ func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, available result: cpuset.New(), } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsEmpty() { + // Increase of CPU resources ( scale up ) + // Take existing from allocated + // CPUs + if numCPUs > reusableCPUsForResize.Size() { + // scale up ... + acc.take(reusableCPUsForResize.Clone()) + } + + // Decrease of CPU resources ( scale down ) + // Take delta from allocated CPUs, if mustKeepCPUsForScaleDown + // is not nil, use explicetely those. If it is nil + // take delta starting from lowest CoreId of CPUs ( TODO esotsal, perhaps not needed). + if numCPUs < reusableCPUsForResize.Size() { + if mustKeepCPUsForScaleDown != nil { + // If explicetely CPUs to keep + // during scale down is given ( this requires + // addition in container[].resources ... which + // could be possible to patch ? Esotsal Note This means + // modifying API code + if !(mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + acc.take(mustKeepCPUsForScaleDown.Clone()) + } else { + return acc + } + } + } + + if numCPUs == reusableCPUsForResize.Size() { + // nothing to do return as is + acc.take(reusableCPUsForResize.Clone()) + return acc + } + } + } + if topo.NumSockets >= topo.NumNUMANodes { acc.numaOrSocketsFirst = &numaFirst{acc} } else { @@ -773,15 +810,23 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. -func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) { - acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy) +func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { + if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + } + } + + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } - // Algorithm: topology-aware best-fit // 1. Acquire whole NUMA nodes and sockets, if available and the container // requires at least a NUMA node or socket's-worth of CPUs. If NUMA @@ -890,25 +935,32 @@ func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, av // of size 'cpuGroupSize' according to the algorithm described above. This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. -func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) { +func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested cannot be handed out in chunks of // 'cpuGroupSize', then we just call out the packing algorithm since we // can't distribute CPUs in this chunk size. // PreferAlignByUncoreCache feature not implemented here yet and set to false. // Support for PreferAlignByUncoreCache to be done at beta release. if (numCPUs % cpuGroupSize) != 0 { - return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + } + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { + if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + } } // Otherwise build an accumulator to start allocating CPUs from. - acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } - // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. numas := acc.sortAvailableNUMANodes() @@ -1080,7 +1132,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // size 'cpuGroupSize' from 'bestCombo'. distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { - cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) acc.take(cpus) } @@ -1095,7 +1147,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) acc.take(cpus) remainder -= cpuGroupSize } @@ -1119,5 +1171,5 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. - return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) } diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 34933bc982a55..34a768f6f60ea 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -116,7 +116,7 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeSockets() sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { @@ -217,7 +217,7 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeNUMANodes() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -267,7 +267,7 @@ func TestCPUAccumulatorFreeSocketsAndNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) resultNUMANodes := acc.freeNUMANodes() if !reflect.DeepEqual(resultNUMANodes, tc.expectNUMANodes) { t.Errorf("expected NUMA Nodes %v to equal %v", resultNUMANodes, tc.expectNUMANodes) @@ -340,7 +340,7 @@ func TestCPUAccumulatorFreeCores(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCores() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -397,7 +397,7 @@ func TestCPUAccumulatorFreeCPUs(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCPUs() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -484,7 +484,7 @@ func TestCPUAccumulatorTake(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked, nil, nil) totalTaken := 0 for _, cpus := range tc.takeCPUs { acc.take(cpus) @@ -758,7 +758,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) + result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) } @@ -860,7 +860,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) { if tc.opts.DistributeCPUsAcrossCores { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) + result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err.Error() != tc.expErr { t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err) } @@ -1063,7 +1063,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked) + result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, nil, nil) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index cf19290b14fe0..9755add442e00 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -170,6 +170,7 @@ func makePod(podUID, containerName, cpuRequest, cpuLimit string) *v1.Pod { } pod.UID = types.UID(podUID) + pod.Name = podUID pod.Spec.Containers[0].Name = containerName return pod diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 53519390c45d8..fe1dac2e40890 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/metrics" + "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/utils/cpuset" ) @@ -67,6 +68,50 @@ func (e SMTAlignmentError) Type() string { return ErrorSMTAlignment } +// inconsistentCPUAllocationError represents an error due to an +// attempt to either move a container from exclusively allocated +// pool to shared pool or move a container from shared pool to +// exclusively allocated pool. +type inconsistentCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + Shared2Exclusive bool +} + +func (e inconsistentCPUAllocationError) Error() string { + if e.RequestedCPUs == e.AllocatedCPUs { + return fmt.Sprintf("inconsistentCPUAllocation Error: Skip resize, nothing to be done, (requested CPUs = %s equal to allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } + if e.Shared2Exclusive { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from shared pool to exclusively allocated pool, (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } else { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from exclusively allocated pool to shared pool, not allowed (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e inconsistentCPUAllocationError) Type() string { + return types.ErrorInconsistentCPUAllocation +} + +// getCPUSetError represents an error due to a +// failed attempt to GetCPUSet from state +type getCPUSetError struct { + PodUID string + ContainerName string +} + +func (e getCPUSetError) Error() string { + return fmt.Sprintf("getCPUSet Error: Skip resize, unable to get CPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e getCPUSetError) Type() string { + return types.ErrorGetCPUSet +} + // staticPolicy is a CPU manager policy that does not change CPU // assignments for exclusively pinned guaranteed containers after the main // container process starts. @@ -120,6 +165,8 @@ type staticPolicy struct { affinity topologymanager.Store // set of CPUs to reuse across allocations in a pod cpusToReuse map[string]cpuset.CPUSet + // set of CPUs to reuse during pod resize + cpusToReuseDuringResize map[string]cpuset.CPUSet // options allow to fine-tune the behaviour of the policy options StaticPolicyOptions // we compute this value multiple time, and it's not supposed to change @@ -147,11 +194,12 @@ func NewStaticPolicy(logger logr.Logger, topology *topology.CPUTopology, numRese logger.Info("created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize) policy := &staticPolicy{ - topology: topology, - affinity: affinity, - cpusToReuse: make(map[string]cpuset.CPUSet), - options: opts, - cpuGroupSize: cpuGroupSize, + topology: topology, + affinity: affinity, + cpusToReuse: make(map[string]cpuset.CPUSet), + options: opts, + cpuGroupSize: cpuGroupSize, + cpusToReuseDuringResize: make(map[string]cpuset.CPUSet), } allCPUs := topology.CPUDetails.CPUs() @@ -164,7 +212,7 @@ func NewStaticPolicy(logger logr.Logger, topology *topology.CPUTopology, numRese // // For example: Given a system with 8 CPUs available and HT enabled, // if numReservedCPUs=2, then reserved={0,4} - reserved, _ = policy.takeByTopology(logger, allCPUs, numReservedCPUs) + reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs, nil, nil) } if reserved.Size() != numReservedCPUs { @@ -316,12 +364,17 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset) } -func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) - logger.Info("Allocate start") // V=0 for backward compatibility - defer logger.V(2).Info("Allocate end") +func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { + numCPUs := p.guaranteedCPUs(pod, container) + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // During a pod resize, handle corner cases + err := p.validateInPlacePodVerticalScaling(pod, container) + if err != nil { + klog.ErrorS(err, "Static policy: Unable to resize allocated CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } + } - numCPUs := p.guaranteedCPUs(logger, pod, container) if numCPUs == 0 { // container belongs in the shared pool (nothing to do; use default cpuset) return nil @@ -371,6 +424,12 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { + cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] + availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) + } + } // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. @@ -384,10 +443,47 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, } } } - if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - p.updateCPUsToReuse(pod, container, cset) - logger.Info("Static policy: container already present in state, skipping") - return nil + if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + klog.InfoS("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + if cpusInUseByPodContainerToResize, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, nil) + if err != nil { + klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. + return nil + } else { + return getCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + } + } else { + p.updateCPUsToReuse(pod, container, cpuset) + klog.InfoS("Static policy: InPlacePodVerticalScaling alognside CPU Static policy requires InPlacePodVerticalScaling to be enabled, skipping pod resize") + return nil + } + } else { + p.updateCPUsToReuse(pod, container, cpuset) + klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) + return nil + } } // Call Topology Manager to get the aligned socket affinity across all hint providers. @@ -395,7 +491,7 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, logger.Info("Topology Affinity", "affinity", hint) // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)]) + cpuAllocation, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) if err != nil { logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) return err @@ -441,10 +537,18 @@ func (p *staticPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID return nil } -func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (topology.Allocation, error) { - logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) - - allocatableCPUs := p.GetAvailableCPUs(s).Union(reusableCPUs) +func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { + klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) + allocatableCPUs := cpuset.New() + if reusableCPUsForResize != nil { + if numCPUs >= reusableCPUsForResize.Size() { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUsForResize.Clone())) + } else if numCPUs < reusableCPUsForResize.Size() { + allocatableCPUs = reusableCPUsForResize.Clone() + } + } else { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUs)) + } // If there are aligned CPUs in numaAffinity, attempt to take those first. result := topology.EmptyAllocation() @@ -456,7 +560,7 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i numAlignedToAlloc = numCPUs } - allocatedCPUs, err := p.takeByTopology(logger, alignedCPUs, numAlignedToAlloc) + allocatedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -465,7 +569,7 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i } // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. - remainingCPUs, err := p.takeByTopology(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size()) + remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -474,6 +578,17 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i // Remove allocated CPUs from the shared CPUSet. s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + if reusableCPUsForResize != nil { + if reusableCPUsForResize.Size() < result.CPUs.Size() { + // Scale up or creation has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } else if reusableCPUsForResize.Size() > result.CPUs.Size() { + // Scale down has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(reusableCPUsForResize.Difference(result.CPUs))) + } + } else { + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } logger.Info("AllocateCPUs", "result", result.String()) return result, nil @@ -531,7 +646,7 @@ func (p *staticPolicy) podGuaranteedCPUs(logger logr.Logger, pod *v1.Pod) int { return requestedByLongRunningContainers } -func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) { +func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { cpuSortingStrategy := CPUSortingStrategyPacked if p.options.DistributeCPUsAcrossCores { cpuSortingStrategy = CPUSortingStrategySpread @@ -542,10 +657,9 @@ func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.C if p.options.FullPhysicalCPUsOnly { cpuGroupSize = p.cpuGroupSize } - return takeByTopologyNUMADistributed(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy) + return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) } - - return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption) + return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForScaleDown) } func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { @@ -572,7 +686,7 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requested { - logger.Info("CPUs already allocated to container with different number than request", "requestedSize", requested, "allocatedSize", allocated.Size()) + klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. @@ -631,7 +745,7 @@ func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, po // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requestedByContainer { - logger_.Info("CPUs already allocated to container with different number than request", "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) + klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. @@ -681,7 +795,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Iterate through all combinations of numa nodes bitmask and build hints from them. hints := []topologymanager.TopologyHint{} - bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().UnsortedList(), func(mask bitmask.BitMask) { // First, update minAffinitySize for the current request size. cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() if cpusInMask >= request && mask.Count() < minAffinitySize { @@ -691,7 +805,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Then check to see if we have enough CPUs available on the current // numa node bitmask to satisfy the CPU request. numMatching := 0 - for _, c := range reusableCPUs.List() { + for _, c := range reusableCPUs.UnsortedList() { // Disregard this mask if its NUMANode isn't part of it. if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { return @@ -701,7 +815,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Finally, check to see if enough available CPUs remain on the current // NUMA node combination to satisfy the CPU request. - for _, c := range availableCPUs.List() { + for _, c := range availableCPUs.UnsortedList() { if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { numMatching++ } @@ -832,3 +946,48 @@ func updateAllocationPerNUMAMetric(logger logr.Logger, topo *topology.CPUTopolog metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count)) } } + +func (p *staticPolicy) validateInPlacePodVerticalScaling(pod *v1.Pod, container *v1.Container) error { + + if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + return nil + } + cpuQuantity := container.Resources.Requests[v1.ResourceCPU] + if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { + allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] + if allocatedCPUQuantity.Value() > 0 { + if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { + // container belongs in exclusive pool + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + // container move to shared pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: false, + } + } + } else { + // container belongs in shared pool + if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } + } else { + // container belongs in shared pool + if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } + } + return nil +} diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index df557fe63a744..c8f45e15f1f07 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/api/resource" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/klog/v2" @@ -46,6 +47,10 @@ type staticPolicyTest struct { stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod + qosClass v1.PodQOSClass + podAllocated string + resizeLimit string + resizeRequest string topologyHint *topologymanager.TopologyHint expErr error expCPUAlloc bool @@ -442,14 +447,14 @@ func TestStaticPolicyAdd(t *testing.T) { numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(2, 3, 6, 7), + "fakeContainer3": cpuset.New(1, 2, 5, 6), }, }, - stDefaultCPUSet: cpuset.New(0, 1, 4, 5), + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), expErr: nil, expCPUAlloc: true, - expCSet: cpuset.New(2, 3, 6, 7), + expCSet: cpuset.New(1, 2, 5, 6), }, { description: "GuPodMultipleCores, DualSocketHT, NoAllocExpectError", @@ -576,6 +581,115 @@ func TestStaticPolicyAdd(t *testing.T) { expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11), }, } + + // testcases for podResize + podResizeTestCases := []staticPolicyTest{ + { + description: "podResize GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation", + topo: topoSingleSocketHT, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 2, 5, 6), + }, + }, + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 2, 5, 6), + }, + { + description: "podResize GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "false", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "1000m", "1000m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(4), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + } + newNUMAAffinity := func(bits ...int) bitmask.BitMask { affinity, _ := bitmask.NewBitMask(bits...) return affinity @@ -636,6 +750,9 @@ func TestStaticPolicyAdd(t *testing.T) { for _, testCase := range alignBySocketOptionTestCases { runStaticPolicyTestCaseWithFeatureGate(t, testCase) } + for _, testCase := range podResizeTestCases { + runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t, testCase) + } } func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { @@ -697,6 +814,12 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT runStaticPolicyTestCase(t, testCase) } +func runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t *testing.T, testCase staticPolicyTest) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + runStaticPolicyTestCase(t, testCase) +} + func TestStaticPolicyReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -757,6 +880,297 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet + expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Increase allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Decrease allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResizeSize: 4, + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with less than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1200m", limit: "1200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1200m", + resizeLimit: "1100m", + resizeRequest: "1100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(st, pod, &container) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(st, podResized, &container) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because allocated CPUs are not deterministic, + // since size of defaultCPUSet is deterministic and also interesection with expected allocation + // should not be nill. < ====== TODO esotsal + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} + func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -806,6 +1220,298 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + containerName2 string + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet + expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Increase appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Decrease appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // appContainer-0 CPUs 0, 4, 1, 5 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // appContainer-1 CPUS 2, 6, 3, 7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(), + expCSetAfterResize: cpuset.New(), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 2-3, 6-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool with more than one core, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, appContainer-1 in exclusive pool, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-1, 4-5 + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(st, pod, &container) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(st, podResized, &container) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because allocated CPUs are not deterministic, + // since size of defaultCPUSet is deterministic and also interesection with expected allocation + // should not be nill. < ====== TODO esotsal + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + err = policy.RemoveContainer(st, string(pod.UID), testCase.containerName2) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} func TestStaticPolicyRemove(t *testing.T) { testCases := []staticPolicyTest{ { @@ -975,7 +1681,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { continue } - cpuAlloc, err := policy.allocateCPUs(klog.Background(), st, tc.numRequested, tc.socketMask, cpuset.New()) + cpuAlloc, err := policy.allocateCPUs(st, tc.numRequested, tc.socketMask, cpuset.New(), nil, nil) if err != nil { t.Errorf("StaticPolicy allocateCPUs() error (%v). expected CPUSet %v not error %v", tc.description, tc.expCSet, err) diff --git a/pkg/kubelet/types/constants.go b/pkg/kubelet/types/constants.go index 791052dbbcece..6c032139b74a1 100644 --- a/pkg/kubelet/types/constants.go +++ b/pkg/kubelet/types/constants.go @@ -38,3 +38,10 @@ const ( LimitedSwap SwapBehavior = "LimitedSwap" NoSwap SwapBehavior = "NoSwap" ) + +// InPlacePodVerticalScaling types +const ( + // ErrorInconsistentCPUAllocation represent the type of an inconsistentCPUAllocationError + ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" + ErrorGetCPUSet = "getCPUSetError" +) diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index 0dc9ce2572ee0..e3836f012dfcb 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -25,6 +25,9 @@ import ( "strings" "time" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" @@ -35,23 +38,30 @@ import ( "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" + "k8s.io/utils/cpuset" ) const ( - MinContainerRuntimeVersion string = "1.6.9" + CgroupCPUPeriod string = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" + CgroupCPUShares string = "/sys/fs/cgroup/cpu/cpu.shares" + CgroupCPUQuota string = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" + CgroupMemLimit string = "/sys/fs/cgroup/memory/memory.limit_in_bytes" + Cgroupv2MemLimit string = "/sys/fs/cgroup/memory.max" + Cgroupv2MemRequest string = "/sys/fs/cgroup/memory.min" + Cgroupv2CPULimit string = "/sys/fs/cgroup/cpu.max" + Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" + CPUPeriod string = "100000" ) type ResizableContainerInfo struct { - Name string - Resources *cgroups.ContainerResources - CPUPolicy *v1.ResourceResizeRestartPolicy - MemPolicy *v1.ResourceResizeRestartPolicy - RestartCount int32 - RestartPolicy v1.ContainerRestartPolicy - InitCtr bool + Name string + Resources *cgroups.ContainerResources + CPUPolicy *v1.ResourceResizeRestartPolicy + MemPolicy *v1.ResourceResizeRestartPolicy + RestartCount int32 + RestartPolicy v1.ContainerRestartPolicy + InitCtr bool + CPUsAllowedListValue string } func getTestResizePolicy(tcInfo ResizableContainerInfo) (resizePol []v1.ContainerResizePolicy) { @@ -505,3 +515,33 @@ func formatErrors(err error) error { } return fmt.Errorf("[\n%s\n]", strings.Join(errStrings, ",\n")) } + +func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod, wantCtrs []ResizableContainerInfo) error { + ginkgo.GinkgoHelper() + verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string) error { + mycmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" + calValue, _, err := e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", mycmd) + framework.Logf("Namespace %s Pod %s Container %s - looking for Cpus allowed list value %s in /proc/self/status", + pod.Namespace, pod.Name, cName, expectedCPUsAllowedListValue) + if err != nil { + return fmt.Errorf("failed to find expected value '%s' in container '%s' Cpus allowed list '/proc/self/status'", cName, expectedCPUsAllowedListValue) + } + c, err := cpuset.Parse(calValue) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for container %s in pod %s", cName, pod.Name) + cpuTotalValue := strconv.Itoa(c.Size()) + if cpuTotalValue != expectedCPUsAllowedListValue { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%s' not equal to expected '%s'", cName, calValue, cpuTotalValue, expectedCPUsAllowedListValue) + } + return nil + } + for _, ci := range wantCtrs { + if ci.CPUsAllowedListValue == "" { + continue + } + err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue) + if err != nil { + return err + } + } + return nil +} diff --git a/test/e2e_node/cpu_manager_metrics_test.go b/test/e2e_node/cpu_manager_metrics_test.go index acf7e85e73194..44e07943d33d7 100644 --- a/test/e2e_node/cpu_manager_metrics_test.go +++ b/test/e2e_node/cpu_manager_metrics_test.go @@ -104,6 +104,8 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa enableCPUManagerOptions: true, options: cpuPolicyOptions, }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) }) @@ -402,7 +404,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, + }, false, false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -442,7 +444,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, + }, false, false, ) updateKubeletConfig(ctx, f, newCfg, true) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 7ca4c52867fd9..8bdfdda7f4365 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -2863,7 +2863,7 @@ type cpuManagerKubeletArguments struct { options map[string]string } -func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { +func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) *kubeletconfig.KubeletConfiguration { newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { newCfg.FeatureGates = make(map[string]bool) @@ -2873,6 +2873,8 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources + newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = isInPlacePodVerticalScalingExclusiveCPUsEnabled + newCfg.FeatureGates["InPlacePodVerticalScalingAllocatedStatus"] = isInPlacePodVerticalScalingAllocatedStatusEnabled if kubeletArguments.customCPUCFSQuotaPeriod != 0 { newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = true @@ -2907,3 +2909,862 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku return newCfg } + +func runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx context.Context, f *framework.Framework) { + var cpu1 int + var ctnAttrs []ctnAttribute + var pod *v1.Pod + var cpuList []int + var expAllowedCPUsListRegex string + var err error + // First running a Gu Pod, + // second disable cpu manager in kubelet, + // then delete the Gu Pod, + // then enable cpu manager in kubelet, + // at last wait for the reconcile process cleaned up the state file, if the assignments map is empty, + // it proves that the automatic cleanup in the reconcile process is in effect. + ginkgo.By("running a Gu pod for test remove") + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-testremove", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + } + pod = makeCPUManagerPod("gu-pod-testremove", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + ginkgo.By("checking if the expected cpuset was assigned") + cpu1 = 1 + if isHTEnabled() { + cpuList = mustParseCPUSet(getCPUSiblingList(0)).List() + cpu1 = cpuList[1] + } else if isMultiNUMA() { + cpuList = mustParseCPUSet(getCoreSiblingList(0)).List() + if len(cpuList) > 1 { + cpu1 = cpuList[1] + } + } + expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod.Spec.Containers[0].Name, pod.Name) + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + +} + +func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool, cpuAlloc int64) { + var err error + var ctnAttrs []ctnAttribute + var pod1, pod2, pod3 *v1.Pod + podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod + + framework.Logf("runCfsQuotaGuPods: disableQuota=%v, CPU Allocatable=%v", disabledCPUQuotaWithExclusiveCPUs, cpuAlloc) + + deleteTestPod := func(pod *v1.Pod) { + // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a + // 'deadline expired' message and the cleanup aborts, which we don't want. + // So let's use a separate and more generous timeout (determined by trial and error) + ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + deletePodSyncAndWait(ctx2, f, pod.Namespace, pod.Name) + delete(podsToClean, string(pod.UID)) + } + + // cleanup leftovers on test failure. The happy path is covered by `deleteTestPod` calls + ginkgo.DeferCleanup(func() { + ginkgo.By("by deleting the pods and waiting for container removal") + // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a + // 'deadline expired' message and the cleanup aborts, which we don't want. + // So let's use a separate and more generous timeout (determined by trial and error) + ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + deletePodsAsync(ctx2, f, podsToClean) + }) + + podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep -E "($(cat /podinfo/uid)|$(cat /podinfo/uid | sed 's/-/_/g'))(/|\.slice/)cpu.max$") && sleep 1d`} + cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"} + defaultPeriod := "100000" + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-cfsquota-disabled", + cpuRequest: "1", + cpuLimit: "1", + }, + } + pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs) + pod1.Spec.Containers[0].Command = cfsCheckCommand + pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1) + podsToClean[string(pod1.UID)] = pod1 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, exclusive CPUs, unlimited)") + + expectedQuota := "100000" + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + expCFSQuotaRegex := fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod1.Spec.Containers[0].Name, pod1.Name) + deleteTestPod(pod1) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-cfsquota-enabled", + cpuRequest: "500m", + cpuLimit: "500m", + }, + } + pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs) + pod2.Spec.Containers[0].Command = cfsCheckCommand + pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2) + podsToClean[string(pod2.UID)] = pod2 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod2.Spec.Containers[0].Name, pod2.Name) + deleteTestPod(pod2) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "non-gu-container", + cpuRequest: "100m", + cpuLimit: "500m", + }, + } + pod3 = makeCPUManagerPod("non-gu-pod3", ctnAttrs) + pod3.Spec.Containers[0].Command = cfsCheckCommand + pod3 = e2epod.NewPodClient(f).CreateSync(ctx, pod3) + podsToClean[string(pod3.UID)] = pod3 + + ginkgo.By("checking if the expected cfs quota was assigned (BU pod, limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod3.Name, pod3.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod3.Spec.Containers[0].Name, pod3.Name) + deleteTestPod(pod3) + + if cpuAlloc >= 2 { + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-non-int-values", + cpuRequest: "500m", + cpuLimit: "500m", + }, + { + ctnName: "gu-container-int-values", + cpuRequest: "1", + cpuLimit: "1", + }, + } + pod4 := makeCPUManagerPod("gu-pod4", ctnAttrs) + pod4.Spec.Containers[0].Command = cfsCheckCommand + pod4.Spec.Containers[1].Command = cfsCheckCommand + pod4 = e2epod.NewPodClient(f).CreateSync(ctx, pod4) + podsToClean[string(pod4.UID)] = pod4 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, container 0 exclusive CPUs unlimited, container 1 limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod4.Spec.Containers[0].Name, pod4.Name) + expectedQuota = "100000" + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[1].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod4.Spec.Containers[1].Name, pod4.Name) + deleteTestPod(pod4) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-non-int-values", + cpuRequest: "500m", + cpuLimit: "500m", + }, + { + ctnName: "gu-container-int-values", + cpuRequest: "1", + cpuLimit: "1", + }, + } + + pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs) + pod5.Spec.Containers[0].Command = podCFSCheckCommand + pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5) + podsToClean[string(pod5.UID)] = pod5 + + ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, unlimited)") + + expectedQuota = "150000" + + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name) + deleteTestPod(pod5) + } else { + ginkgo.By(fmt.Sprintf("some cases SKIPPED - requests at least %d allocatable cores, got %d", 2, cpuAlloc)) + } + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container", + cpuRequest: "100m", + cpuLimit: "100m", + }, + } + + pod6 := makeCPUManagerPod("gu-pod6", ctnAttrs) + pod6.Spec.Containers[0].Command = podCFSCheckCommand + pod6 = e2epod.NewPodClient(f).CreateSync(ctx, pod6) + podsToClean[string(pod6.UID)] = pod6 + + ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, limited)") + + expectedQuota = "10000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod6.Name, pod6.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod6.Spec.Containers[0].Name, pod6.Name) + deleteTestPod(pod6) +} + +func runCPUManagerTests(f *framework.Framework) { + var cpuCap, cpuAlloc int64 + var oldCfg *kubeletconfig.KubeletConfiguration + + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < minCPUCapacity. + if cpuCap < minCPUCapacity { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < %d", minCPUCapacity) + } + + // Enable CPU Manager in the kubelet. + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a non-Gu pod") + runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) + + ginkgo.By("running a Gu pod") + runGuPodTest(ctx, f, 1, cpuset.New()) + + ginkgo.By("running multiple Gu and non-Gu pods") + runMultipleGuNonGuPods(ctx, f, cpuCap, cpuAlloc) + + // Skip rest of the tests if CPU capacity < 3. + if cpuCap < 3 { + e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3") + } + + ginkgo.By("running a Gu pod requesting multiple CPUs") + runMultipleCPUGuPod(ctx, f) + + ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs") + runMultipleCPUContainersGuPod(ctx, f) + + ginkgo.By("running multiple Gu pods") + runMultipleGuPods(ctx, f) + + ginkgo.By("test for automatically remove inactive pods from cpumanager state file.") + runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx, f) + }) + + ginkgo.It("reservedSystemCPUs are excluded only for Gu pods (strict-cpu-reservation option not enabled by default)", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < 2. + if cpuCap < 2 { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") + } + + reservedSystemCPUs := cpuset.New(0) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") + runGuPodTest(ctx, f, 1, reservedSystemCPUs) + + ginkgo.By("running a non-Gu pod - it can use reserved system CPUs") + runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) + + }) + + ginkgo.It("reservedSystemCPUs are excluded for both Gu and non-Gu pods (strict-cpu-reservation option enabled)", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < 2. + if cpuCap < 2 { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") + } + + reservedSystemCPUs := cpuset.New(0) + cpuPolicyOptions := map[string]string{ + cpumanager.StrictCPUReservationOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") + runGuPodTest(ctx, f, 1, reservedSystemCPUs) + + ginkgo.By("running a non-Gu pod - it shouldn't use reserved system CPUs with strict-cpu-reservation option enabled") + runNonGuPodTest(ctx, f, cpuCap, reservedSystemCPUs) + }) + + ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + framework.Logf("SMT level %d", smtLevel) + + // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably + // check what we do have in the node. + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + // the order between negative and positive doesn't really matter + runSMTAlignmentNegativeTests(ctx, f) + runSMTAlignmentPositiveTests(ctx, f, smtLevel, cpuset.New()) + }) + + ginkgo.It("should assign CPUs as expected based on strict SMT alignment, reservedSystemCPUs should be excluded (both strict-cpu-reservation and full-pcpus-only options enabled)", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < 2 { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at last smtLevel*2 virtual cpus + if cpuAlloc < int64(smtLevel*2) { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, smtLevel*2) + } + + framework.Logf("SMT level %d", smtLevel) + + reservedSystemCPUs := cpuset.New(0) + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.StrictCPUReservationOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + // the order between negative and positive doesn't really matter + runSMTAlignmentNegativeTests(ctx, f) + runSMTAlignmentPositiveTests(ctx, f, smtLevel, reservedSystemCPUs) + }) + + ginkgo.It("should not enforce CFS quota for containers with static CPUs assigned", func(ctx context.Context) { + if !IsCgroup2UnifiedMode() { + e2eskipper.Skipf("Skipping since CgroupV2 not used") + } + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + if cpuAlloc < 1 { // save expensive kubelet restart + e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + disableCPUQuotaWithExclusiveCPUs: true, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU + runCfsQuotaGuPods(ctx, f, true, cpuAlloc) + }) + + ginkgo.It("should keep enforcing the CFS quota for containers with static CPUs assigned and feature gate disabled", func(ctx context.Context) { + if !IsCgroup2UnifiedMode() { + e2eskipper.Skipf("Skipping since CgroupV2 not used") + } + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + if cpuAlloc < 1 { // save expensive kubelet restart + e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + disableCPUQuotaWithExclusiveCPUs: false, + }, false, false, + ) + + updateKubeletConfig(ctx, f, newCfg, true) + + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU + runCfsQuotaGuPods(ctx, f, false, cpuAlloc) + }) + + f.It("should not reuse CPUs of restartable init containers", feature.SidecarContainers, func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip rest of the tests if CPU capacity < 3. + if cpuCap < 3 { + e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3, got %d", cpuCap) + } + + // Enable CPU Manager in the kubelet. + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod with a regular init container and a restartable init container") + ctrAttrs := []ctnAttribute{ + { + ctnName: "gu-init-container1", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + { + ctnName: "gu-restartable-init-container2", + cpuRequest: "1000m", + cpuLimit: "1000m", + restartPolicy: &containerRestartPolicyAlways, + }, + } + pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + ginkgo.By("checking if the expected cpuset was assigned") + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[0].Name) + framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name) + + reusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[0].Name, logs) + + gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String()) + + logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[1].Name) + framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name) + + nonReusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[1].Name, logs) + + gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String()) + + logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.Containers[0].Name, logs) + + gomega.Expect(cpus.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", cpus.String()) + + gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrueBecause("expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String())) + gomega.Expect(nonReusableCPUs.Intersection(cpus).IsEmpty()).To(gomega.BeTrueBecause("expected non-reusable cpuset [%s] to be disjoint from cpuset [%s]", nonReusableCPUs.String(), cpus.String())) + + ginkgo.By("by deleting the pods and waiting for container removal") + deletePods(ctx, f, []string{pod.Name}) + waitForContainerRemoval(ctx, pod.Spec.InitContainers[0].Name, pod.Name, pod.Namespace) + waitForContainerRemoval(ctx, pod.Spec.InitContainers[1].Name, pod.Name, pod.Namespace) + waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + }) + + ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + framework.Logf("SMT level %d", smtLevel) + + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + ctnAttrs := []ctnAttribute{ + { + ctnName: "test-gu-container-distribute-cpus-across-numa-disabled", + cpuRequest: "2000m", + cpuLimit: "2000m", + }, + } + pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + gomega.Expect(cpus).To(BePackedCPUs()) + } + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + }) + + ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) { + var cpusNumPerNUMA, numaNodeNum int + + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + framework.Logf("SMT level %d", smtLevel) + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + // this test is intended to be run on a multi-node NUMA system and + // a system with at least 4 cores per socket, hostcheck skips test + // if above requirements are not satisfied + numaNodeNum, _, _, cpusNumPerNUMA = hostCheck() + + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed + // across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation. + // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single + // NUMA node. We have to pick cpuRequest such that: + // 1. CPURequest > cpusNumPerNUMA + // 2. Not occupy all the CPUs on the node ande leave room for reserved CPU + // 3. CPURequest is a multiple if number of NUMA nodes to allow equal CPU distribution across NUMA nodes + // + // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNuma * numaNodeNum) - reservedCPUscount) + // Considering all these constraints we select: CPURequest= (cpusNumPerNUMA-smtLevel)*numaNodeNum + + cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum + ctnAttrs := []ctnAttribute{ + { + ctnName: "test-gu-container-distribute-cpus-across-numa", + cpuRequest: fmt.Sprintf("%d", cpuReq), + cpuLimit: fmt.Sprintf("%d", cpuReq), + }, + } + pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + // We expect a perfectly even spilit i.e. equal distribution across NUMA Node as the CPU Request is 4*smtLevel*numaNodeNum. + expectedSpread := cpus.Size() / numaNodeNum + gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread)) + } + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) +} + +func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) { + // negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection + ctnAttrs := []ctnAttribute{ + { + ctnName: "gu-container-neg", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + } + pod := makeCPUManagerPod("gu-pod", ctnAttrs) + // CreateSync would wait for pod to become Ready - which will never happen if production code works as intended! + pod = e2epod.NewPodClient(f).Create(ctx, pod) + + err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) { + if pod.Status.Phase != v1.PodPending { + return true, nil + } + return false, nil + }) + framework.ExpectNoError(err) + pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + if pod.Status.Phase != v1.PodFailed { + framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) + } + if !isSMTAlignmentError(pod) { + framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason) + } + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) +} + +func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int, strictReservedCPUs cpuset.CPUSet) { + // positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores + // 1. are core siblings + // 2. take a full core + // WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels. + // this means on more-than-2-way SMT systems this test will prove nothing + ctnAttrs := []ctnAttribute{ + { + ctnName: "gu-container-pos", + cpuRequest: "2000m", + cpuLimit: "2000m", + }, + } + pod := makeCPUManagerPod("gu-pod", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + gomega.Expect(cpus.Intersection(strictReservedCPUs).IsEmpty()).To(gomega.BeTrueBecause("cpuset %q should not contain strict reserved cpus %q", cpus.String(), strictReservedCPUs.String())) + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + } + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) +} + +func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) { + framework.Logf("validating cpus: %v", cpus) + + if cpus.Size()%smtLevel != 0 { + framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel) + } + + // now check all the given cpus are thread siblings. + // to do so the easiest way is to rebuild the expected set of siblings from all the cpus we got. + // if the expected set matches the given set, the given set was good. + siblingsCPUs := cpuset.New() + for _, cpuID := range cpus.UnsortedList() { + threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID)))) + framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name) + siblingsCPUs = siblingsCPUs.Union(threadSiblings) + } + + framework.Logf("siblings cpus: %v", siblingsCPUs) + if !siblingsCPUs.Equals(cpus) { + framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs) + } +} + +func isSMTAlignmentError(pod *v1.Pod) bool { + re := regexp.MustCompile(`SMT.*Alignment.*Error`) + return re.MatchString(pod.Status.Reason) +} + +// getNumaNodeCPUs retrieves CPUs for each NUMA node. +func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { + numaNodes := make(map[int]cpuset.CPUSet) + nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist") + if err != nil { + return nil, err + } + + for _, nodePath := range nodePaths { + data, err := os.ReadFile(nodePath) + framework.ExpectNoError(err, "Error obtaning CPU information from the node") + cpuSet := strings.TrimSpace(string(data)) + cpus, err := cpuset.Parse(cpuSet) + framework.ExpectNoError(err, "Error parsing CPUset") + + // Extract node ID from path (e.g., "node0" -> 0) + base := filepath.Base(filepath.Dir(nodePath)) + nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node")) + if err != nil { + continue + } + numaNodes[nodeID] = cpus + } + + return numaNodes, nil +} + +func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { + framework.Logf("got pod logs: <%v>", logs) + cpus, err := cpuset.Parse(strings.TrimSpace(logs)) + framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) + return cpus +} + +// computeNUMADistribution calculates CPU distribution per NUMA node. +func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { + numaCPUs, err := getNumaNodeCPUs() + framework.ExpectNoError(err, "Error retrieving NUMA nodes") + framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) + + distribution := make(map[int]int) + for node, cpus := range numaCPUs { + distribution[node] = cpus.Intersection(allocatedCPUs).Size() + } + + framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) + return distribution +} + +// Custom matcher for checking packed CPUs. +func BePackedCPUs() gomegatypes.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + // This assumption holds true if there are enough CPUs on a single NUMA node. + // We are intentionally limiting the CPU request to 2 to minimize the number + // of CPUs required to fulfill this case and therefore maximize the chances + // of correctly validating this case. + if count == allocatedCPUs.Size() { + return true, nil + } + } + return false, nil + }).WithMessage("expected CPUs to be packed") +} + +// Custom matcher for checking distributed CPUs. +func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + if count != expectedSpread { + return false, nil + } + } + return true, nil + }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) +} + +// Serial because the test updates kubelet configuration. +var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() { + f := framework.NewDefaultFramework("cpu-manager-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + ginkgo.Context("With kubeconfig updated with static CPU Manager policy run the CPU Manager tests", func() { + runCPUManagerTests(f) + }) +}) diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go new file mode 100644 index 0000000000000..4b2ad1144bd05 --- /dev/null +++ b/test/e2e_node/pod_resize_test.go @@ -0,0 +1,1737 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2enode + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/strategicpatch" + clientset "k8s.io/client-go/kubernetes" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + testutils "k8s.io/kubernetes/test/utils" + admissionapi "k8s.io/pod-security-admission/api" + "k8s.io/utils/cpuset" +) + +const ( + fakeExtendedResource = "dummy.com/dummy" +) + +func patchNode(ctx context.Context, client clientset.Interface, old *v1.Node, new *v1.Node) error { + oldData, err := json.Marshal(old) + if err != nil { + return err + } + + newData, err := json.Marshal(new) + if err != nil { + return err + } + patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{}) + if err != nil { + return fmt.Errorf("failed to create merge patch for node %q: %w", old.Name, err) + } + _, err = client.CoreV1().Nodes().Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status") + return err +} + +func addExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string, extendedResourceQuantity resource.Quantity) { + extendedResource := v1.ResourceName(extendedResourceName) + + ginkgo.By("Adding a custom resource") + OriginalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + node := OriginalNode.DeepCopy() + node.Status.Capacity[extendedResource] = extendedResourceQuantity + node.Status.Allocatable[extendedResource] = extendedResourceQuantity + err = patchNode(context.Background(), clientSet, OriginalNode.DeepCopy(), node) + framework.ExpectNoError(err) + + gomega.Eventually(func() error { + node, err = clientSet.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + fakeResourceCapacity, exists := node.Status.Capacity[extendedResource] + if !exists { + return fmt.Errorf("node %s has no %s resource capacity", node.Name, extendedResourceName) + } + if expectedResource := resource.MustParse("123"); fakeResourceCapacity.Cmp(expectedResource) != 0 { + return fmt.Errorf("node %s has resource capacity %s, expected: %s", node.Name, fakeResourceCapacity.String(), expectedResource.String()) + } + + return nil + }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) +} + +func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string) { + extendedResource := v1.ResourceName(extendedResourceName) + + ginkgo.By("Removing a custom resource") + originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + node := originalNode.DeepCopy() + delete(node.Status.Capacity, extendedResource) + delete(node.Status.Allocatable, extendedResource) + err = patchNode(context.Background(), clientSet, originalNode.DeepCopy(), node) + framework.ExpectNoError(err) + + gomega.Eventually(func() error { + node, err = clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + if _, exists := node.Status.Capacity[extendedResource]; exists { + return fmt.Errorf("node %s has resource capacity %s which is expected to be removed", node.Name, extendedResourceName) + } + + return nil + }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) +} + +func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, cpuManagerPolicyName string, cpuManagerPolicyOptions map[string]string, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + if cpuManagerPolicyName != "" { + if cpuManagerPolicyOptions != nil { + func() { + var cpuAlloc int64 + for policyOption, policyOptionValue := range cpuManagerPolicyOptions { + if policyOption == cpumanager.FullPCPUsOnlyOption && policyOptionValue == "true" { + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < 2 { + e2eskipper.Skipf("Skipping Pod Resize along side CPU Manager %s tests since SMT disabled", policyOption) + } + + // our tests want to allocate a full core, so we need at last 2*2=4 virtual cpus + if cpuAlloc < int64(smtLevel*2) { + e2eskipper.Skipf("Skipping Pod resize along side CPU Manager %s tests since the CPU capacity < 4", policyOption) + } + + framework.Logf("SMT level %d", smtLevel) + return + } + } + }() + + // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably + // check what we do have in the node. + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuManagerPolicyOptions, + }, + isInPlacePodVerticalScalingAllocatedStatusEnabled, + isInPlacePodVerticalScalingExclusiveCPUsEnabled, + ) + updateKubeletConfig(ctx, f, newCfg, true) + } else { + var cpuCap int64 + cpuCap, _, _ = getLocalNodeCPUDetails(ctx, f) + // Skip CPU Manager tests altogether if the CPU capacity < 2. + if cpuCap < 2 { + e2eskipper.Skipf("Skipping Pod Resize alongside CPU Manager tests since the CPU capacity < 2") + } + // Enable CPU Manager in the kubelet. + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.CPUSet{}, + }, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + updateKubeletConfig(ctx, f, newCfg, true) + } + } +} + +type cpuManagerPolicyConfig struct { + name string + title string + options map[string]string +} + +func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []podresize.ResizableContainerInfo + patchString string + expected []podresize.ResizableContainerInfo + addExtendedResource bool + } + + noRestart := v1.NotRequired + doRestart := v1.RestartContainer + tests := []testCase{ + { + name: "Guaranteed QoS pod, one container - increase CPU & memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m","memory":"250Mi"},"limits":{"cpu":"100m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "100Mi", MemLim: "100Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase: CPU (c1,c3), memory (c2) ; decrease: CPU (c2), memory (c1,c3)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"memory":"600Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"150m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"cpu":"500m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"200m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"400m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"500m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and decrease memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and decrease CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests - decrease memory request", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, one container - decrease CPU (RestartContainer) & memory (NotRequired)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"100Mi"},"limits":{"cpu":"100m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "100m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - increase c1 resources, no change for c2, decrease c3 resources (no net change for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"150m","memory":"150Mi"},"limits":{"cpu":"250m","memory":"250Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "250m", MemReq: "150Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - decrease c1 resources, increase c2 resources, no change for c3 (net increase for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"50Mi"},"limits":{"cpu":"150m","memory":"150Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"350m","memory":"350Mi"},"limits":{"cpu":"450m","memory":"450Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "150m", MemReq: "50Mi", MemLim: "150Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "350m", CPULim: "450m", MemReq: "350Mi", MemLim: "450Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net decrease for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory with an extended resource", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi", + ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi", + ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + addExtendedResource: true, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, decrease c2 resources, decrease c3 (net decrease for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"1","memory":"150Mi"},"limits":{"cpu":"1","memory":"250Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "150Mi", MemLim: "250Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net increase for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"350Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "4", + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "4", + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"50Mi"},"limits":{"cpu":"4","memory":"50Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"240Mi"},"limits":{"cpu":"2","memory":"240Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + if tc.addExtendedResource { + nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) + framework.ExpectNoError(err) + + for _, node := range nodes.Items { + addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) + } + defer func() { + for _, node := range nodes.Items { + removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) + } + }() + } + + ginkgo.By("creating pod") + newPod := podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + podresize.VerifyPodResources(newPod, tc.containers) + ginkgo.By("verifying initial pod resize policy is as expected") + podresize.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, tc.containers). + Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + + patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + podresize.VerifyPodResources(patchedPod, expected) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expected)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + podresize.VerifyPodResources(resizedPod, expected) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying pod Cpus allowed list value after resize") + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.expected). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.containers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + + rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-errors") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []podresize.ResizableContainerInfo + patchString string + patchError string + expected []podresize.ResizableContainerInfo + } + + tests := []testCase{ + { + name: "BestEffort QoS pod, one container - try requesting memory, expect error", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + patchError: "Pod QoS is immutable", + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + }, + }, + { + name: "BestEffort QoS pod, three containers - try requesting memory for c1, expect error", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + { + Name: "c2", + }, + { + Name: "c3", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + patchError: "Pod QoS is immutable", + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + { + Name: "c2", + }, + { + Name: "c3", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + ginkgo.By("creating pod") + newPod := podClient.CreateSync(ctx, testPod) + + perr := e2epod.WaitForPodCondition(ctx, f.ClientSet, newPod.Namespace, newPod.Name, "Ready", timeouts.PodStartSlow, testutils.PodRunningReady) + framework.ExpectNoError(perr, "pod %s/%s did not go running", newPod.Namespace, newPod.Name) + framework.Logf("pod %s/%s running", newPod.Namespace, newPod.Name) + + ginkgo.By("verifying initial pod resources, allocations, and policy are as expected") + podresize.VerifyPodResources(newPod, tc.containers) + podresize.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources and cgroup config are as expected") + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) + + ginkgo.By("patching pod for resize") + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(tc.patchString), metav1.PatchOptions{}) + if tc.patchError == "" { + framework.ExpectNoError(pErr, "failed to patch pod for resize") + } else { + gomega.Expect(pErr).To(gomega.HaveOccurred(), tc.patchError) + patchedPod = newPod + } + + ginkgo.By("verifying pod resources after patch") + podresize.VerifyPodResources(patchedPod, tc.expected) + + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +// NOTE: Pod resize scheduler resource quota tests are out of scope in e2e_node tests, +// because in e2e_node tests +// a) scheduler and controller manager is not running by the Node e2e +// b) api-server in services doesn't start with --enable-admission-plugins=ResourceQuota +// and is not possible to start it from TEST_ARGS +// Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() +// in test/e2e/node/pod_resize.go + +var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func() { + + policiesGeneralAvailability := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyNone), + title: "", + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with no options", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + policiesBeta := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + /*policiesAlpha := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "true", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "true", + }, + }, + }*/ + + for idp := range policiesGeneralAvailability { + doPodResizeTests(policiesGeneralAvailability[idp], false, false) + doPodResizeTests(policiesGeneralAvailability[idp], true, false) + doPodResizeTests(policiesGeneralAvailability[idp], false, true) + doPodResizeTests(policiesGeneralAvailability[idp], true, true) + doPodResizeErrorTests(policiesGeneralAvailability[idp], false, false) + doPodResizeErrorTests(policiesGeneralAvailability[idp], true, false) + doPodResizeErrorTests(policiesGeneralAvailability[idp], false, true) + doPodResizeErrorTests(policiesGeneralAvailability[idp], true, true) + } + + for idp := range policiesBeta { + doPodResizeTests(policiesBeta[idp], false, false) + doPodResizeTests(policiesBeta[idp], true, false) + doPodResizeTests(policiesBeta[idp], false, true) + doPodResizeTests(policiesBeta[idp], true, true) + doPodResizeErrorTests(policiesBeta[idp], false, false) + doPodResizeErrorTests(policiesBeta[idp], true, false) + doPodResizeErrorTests(policiesBeta[idp], false, true) + doPodResizeErrorTests(policiesBeta[idp], true, true) + } + + /*for idp := range policiesAlpha { + doPodResizeTests(policiesAlpha[idp], true, false) + doPodResizeTests(policiesAlpha[idp], true, true) + doPodResizeErrorTests(policiesAlpha[idp], true, false) + doPodResizeErrorTests(policiesAlpha[idp], true, true) + }*/ + +}) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 8b51be319cd56..27f961e6e425b 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -184,7 +184,7 @@ func waitForKubeletToStart(ctx context.Context, f *framework.Framework) { // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) - }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + }, 5*time.Minute, 2*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) // Wait for the Kubelet to be ready. gomega.Eventually(ctx, func(ctx context.Context) error { @@ -506,7 +506,7 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) { return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. Containers: %+v", len(containers), containers) } return nil - }, 2*time.Minute, 1*time.Second).Should(gomega.Succeed()) + }, 5*time.Minute, 2*time.Second).Should(gomega.Succeed()) } func getPidsForProcess(name, pidFile string) ([]int, error) { From 5ab81b1021d830582940541ad9318073e27f8351 Mon Sep 17 00:00:00 2001 From: Chunxia Guo/Modem Solution Lab /SRC-Beijing/Staff Engineer/Samsung Electronics Date: Fri, 21 Feb 2025 14:27:20 +0800 Subject: [PATCH 02/15] Support InPlacePodVerticalScaling for Static CPU management policy --- pkg/api/pod/testing/make.go | 6 + pkg/apis/core/validation/validation.go | 48 ++ pkg/apis/core/validation/validation_test.go | 82 ++ pkg/kubelet/cm/cpumanager/cpu_assignment.go | 372 ++++++++- .../cm/cpumanager/cpu_assignment_test.go | 467 +++++++++++ pkg/kubelet/cm/cpumanager/policy_static.go | 31 +- pkg/registry/core/pod/strategy.go | 1 + .../common/node/framework/podresize/resize.go | 14 +- test/e2e_node/pod_resize_test.go | 734 ++++++++++++++++++ 9 files changed, 1743 insertions(+), 12 deletions(-) diff --git a/pkg/api/pod/testing/make.go b/pkg/api/pod/testing/make.go index b88c4a02234ea..597b05a5b07b5 100644 --- a/pkg/api/pod/testing/make.go +++ b/pkg/api/pod/testing/make.go @@ -299,6 +299,12 @@ func SetContainerResources(rr api.ResourceRequirements) TweakContainer { } } +func SetContainerEnv(env []api.EnvVar) TweakContainer { + return func(cnr *api.Container) { + cnr.Env = env + } +} + func SetContainerPorts(ports ...api.ContainerPort) TweakContainer { return func(cnr *api.Container) { cnr.Ports = ports diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index b77b70937b390..58428e58ab0d5 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -68,6 +68,7 @@ import ( "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/fieldpath" + "k8s.io/utils/cpuset" ) const isNegativeErrorMsg string = apimachineryvalidation.IsNegativeErrorMsg @@ -6315,6 +6316,7 @@ func ValidatePodResize(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel var newContainers []core.Container for ix, container := range newPodSpecCopy.Containers { dropCPUMemoryResourcesFromContainer(&container, &oldPod.Spec.Containers[ix]) + allErrs = append(allErrs, dropMustKeepCPUsEnvFromContainer(&container, &oldPod.Spec.Containers[ix], specPath)...) if !apiequality.Semantic.DeepEqual(container, oldPod.Spec.Containers[ix]) { // This likely means that the user has made changes to resources other than CPU and memory for regular container. errs := field.Forbidden(specPath, "only cpu and memory resources are mutable") @@ -6490,6 +6492,52 @@ func dropCPUMemoryResourceRequirementsUpdates(resources *core.ResourceRequiremen return resources } +func removeEnvVar(envs []core.EnvVar, nameToRemove string) []core.EnvVar { + var newEnvs []core.EnvVar + for _, env := range envs { + if env.Name != nameToRemove { + newEnvs = append(newEnvs, env) + } + } + return newEnvs +} + +// dropMustKeepCPUsEnvFromContainer deletes the "mustKeepCPUs" in env from the container, and copies them from the old pod container resources if present. +func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecContainer *core.Container, fldPath *field.Path) field.ErrorList { + allErrs := field.ErrorList{} + // the element named "mustKeepCPUs" in env can be update or add + existNewMustKeepCPUs := false + existOldMustKeepCPUs := false + for jx, newEnv := range container.Env { + if newEnv.Name == "mustKeepCPUs" { + existNewMustKeepCPUs = true + _, err := cpuset.Parse(newEnv.Value) + if err != nil { + allErrs = append(allErrs, field.Invalid(fldPath, newEnv, "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed")) + } + // Change mustKeepCPUs + for _, oldEnv := range oldPodSpecContainer.Env { + if oldEnv.Name == "mustKeepCPUs" { + existOldMustKeepCPUs = true + container.Env[jx] = oldEnv + break + } + } + // Add mustKeepCPUs + if !existOldMustKeepCPUs && (len(container.Env)-len(oldPodSpecContainer.Env)) == 1 { + // Delete "mustKeepCPUs" in newPod to make newPod equal to oldPod + container.Env = removeEnvVar(container.Env, "mustKeepCPUs") + } + break + } + } + // Delete mustKeepCPUs + if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { + oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") + } + return allErrs +} + // isPodResizeRequestSupported checks whether the pod is running on a node with InPlacePodVerticalScaling enabled. func isPodResizeRequestSupported(pod core.Pod) bool { // TODO: Remove this after GA+3 releases of InPlacePodVerticalScaling diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 7ead1313f4dcd..f0e54d1618604 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -28205,6 +28205,46 @@ func TestValidatePodResize(t *testing.T) { })) } + mkPodWith1Env := func(envName1, envValue1 string, tweaks ...podtest.Tweak) *core.Pod { + return podtest.MakePod("pod", append(tweaks, + podtest.SetContainers( + podtest.MakeContainer( + "container", + podtest.SetContainerEnv( + []core.EnvVar{ + { + Name: envName1, + Value: envValue1, + }, + }, + ), + ), + ), + )...) + } + + mkPodWith2Env := func(envName1, envValue1, envName2, envValue2 string, tweaks ...podtest.Tweak) *core.Pod { + return podtest.MakePod("pod", append(tweaks, + podtest.SetContainers( + podtest.MakeContainer( + "container", + podtest.SetContainerEnv( + []core.EnvVar{ + { + Name: envName1, + Value: envValue1, + }, + { + Name: envName2, + Value: envValue2, + }, + }, + ), + ), + ), + )...) + } + tests := []struct { test string old *core.Pod @@ -28801,6 +28841,48 @@ func TestValidatePodResize(t *testing.T) { )), err: "spec: Forbidden: only cpu and memory resources are mutable", }, + { + test: "Pod env:mustKeepCPUs change value", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "", + }, + { + test: "Pod env:mustKeepCPUs add value", + old: mkPodWith1Env("env1", "a"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "", + }, + { + test: "Pod env:mustKeepCPUs delete", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith1Env("env1", "a"), + err: "", + }, + { + test: "Pod env:env1 change is forbidden", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "b", "mustKeepCPUs", "0"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:env1 add is forbidden", + old: mkPodWith1Env("mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:env1 delete is forbidden", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith1Env("mustKeepCPUs", "0"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:mustKeepCPUs delete", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1s2"), + err: "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed", + }, } for _, test := range tests { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index fc22bbb9318ba..e33ae28e80711 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -95,6 +95,11 @@ type numaOrSocketsFirstFuncs interface { sortAvailableNUMANodes() []int sortAvailableSockets() []int sortAvailableCores() []int + takeFullFirstLevelForResize() + takeFullSecondLevelForResize() + sortAvailableNUMANodesForResize() []int + sortAvailableSocketsForResize() []int + sortAvailableCoresForResize() []int } type numaFirst struct{ acc *cpuAccumulator } @@ -204,8 +209,145 @@ func (s *socketsFirst) sortAvailableCores() []int { return result } +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of NUMA Nodes as the first level for resize. +func (n *numaFirst) takeFullFirstLevelForResize() { + n.acc.takeRemainCpusForFullNUMANodes() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of sockets as the second level for resize. +func (n *numaFirst) takeFullSecondLevelForResize() { + n.acc.takeRemainCpusForFullSockets() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then return the available NUMA nodes +// which have allocated CPUs to Container. +func (n *numaFirst) sortAvailableNUMANodesForResize() []int { + allocatedNumaNodesSet := n.acc.resultDetails.NUMANodes() + availableNumaNodesSet := n.acc.details.NUMANodes() + numas := allocatedNumaNodesSet.Intersection(availableNumaNodesSet).UnsortedList() + n.acc.sort(numas, n.acc.details.CPUsInNUMANodes) + return numas +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the socket which are allocated CPUs to the Container +// Secondly, pull the other sockets which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableSocketsForResize() []int { + var result []int + + // Sort allocated sockets + allocatedSocketsSet := n.acc.resultDetails.Sockets() + availableSocketsSet := n.acc.details.Sockets() + allocatedSockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + n.acc.sort(allocatedSockets, n.acc.details.CPUsInSockets) + result = append(result, allocatedSockets...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, numa := range n.sortAvailableNUMANodesForResize() { + socketSet := n.acc.details.SocketsInNUMANodes(numa) + sockets := socketSet.Difference(allocatedSocketsSet).UnsortedList() + n.acc.sort(sockets, n.acc.details.CPUsInSockets) + result = append(result, sockets...) + } + return result +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := n.acc.resultDetails.Cores() + availableCoresSet := n.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + n.acc.sort(allocatedCores, n.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, socket := range n.acc.sortAvailableSocketsForResize() { + coresSet := n.acc.details.CoresInSockets(socket) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + n.acc.sort(cores, n.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of NUMA Nodes as the first level for resize. +func (s *socketsFirst) takeFullFirstLevelForResize() { + s.acc.takeRemainCpusForFullSockets() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of sockets as the second level for resize. +func (s *socketsFirst) takeFullSecondLevelForResize() { + s.acc.takeRemainCpusForFullNUMANodes() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the NUMA nodes which are allocated CPUs to the Container +// Secondly, pull the other NUMA nodes which are not allocated CPUs to the Container, but contains in the sockets which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableNUMANodesForResize() []int { + var result []int + + // Sort allocated sockets + allocatedNUMANodesSet := s.acc.resultDetails.NUMANodes() + availableNUMANodesSet := s.acc.details.NUMANodes() + allocatedNUMANodes := allocatedNUMANodesSet.Intersection(availableNUMANodesSet).UnsortedList() + s.acc.sort(allocatedNUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, allocatedNUMANodes...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, socket := range s.sortAvailableSocketsForResize() { + NUMANodesSet := s.acc.details.NUMANodesInSockets(socket) + NUMANodes := NUMANodesSet.Difference(allocatedNUMANodesSet).UnsortedList() + s.acc.sort(NUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, NUMANodes...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then return the available sockets +// which have allocated CPUs to Container. +func (s *socketsFirst) sortAvailableSocketsForResize() []int { + allocatedSocketsSet := s.acc.resultDetails.Sockets() + availableSocketsSet := s.acc.details.Sockets() + sockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + s.acc.sort(sockets, s.acc.details.CPUsInSockets) + return sockets +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the socket which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := s.acc.resultDetails.Cores() + availableCoresSet := s.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + s.acc.sort(allocatedCores, s.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, NUMANode := range s.acc.sortAvailableNUMANodesForResize() { + coresSet := s.acc.details.CoresInNUMANodes(NUMANode) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + s.acc.sort(cores, s.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + type availableCPUSorter interface { sort() []int + sortForResize() []int } type sortCPUsPacked struct{ acc *cpuAccumulator } @@ -222,6 +364,14 @@ func (s sortCPUsSpread) sort() []int { return s.acc.sortAvailableCPUsSpread() } +func (s sortCPUsPacked) sortForResize() []int { + return s.acc.sortAvailableCPUsPackedForResize() +} + +func (s sortCPUsSpread) sortForResize() []int { + return s.acc.sortAvailableCPUsSpreadForResize() +} + // CPUSortingStrategy describes the CPU sorting solution within the socket scope. // Using topoDualSocketHT (12 CPUs, 2 sockets, 6 cores) as an example: // @@ -289,6 +439,9 @@ type cpuAccumulator struct { // cardinality equal to the total number of CPUs to accumulate. result cpuset.CPUSet + // `resultDetails` is the set of allocated CPUs in `result` + resultDetails topology.CPUDetails + numaOrSocketsFirst numaOrSocketsFirstFuncs // availableCPUSorter is used to control the cpu sorting result. @@ -305,6 +458,7 @@ func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, details: topo.CPUDetails.KeepOnly(availableCPUs), numCPUsNeeded: numCPUs, result: cpuset.New(), + resultDetails: topo.CPUDetails.KeepOnly(cpuset.New()), } if reusableCPUsForResize != nil { @@ -432,6 +586,21 @@ func (a *cpuAccumulator) freeCPUs() []int { return a.availableCPUSorter.sort() } +// Return true if this numa only allocated CPUs for this Container +func (a *cpuAccumulator) isFullNUMANodeForResize(numaID int) bool { + return a.resultDetails.CPUsInNUMANodes(numaID).Size()+a.details.CPUsInNUMANodes(numaID).Size() == a.topo.CPUDetails.CPUsInNUMANodes(numaID).Size() +} + +// Return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullSocketForResize(socketID int) bool { + return a.resultDetails.CPUsInSockets(socketID).Size()+a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() +} + +// return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullCoreForResize(coreID int) bool { + return a.resultDetails.CPUsInCores(coreID).Size()+a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() +} + // Sorts the provided list of NUMA nodes/sockets/cores/cpus referenced in 'ids' // by the number of available CPUs contained within them (smallest to largest). // The 'getCPU()' parameter defines the function that should be called to @@ -561,8 +730,108 @@ func (a *cpuAccumulator) sortAvailableCPUsSpread() []int { return result } +// Sort all NUMA nodes with at least one free CPU. +// +// If NUMA nodes are higher than sockets in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than sockets in the memory hierarchy" means that NUMA nodes +// contain a bigger number of CPUs (free and busy) than sockets, or equivalently that each NUMA node +// contains more than one socket. +// +// If instead NUMA nodes are lower in the memory hierarchy than sockets, they are sorted as follows. +// First part, sort the NUMA nodes which contains the CPUs allocated to Container. and these NUMA nodes +// are sorted by number of free CPUs that they contain. +// Second part, sort the NUMA nodes contained in the sockets which contains the CPUs allocated to Container, +// but exclude the NUMA nodes in first part. these NUMA nodes sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the sockets that contain them. Then, for each +// socket they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableNUMANodesForResize() []int { + return a.numaOrSocketsFirst.sortAvailableNUMANodesForResize() +} + +// Sort all sockets with at least one free CPU. +// +// If sockets are higher than NUMA nodes in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than NUMA nodes in the memory hierarchy" means that +// sockets contain a bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each +// socket contains more than one NUMA node. +// +// If instead sockets are lower in the memory hierarchy than NUMA nodes, they are sorted as follows. +// First part, sort the sockets which contains the CPUs allocated to Container. and these sockets +// are sorted by number of free CPUs that they contain. +// Second part, sort the sockets contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the sockets in first part. these sockets sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the NUMA nodes that contain them. Then, for each +// NUMA node they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableSocketsForResize() []int { + return a.numaOrSocketsFirst.sortAvailableSocketsForResize() +} + +// Sort all cores with at least one free CPU. +// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the cores are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the cores in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the cores in it are sorted +// by number of free CPUs that they contain. The order is always ascending. + +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCoresForResize() []int { + return a.numaOrSocketsFirst.sortAvailableCoresForResize() +} + +// Sort all free CPUs. +// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the CPUs are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. for each core, the CPUs in it are +// sorted by numerical ID. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the CPUs in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the CPUs in it are sorted +// by number of free CPUs that their cores contain. Finally, for each core, the CPUs in it are +// sorted by numerical ID. The order is always ascending. +// +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCPUsPackedForResize() []int { + var result []int + for _, core := range a.sortAvailableCoresForResize() { + cpus := a.details.CPUsInCores(core).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) + } + return result +} + +// Sort all available CPUs: +// - First by core using sortAvailableSocketsForResize(). +// - Then within each socket, sort cpus directly using the sort() algorithm defined above. +func (a *cpuAccumulator) sortAvailableCPUsSpreadForResize() []int { + var result []int + for _, socket := range a.sortAvailableSocketsForResize() { + cpus := a.details.CPUsInSockets(socket).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) + } + return result +} + func (a *cpuAccumulator) take(cpus cpuset.CPUSet) { a.result = a.result.Union(cpus) + a.resultDetails = a.topo.CPUDetails.KeepOnly(a.result) a.details = a.details.KeepOnly(a.details.CPUs().Difference(a.result)) a.numCPUsNeeded -= cpus.Size() } @@ -684,6 +953,55 @@ func (a *cpuAccumulator) takeRemainingCPUs() { } } +func (a *cpuAccumulator) takeRemainCpusForFullNUMANodes() { + for _, numa := range a.sortAvailableNUMANodesForResize() { + if a.isFullNUMANodeForResize(numa) { + cpusInNUMANode := a.details.CPUsInNUMANodes(numa) + if !a.needsAtLeast(cpusInNUMANode.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullNUMANodes: claiming NUMA node", "numa", numa, "cpusInNUMANode", cpusInNUMANode) + a.take(cpusInNUMANode) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullSockets() { + for _, socket := range a.sortAvailableSocketsForResize() { + if a.isFullSocketForResize(socket) { + cpusInSocket := a.details.CPUsInSockets(socket) + if !a.needsAtLeast(cpusInSocket.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullSockets: claiming Socket", "socket", socket, "cpusInSocket", cpusInSocket) + a.take(cpusInSocket) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullCores() { + for _, core := range a.sortAvailableCoresForResize() { + if a.isFullCoreForResize(core) { + cpusInCore := a.details.CPUsInCores(core) + if !a.needsAtLeast(cpusInCore.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullCores: claiming Core", "core", core, "cpusInCore", cpusInCore) + a.take(cpusInCore) + } + } +} + +func (a *cpuAccumulator) takeRemainingCPUsForResize() { + for _, cpu := range a.availableCPUSorter.sortForResize() { + klog.V(4).InfoS("takeRemainingCPUsForResize: claiming CPU", "cpu", cpu) + a.take(cpuset.New(cpu)) + if a.isSatisfied() { + return + } + } +} + // rangeNUMANodesNeededToSatisfy returns minimum and maximum (in this order) number of NUMA nodes // needed to satisfy the cpuAccumulator's goal of accumulating `a.numCPUsNeeded` CPUs, assuming that // CPU groups have size given by the `cpuGroupSize` argument. @@ -832,10 +1150,18 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // requires at least a NUMA node or socket's-worth of CPUs. If NUMA // Nodes map to 1 or more sockets, pull from NUMA nodes first. // Otherwise pull from sockets first. + acc.numaOrSocketsFirst.takeFullFirstLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.numaOrSocketsFirst.takeFullFirstLevel() if acc.isSatisfied() { return acc.result, nil } + acc.numaOrSocketsFirst.takeFullSecondLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.numaOrSocketsFirst.takeFullSecondLevel() if acc.isSatisfied() { return acc.result, nil @@ -855,6 +1181,10 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // a core's-worth of CPUs. // If `CPUSortingStrategySpread` is specified, skip taking the whole core. if cpuSortingStrategy != CPUSortingStrategySpread { + acc.takeRemainCpusForFullCores() + if acc.isSatisfied() { + return acc.result, nil + } acc.takeFullCores() if acc.isSatisfied() { return acc.result, nil @@ -864,6 +1194,10 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // 4. Acquire single threads, preferring to fill partially-allocated cores // on the same sockets as the whole cores we have already taken in this // allocation. + acc.takeRemainingCPUsForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.takeRemainingCPUs() if acc.isSatisfied() { return acc.result, nil @@ -954,7 +1288,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu } // Otherwise build an accumulator to start allocating CPUs from. - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } @@ -963,11 +1297,23 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu } // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. numas := acc.sortAvailableNUMANodes() + reusableCPUsForResizeDetail := acc.topo.CPUDetails.KeepOnly(cpuset.New()) + allocatedCPUsNumber := 0 + if reusableCPUsForResize != nil { + reusableCPUsForResizeDetail = acc.topo.CPUDetails.KeepOnly(*reusableCPUsForResize) + allocatedCPUsNumber = reusableCPUsForResize.Size() + } + allocatedNumas := reusableCPUsForResizeDetail.NUMANodes() + allocatedCPUPerNuma := make(mapIntInt, len(numas)) + for _, numa := range numas { + allocatedCPUPerNuma[numa] = reusableCPUsForResizeDetail.CPUsInNUMANodes(numa).Size() + } // Calculate the minimum and maximum possible number of NUMA nodes that // could satisfy this request. This is used to optimize how many iterations // of the loop we need to go through below. minNUMAs, maxNUMAs := acc.rangeNUMANodesNeededToSatisfy(cpuGroupSize) + minNUMAs = max(minNUMAs, allocatedNumas.Size()) // Try combinations of 1,2,3,... NUMA nodes until we find a combination // where we can evenly distribute CPUs across them. To optimize things, we @@ -987,10 +1333,16 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu return Break } + // Check if the 'allocatedNumas' CPU set is a subset of the 'comboSet' + comboSet := cpuset.New(combo...) + if !allocatedNumas.IsSubsetOf(comboSet) { + return Continue + } + // Check that this combination of NUMA nodes has enough CPUs to // satisfy the allocation overall. cpus := acc.details.CPUsInNUMANodes(combo...) - if cpus.Size() < numCPUs { + if (cpus.Size() + allocatedCPUsNumber) < numCPUs { return Continue } @@ -998,7 +1350,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // 'cpuGroupSize' across the NUMA nodes in this combo. numCPUGroups := 0 for _, numa := range combo { - numCPUGroups += (acc.details.CPUsInNUMANodes(numa).Size() / cpuGroupSize) + numCPUGroups += ((acc.details.CPUsInNUMANodes(numa).Size() + allocatedCPUPerNuma[numa]) / cpuGroupSize) } if (numCPUGroups * cpuGroupSize) < numCPUs { return Continue @@ -1010,7 +1362,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize for _, numa := range combo { cpus := acc.details.CPUsInNUMANodes(numa) - if cpus.Size() < distribution { + if (cpus.Size() + allocatedCPUPerNuma[numa]) < distribution { + return Continue + } + if allocatedCPUPerNuma[numa] > distribution { return Continue } } @@ -1025,7 +1380,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() } for _, numa := range combo { - availableAfterAllocation[numa] -= distribution + availableAfterAllocation[numa] -= (distribution - allocatedCPUPerNuma[numa]) } // Check if there are any remaining CPUs to distribute across the @@ -1132,7 +1487,8 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // size 'cpuGroupSize' from 'bestCombo'. distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForScaleDown) acc.take(cpus) } @@ -1147,7 +1503,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForScaleDown) acc.take(cpus) remainder -= cpuGroupSize } @@ -1172,4 +1528,4 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) -} +} \ No newline at end of file diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 34a768f6f60ea..e242a34733a2f 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -1080,6 +1080,473 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { } } +type takeByTopologyTestCaseForResize struct { + description string + topo *topology.CPUTopology + opts StaticPolicyOptions + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyTestCasesForResize(t *testing.T) []takeByTopologyTestCaseForResize { + return []takeByTopologyTestCaseForResize{ + { + "Allocated 1 CPUs, and take 1 cpus from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 1, + "", + cpuset.New(0), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 4), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 6), + }, + { + "Allocated 1 CPU, and take 3 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 3, + "", + cpuset.New(0, 1, 5), + }, + { + "Allocated 1 CPU, and take all cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 8, + "", + mustParseCPUSet(t, "0-7"), + }, + { + "Allocated 1 CPU, take a core from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 2, + "", + cpuset.New(5, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + cpuset.New(1, 3, 5, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus and 1 core of CPU from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + cpuset.New(0, 1, 3, 5, 6, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "20-39,60-79"), + }, + { + "Allocated 1 CPU, take a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 20, + "", + mustParseCPUSet(t, "30-39,70-79"), + }, + { + "Allocated 2 CPUs, take a socket and a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-58,60-79"), + cpuset.New(39, 59), + 60, + "", + mustParseCPUSet(t, "0-19,30-59,70-79"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-29,39-48,60-69"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken more CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "9,30-38,49"), + cpuset.New(), + 1, + "", + mustParseCPUSet(t, "9"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus and 1 CPU from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 41, + "", + mustParseCPUSet(t, "0-19,39-59"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from single socket with HT, 3 cpus", + topoSingleSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 3, + "", + mustParseCPUSet(t, "0,1,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 3 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 3, + "", + mustParseCPUSet(t, "1,3,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 6 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + mustParseCPUSet(t, "1,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 8 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + mustParseCPUSet(t, "0,1,2,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket without HT, 2 cpus", + topoDualSocketNoHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 2, + "", + mustParseCPUSet(t, "4,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with multi numa per socket and HT, 8 cpus", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 8, + "", + mustParseCPUSet(t, "20-26,39"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-39,60-69"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from quad socket four way with HT, 12 cpus", + topoQuadSocketFourWayHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-59,61-287"), + cpuset.New(60), + 8, + "", + mustParseCPUSet(t, "3,4,11,12,15,16,23,60"), + }, + } +} + +func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { + testCases := commonTakeByTopologyTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + strategy := CPUSortingStrategyPacked + if tc.opts.DistributeCPUsAcrossCores { + strategy = CPUSortingStrategySpread + } + + result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) + + if tc.expErr != "" && err != nil && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", result, tc.expResult) + } + }) + } +} + +type takeByTopologyExtendedTestCaseForResize struct { + description string + topo *topology.CPUTopology + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + cpuGroupSize int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyExtendedTestCasesForResize(t *testing.T) []takeByTopologyExtendedTestCaseForResize { + return []takeByTopologyExtendedTestCaseForResize{ + { + "Allocated 1 CPUs, allocate 4 full cores with 2 distributed across each NUMA node", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + 1, + "", + mustParseCPUSet(t, "0,6,2,8,1,7,5,11"), + }, + { + "Allocated 8 CPUs, allocate 32 full cores with 8 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-35,40-75"), + mustParseCPUSet(t, "36-39,76-79"), + 64, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-33,36-39,40-47,50-57,60-67,70-73,76-79"), + }, + { + "Allocated 2 CPUs, allocate 8 full cores with 2 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"), + mustParseCPUSet(t, "0,1"), + 16, + 1, + "", + mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"), + }, + { + "Allocated 1 CPUs, take 1 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 1, + 1, + "", + mustParseCPUSet(t, "11"), + }, + { + "Allocated 1 CPUs, take 2 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 2, + 1, + "", + mustParseCPUSet(t, "5,11"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 0, 1", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-66,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 1, 2 (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-36,40-47,50-57,60-67,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 2, 3 (to keep balance)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-46,50-56,60-67,70-77"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 2, 3 (to keep balance with cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-16,20-27,30-37,40-47,50-56,60-67,70-77"), + }, + { + "Allocated 4 CPUs, ensure bestRemainder chosen with NUMA nodes that have enough CPUs to satisfy the request", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-13,20-23,30-36,40-43,50-53,60-63,70-76"), + mustParseCPUSet(t, "0-3"), + 34, + 1, + "", + mustParseCPUSet(t, "0-3,10-13,20-23,30-34,40-43,50-53,60-63,70-74"), + }, + { + "Allocated 4 CPUs, ensure previous failure encountered on live machine has been fixed (1/1)", + topoDualSocketMultiNumaPerSocketHTLarge, + mustParseCPUSet(t, "0,128,30,31,158,159,47,171-175,62,63,190,191,75-79,203-207,94,96,222,223,101-111,229-239,126,127,254,255"), + mustParseCPUSet(t, "43-46"), + 28, + 1, + "", + mustParseCPUSet(t, "43-47,75-79,96,101-105,171-174,203-206,229-232"), + }, + { + "Allocated 14 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-39,48-79"), + mustParseCPUSet(t, "0-7,40-47"), + 48, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,40-47,50-57,60-67"), + }, + { + "Allocated 20 CPUs, allocated CPUs in numa0 is bigger than distribute CPUs, allocated CPUs by takeByTopologyNUMAPacked", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-39,50-79"), + mustParseCPUSet(t, "0-9,40-49"), + 48, + 1, + "", + mustParseCPUSet(t, "0-23,40-63"), + }, + { + "Allocated 12 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (taking all but 2 from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-29,32-39,48-69,72-79"), + mustParseCPUSet(t, "1-7,41-47"), + 48, + 1, + "", + mustParseCPUSet(t, "1-8,10-17,20-27,41-48,50-57,60-67"), + }, + { + "Allocated 10 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (even though all 8 could be allocated from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-29,31-39,42-69,71-79"), + mustParseCPUSet(t, "2-7,42-47"), + 48, + 1, + "", + mustParseCPUSet(t, "2-9,10-17,20-27,42-49,50-57,60-67"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 1, + "", + mustParseCPUSet(t, "20-26,30-36,60-65,70-75"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 2, + "", + mustParseCPUSet(t, "20-25,30-36,60-65,70-76"), + }, + } +} + +func TestTakeByTopologyNUMADistributedForResize(t *testing.T) { + testCases := commonTakeByTopologyExtendedTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) + if err != nil { + if tc.expErr == "" { + t.Errorf("unexpected error [%v]", err) + } + if tc.expErr != "" && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + return + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", result, tc.expResult) + } + }) + } +} + func mustParseCPUSet(t *testing.T, s string) cpuset.CPUSet { cpus, err := cpuset.Parse(s) if err != nil { diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index fe1dac2e40890..31ada05b24a23 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -453,7 +453,8 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, nil) + mustKeepCPUsForResize := p.GetMustKeepCPUs(container, cpuset) + newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, mustKeepCPUsForResize) if err != nil { klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) return err @@ -505,6 +506,34 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai return nil } +func (p *staticPolicy) GetMustKeepCPUs(container *v1.Container, oldCpuset cpuset.CPUSet) *cpuset.CPUSet { + mustKeepCPUs := cpuset.New() + for _, envVar := range container.Env { + if envVar.Name == "mustKeepCPUs" { + mustKeepCPUsInEnv, err := cpuset.Parse(envVar.Value) + if err == nil && mustKeepCPUsInEnv.Size() != 0 { + mustKeepCPUs = oldCpuset.Intersection(mustKeepCPUsInEnv) + } + klog.InfoS("mustKeepCPUs ", "is", mustKeepCPUs) + if p.options.FullPhysicalCPUsOnly { + // mustKeepCPUs must be aligned to the physical core + if (mustKeepCPUs.Size() % 2) != 0 { + return nil + } + mustKeepCPUsDetail := p.topology.CPUDetails.KeepOnly(mustKeepCPUs) + mustKeepCPUsDetailCores := mustKeepCPUsDetail.Cores() + if (mustKeepCPUs.Size() / mustKeepCPUsDetailCores.Size()) != p.cpuGroupSize { + klog.InfoS("mustKeepCPUs is nil") + return nil + } + } + return &mustKeepCPUs + } + } + klog.InfoS("mustKeepCPUs is nil") + return nil +} + // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet { assignments := s.GetCPUAssignments() diff --git a/pkg/registry/core/pod/strategy.go b/pkg/registry/core/pod/strategy.go index a77ad05142310..de33785d0e828 100644 --- a/pkg/registry/core/pod/strategy.go +++ b/pkg/registry/core/pod/strategy.go @@ -407,6 +407,7 @@ func dropNonResizeUpdatesForContainers(new, old []api.Container) []api.Container } oldCopyWithMergedResources[i].Resources = ctr.Resources oldCopyWithMergedResources[i].ResizePolicy = ctr.ResizePolicy + oldCopyWithMergedResources[i].Env = ctr.Env } return oldCopyWithMergedResources diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index e3836f012dfcb..c929750c330fc 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -62,6 +62,7 @@ type ResizableContainerInfo struct { RestartPolicy v1.ContainerRestartPolicy InitCtr bool CPUsAllowedListValue string + CPUsAllowedList string } func getTestResizePolicy(tcInfo ResizableContainerInfo) (resizePol []v1.ContainerResizePolicy) { @@ -518,11 +519,11 @@ func formatErrors(err error) error { func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod, wantCtrs []ResizableContainerInfo) error { ginkgo.GinkgoHelper() - verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string) error { + verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string, expectedCPUsAllowedList string) error { mycmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" calValue, _, err := e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", mycmd) framework.Logf("Namespace %s Pod %s Container %s - looking for Cpus allowed list value %s in /proc/self/status", - pod.Namespace, pod.Name, cName, expectedCPUsAllowedListValue) + pod.Namespace, pod.Name, cName, calValue) if err != nil { return fmt.Errorf("failed to find expected value '%s' in container '%s' Cpus allowed list '/proc/self/status'", cName, expectedCPUsAllowedListValue) } @@ -532,13 +533,20 @@ func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod if cpuTotalValue != expectedCPUsAllowedListValue { return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%s' not equal to expected '%s'", cName, calValue, cpuTotalValue, expectedCPUsAllowedListValue) } + if expectedCPUsAllowedList != "" { + cExpected, err := cpuset.Parse(expectedCPUsAllowedList) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for cexpectedCPUset") + if !c.Equals(cExpected) { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%v' not equal to expected '%v'", cName, calValue, c, cExpected) + } + } return nil } for _, ci := range wantCtrs { if ci.CPUsAllowedListValue == "" { continue } - err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue) + err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue, ci.CPUsAllowedList) if err != nil { return err } diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 4b2ad1144bd05..423c76c2d7928 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "strconv" "time" @@ -1735,3 +1736,736 @@ var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func }*/ }) + +func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + if isMultiNUMA() { + e2eskipper.Skipf("For simple test, only test one NUMA, multi NUMA -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []e2epod.ResizableContainerInfo + patchString string + expected []e2epod.ResizableContainerInfo + addExtendedResource bool + skipFlag bool + } + + setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { + cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) + firstContainerCpuset := cpuset.New() + firstAdditionCpuset := cpuset.New() + firstExpectedCpuset := cpuset.New() + secondContainerCpuset := cpuset.New() + secondAdditionCpuset := cpuset.New() + secondExpectedCpuset := cpuset.New() + + if tests.name == "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 2 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[0]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + } else if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + secondContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + secondContainerCpuset = cpuset.New(cpuList[0]) + } + tests.containers[1].CPUsAllowedList = secondContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[1]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + + secondAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() + secondAdditionCpuset = cpuset.New(cpuList[0]) + } + secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) + tests.expected[1].CPUsAllowedList = secondExpectedCpuset.String() + } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false") { + if cpuCap < 2 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(2, 3) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + if cpuList[1] != 1 { + firstContainerCpuset = mustParseCPUSet(getCPUSiblingList(1)) + } + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstExpectedCpuset = cpuset.New(firstContainerCpuset.List()[0]) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false" { + startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) + endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex + tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] + ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) + } + } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true") { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(2, 3, 4, 5) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + if cpuList[1] != 1 { + firstContainerCpuset = mustParseCPUSet(getCPUSiblingList(1)) + firstContainerCpuset = firstContainerCpuset.Union(mustParseCPUSet(getCPUSiblingList(2))) + } + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstExpectedCpuset = mustParseCPUSet(getCPUSiblingList(1)) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true" { + startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) + endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex + tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] + ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) + } + } + + ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) + ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) + } + + noRestart := v1.NotRequired + testsWithFalseFullCPUs := []testCase{ + { + name: "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"1","memory":"200Mi"},"limits":{"cpu":"1","memory":"200Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "11"}], "resources":{"requests":{"cpu":"1","memory":"400Mi"},"limits":{"cpu":"1","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + }, + } + + testsWithTrueFullCPUs := []testCase{ + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"200Mi"},"limits":{"cpu":"2","memory":"200Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "2,12"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + // Abnormal case, CPUs in mustKeepCPUs not full PCPUs, the mustKeepCPUs will be ignored + { + name: "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "1,2"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + var tests []testCase + if policy.options[cpumanager.FullPCPUsOnlyOption] == "false" { + tests = testsWithFalseFullCPUs + } else if policy.options[cpumanager.FullPCPUsOnlyOption] == "true" { + tests = testsWithTrueFullCPUs + } + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) + if tc.skipFlag { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") + } + + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + if tc.addExtendedResource { + nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) + framework.ExpectNoError(err) + + for _, node := range nodes.Items { + addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) + } + defer func() { + for _, node := range nodes.Items { + removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) + } + }() + } + + ginkgo.By("creating pod") + newPod := podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + e2epod.VerifyPodResources(newPod, tc.containers) + ginkgo.By("verifying initial pod resize policy is as expected") + e2epod.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, tc.containers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, tc.containers). + Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + e2epod.VerifyPodResources(patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + e2epod.VerifyPodResources(resizedPod, expectedContainers) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, expectedContainers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.containers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + ginkgo.By("First patch") + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + + rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + ginkgo.By("Second patch for rollback") + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testPod struct { + containers []e2epod.ResizableContainerInfo + patchString string + expected []e2epod.ResizableContainerInfo + } + + type testCase struct { + name string + testPod1 testPod + testPod2 testPod + skipFlag bool + } + + setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { + cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) + firstContainerCpuset := cpuset.New() + firstAdditionCpuset := cpuset.New() + firstExpectedCpuset := cpuset.New() + secondContainerCpuset := cpuset.New() + secondAdditionCpuset := cpuset.New() + secondExpectedCpuset := cpuset.New() + + if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.testPod1.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + secondContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + secondContainerCpuset = cpuset.New(cpuList[0]) + } + tests.testPod2.containers[1].CPUsAllowedList = secondContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[1]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.testPod1.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + + secondAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() + secondAdditionCpuset = cpuset.New(cpuList[0]) + } + secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) + tests.testPod2.expected[1].CPUsAllowedList = secondExpectedCpuset.String() + } + ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) + ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) + } + + noRestart := v1.NotRequired + tests := []testCase{ + { + name: "2 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", + testPod1: testPod{ + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + testPod2: testPod{ + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) + if tc.skipFlag { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") + } + + var patchedPod *v1.Pod + var pErr error + + createAndVerify := func(podName string, podClient *e2epod.PodClient, testContainers []e2epod.ResizableContainerInfo) (newPod *v1.Pod) { + var testPod *v1.Pod + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, fmt.Sprintf("resizepod-%s", podName), tStamp, testContainers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + ginkgo.By("creating pod") + newPod = podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + e2epod.VerifyPodResources(newPod, testContainers) + ginkgo.By("verifying initial pod resize policy is as expected") + e2epod.VerifyPodResizePolicy(newPod, testContainers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, testContainers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, testContainers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, testContainers). + Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + return newPod + } + + newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) + newPod2 := createAndVerify("testpod2", podClient, tc.testPod2.containers) + + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool, newPod *v1.Pod) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + e2epod.VerifyPodResources(patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + e2epod.VerifyPodResources(resizedPod, expectedContainers) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, expectedContainers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, initialContainers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", false, newPod1) + patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", false, newPod2) + + rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) + framework.ExpectNoError(err1) + rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) + framework.ExpectNoError(err2) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", true, newPod1) + patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", true, newPod2) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod1.Name) + deletePodSyncByName(ctx, f, newPod2.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod1.Name, newPod1.Namespace) + waitForAllContainerRemoval(ctx, newPod2.Name, newPod2.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) +} + +var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.WithSerial(), func() { + + policiesGeneralAvailability := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with no options", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) + doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) + doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) +}) \ No newline at end of file From 29452ba3ff6b7e5fb2a72b923fabde536ae6f7f0 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Fri, 21 Feb 2025 12:38:01 +0100 Subject: [PATCH 03/15] Fix go fmt isssues --- pkg/kubelet/cm/cpumanager/cpu_assignment.go | 2 +- test/e2e_node/pod_resize_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index e33ae28e80711..8160300e81ad3 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -1528,4 +1528,4 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) -} \ No newline at end of file +} diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 423c76c2d7928..7b79142536238 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -20,8 +20,8 @@ import ( "context" "encoding/json" "fmt" - "strings" "strconv" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -2468,4 +2468,4 @@ var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.Wit doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) -}) \ No newline at end of file +}) From 9b0a7aac66dca7279d289033bc7af08ae77e3b8d Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Sat, 22 Feb 2025 14:45:10 +0100 Subject: [PATCH 04/15] Fix mutation heuristic check of mustKeepCPUs, reason is clone --- pkg/apis/core/validation/validation.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index 58428e58ab0d5..fd1b59e704c9f 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -6519,7 +6519,7 @@ func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecConta for _, oldEnv := range oldPodSpecContainer.Env { if oldEnv.Name == "mustKeepCPUs" { existOldMustKeepCPUs = true - container.Env[jx] = oldEnv + container.Env[jx] = oldEnv // +k8s:verify-mutation:reason=clone break } } @@ -6533,7 +6533,7 @@ func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecConta } // Delete mustKeepCPUs if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { - oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") + oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") // +k8s:verify-mutation:reason=clone } return allErrs } From 5d4e9ee3c44bf434162f573b336c1ab5e9a68535 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 3 Mar 2025 15:02:24 +0100 Subject: [PATCH 05/15] Fix glangci-lint-pr failed test --- .../cm/cpumanager/policy_static_test.go | 2 +- test/e2e_node/pod_resize_test.go | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index c8f45e15f1f07..5b509656eb8cb 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -22,8 +22,8 @@ import ( "testing" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/klog/v2" diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 7b79142536238..a906c83451c43 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -1387,7 +1387,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, tc.containers). - Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { @@ -1419,11 +1419,11 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.expected). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.containers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2127,7 +2127,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, tc.containers). - Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { @@ -2158,11 +2158,11 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.containers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2366,7 +2366,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, testContainers). - Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } return newPod } @@ -2402,11 +2402,11 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, initialContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } From 6d524a1a7c064e9f9bcd99ce8eeff41de4dc0070 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Wed, 12 Mar 2025 14:29:55 +0100 Subject: [PATCH 06/15] Fix compile issue, due to update in e2e/framework removing rollback bool --- pkg/kubelet/cm/cpumanager/policy_static.go | 1 - test/e2e_node/pod_resize_test.go | 26 +++++++++++----------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 31ada05b24a23..fc10c0c150618 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -606,7 +606,6 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit result.Aligned = p.topology.CheckAlignment(result.CPUs) // Remove allocated CPUs from the shared CPUSet. - s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) if reusableCPUsForResize != nil { if reusableCPUsForResize.Size() < result.CPUs.Size() { // Scale up or creation has been performed diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index a906c83451c43..b0d5ccfc53a75 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -1390,7 +1390,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -1428,12 +1428,12 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling } } - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) framework.ExpectNoError(err) // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -2130,7 +2130,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -2140,7 +2140,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS e2epod.VerifyPodResources(patchedPod, expectedContainers) ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) // Check cgroup values only for containerd versions before 1.6.9 @@ -2168,13 +2168,13 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS } ginkgo.By("First patch") - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) framework.ExpectNoError(err) // Resize has been actuated, test rollback ginkgo.By("Second patch for rollback") - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -2374,7 +2374,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) newPod2 := createAndVerify("testpod2", podClient, tc.testPod2.containers) - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool, newPod *v1.Pod) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, newPod *v1.Pod) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -2384,7 +2384,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc e2epod.VerifyPodResources(patchedPod, expectedContainers) ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) // Check cgroup values only for containerd versions before 1.6.9 @@ -2411,16 +2411,16 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc } } - patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", false, newPod1) - patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", false, newPod2) + patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", newPod1) + patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", newPod2) rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) framework.ExpectNoError(err1) rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) framework.ExpectNoError(err2) // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", true, newPod1) - patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", true, newPod2) + patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", newPod1) + patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", newPod2) ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod1.Name) From 529a7c4445244df0bef80c044e0df8722e13fba3 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 17 Mar 2025 15:25:35 +0100 Subject: [PATCH 07/15] Fix compile issue and address review comment. Use new topology.Allocation struct (a CPU set plus alignment metadata) instead of CPU set, due to rebase. Remove duplicate unecessary SetDefaultCPUSet call as per review comment. --- test/e2e_node/cpu_manager_test.go | 60 +++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 8bdfdda7f4365..4ac1c4d91b82e 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -3213,10 +3213,14 @@ func runCPUManagerTests(f *framework.Framework) { } reservedSystemCPUs := cpuset.New(0) - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3239,12 +3243,16 @@ func runCPUManagerTests(f *framework.Framework) { cpuPolicyOptions := map[string]string{ cpumanager.StrictCPUReservationOption: "true", } - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3320,7 +3328,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: reservedSystemCPUs, enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3342,7 +3352,9 @@ func runCPUManagerTests(f *framework.Framework) { policyName: string(cpumanager.PolicyStatic), reservedSystemCPUs: cpuset.New(0), disableCPUQuotaWithExclusiveCPUs: true, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3363,7 +3375,9 @@ func runCPUManagerTests(f *framework.Framework) { policyName: string(cpumanager.PolicyStatic), reservedSystemCPUs: cpuset.New(0), disableCPUQuotaWithExclusiveCPUs: false, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3381,10 +3395,14 @@ func runCPUManagerTests(f *framework.Framework) { } // Enable CPU Manager in the kubelet. - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.CPUSet{}, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod with a regular init container and a restartable init container") @@ -3464,7 +3482,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3530,7 +3550,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed From 7879ca389d5d94db89d8c38727be9d11b3ea96c1 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Fri, 28 Mar 2025 13:25:42 +0100 Subject: [PATCH 08/15] Address sig-node meeting comments for mustKeepCPUs. - Revert introduction of API env mustKeepCPUs - Replace mustKeepCPUs with local checkpoint "Original" - Introduce "Original" / "Resized" in CPUManagerCheckpointV3 format - Add logic, refactor with Beta candidate - Fix lint issues - Fail if mustKeepCPUs are not subset of resulted CPUs - Fail if reusableCPUsForResize, mustKeepCPUs are not a subset of aligned CPUs - Fail if mustKeepCPUs are not a subset of reusable CPUs - TODO improve align resize tests, go through testing, corner cases refactor using cpumanager_test.go - TODO improve CPUManagerCheckpointV3 tests - TODO address code review/feedback to try different approach to allocate stepwise instead of once off when resizing - TODO check init-containers - TODO check migration from v2 to v3 CPU Manager checkpoint - TODO check kubectl failure when prohibited can this be done earlier? - WIP update CPU Manager tests to use refactored cpu_manager_test - TODO update topologymanager,cpumanager,memorymanager documentation --- pkg/api/pod/testing/make.go | 6 - pkg/apis/core/validation/validation.go | 48 - pkg/apis/core/validation/validation_test.go | 82 - pkg/kubelet/cm/cpumanager/cpu_assignment.go | 48 +- .../cm/cpumanager/cpu_assignment_test.go | 24 +- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 98 +- pkg/kubelet/cm/cpumanager/policy_static.go | 429 +- .../cm/cpumanager/policy_static_test.go | 179 +- pkg/kubelet/cm/cpumanager/state/checkpoint.go | 74 +- pkg/kubelet/cm/cpumanager/state/state.go | 14 +- .../cm/cpumanager/state/state_checkpoint.go | 92 +- .../cpumanager/state/state_checkpoint_test.go | 71 +- pkg/kubelet/cm/cpumanager/state/state_mem.go | 30 +- pkg/kubelet/cm/cpumanager/state/state_test.go | 6 +- .../cm/cpumanager/topology_hints_test.go | 20 +- pkg/kubelet/types/constants.go | 7 - pkg/registry/core/pod/strategy.go | 1 - .../common/node/framework/podresize/resize.go | 33 + test/e2e/feature/feature.go | 10 + test/e2e_node/cpu_manager_metrics_test.go | 6 +- test/e2e_node/cpu_manager_test.go | 4954 ++++++++++++----- test/e2e_node/pod_resize_test.go | 2471 -------- test/e2e_node/util.go | 4 +- test/e2e_node/util_machineinfo_unsupported.go | 4 + 24 files changed, 4174 insertions(+), 4537 deletions(-) delete mode 100644 test/e2e_node/pod_resize_test.go diff --git a/pkg/api/pod/testing/make.go b/pkg/api/pod/testing/make.go index 597b05a5b07b5..b88c4a02234ea 100644 --- a/pkg/api/pod/testing/make.go +++ b/pkg/api/pod/testing/make.go @@ -299,12 +299,6 @@ func SetContainerResources(rr api.ResourceRequirements) TweakContainer { } } -func SetContainerEnv(env []api.EnvVar) TweakContainer { - return func(cnr *api.Container) { - cnr.Env = env - } -} - func SetContainerPorts(ports ...api.ContainerPort) TweakContainer { return func(cnr *api.Container) { cnr.Ports = ports diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index fd1b59e704c9f..b77b70937b390 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -68,7 +68,6 @@ import ( "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/fieldpath" - "k8s.io/utils/cpuset" ) const isNegativeErrorMsg string = apimachineryvalidation.IsNegativeErrorMsg @@ -6316,7 +6315,6 @@ func ValidatePodResize(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel var newContainers []core.Container for ix, container := range newPodSpecCopy.Containers { dropCPUMemoryResourcesFromContainer(&container, &oldPod.Spec.Containers[ix]) - allErrs = append(allErrs, dropMustKeepCPUsEnvFromContainer(&container, &oldPod.Spec.Containers[ix], specPath)...) if !apiequality.Semantic.DeepEqual(container, oldPod.Spec.Containers[ix]) { // This likely means that the user has made changes to resources other than CPU and memory for regular container. errs := field.Forbidden(specPath, "only cpu and memory resources are mutable") @@ -6492,52 +6490,6 @@ func dropCPUMemoryResourceRequirementsUpdates(resources *core.ResourceRequiremen return resources } -func removeEnvVar(envs []core.EnvVar, nameToRemove string) []core.EnvVar { - var newEnvs []core.EnvVar - for _, env := range envs { - if env.Name != nameToRemove { - newEnvs = append(newEnvs, env) - } - } - return newEnvs -} - -// dropMustKeepCPUsEnvFromContainer deletes the "mustKeepCPUs" in env from the container, and copies them from the old pod container resources if present. -func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecContainer *core.Container, fldPath *field.Path) field.ErrorList { - allErrs := field.ErrorList{} - // the element named "mustKeepCPUs" in env can be update or add - existNewMustKeepCPUs := false - existOldMustKeepCPUs := false - for jx, newEnv := range container.Env { - if newEnv.Name == "mustKeepCPUs" { - existNewMustKeepCPUs = true - _, err := cpuset.Parse(newEnv.Value) - if err != nil { - allErrs = append(allErrs, field.Invalid(fldPath, newEnv, "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed")) - } - // Change mustKeepCPUs - for _, oldEnv := range oldPodSpecContainer.Env { - if oldEnv.Name == "mustKeepCPUs" { - existOldMustKeepCPUs = true - container.Env[jx] = oldEnv // +k8s:verify-mutation:reason=clone - break - } - } - // Add mustKeepCPUs - if !existOldMustKeepCPUs && (len(container.Env)-len(oldPodSpecContainer.Env)) == 1 { - // Delete "mustKeepCPUs" in newPod to make newPod equal to oldPod - container.Env = removeEnvVar(container.Env, "mustKeepCPUs") - } - break - } - } - // Delete mustKeepCPUs - if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { - oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") // +k8s:verify-mutation:reason=clone - } - return allErrs -} - // isPodResizeRequestSupported checks whether the pod is running on a node with InPlacePodVerticalScaling enabled. func isPodResizeRequestSupported(pod core.Pod) bool { // TODO: Remove this after GA+3 releases of InPlacePodVerticalScaling diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index f0e54d1618604..7ead1313f4dcd 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -28205,46 +28205,6 @@ func TestValidatePodResize(t *testing.T) { })) } - mkPodWith1Env := func(envName1, envValue1 string, tweaks ...podtest.Tweak) *core.Pod { - return podtest.MakePod("pod", append(tweaks, - podtest.SetContainers( - podtest.MakeContainer( - "container", - podtest.SetContainerEnv( - []core.EnvVar{ - { - Name: envName1, - Value: envValue1, - }, - }, - ), - ), - ), - )...) - } - - mkPodWith2Env := func(envName1, envValue1, envName2, envValue2 string, tweaks ...podtest.Tweak) *core.Pod { - return podtest.MakePod("pod", append(tweaks, - podtest.SetContainers( - podtest.MakeContainer( - "container", - podtest.SetContainerEnv( - []core.EnvVar{ - { - Name: envName1, - Value: envValue1, - }, - { - Name: envName2, - Value: envValue2, - }, - }, - ), - ), - ), - )...) - } - tests := []struct { test string old *core.Pod @@ -28841,48 +28801,6 @@ func TestValidatePodResize(t *testing.T) { )), err: "spec: Forbidden: only cpu and memory resources are mutable", }, - { - test: "Pod env:mustKeepCPUs change value", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "", - }, - { - test: "Pod env:mustKeepCPUs add value", - old: mkPodWith1Env("env1", "a"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "", - }, - { - test: "Pod env:mustKeepCPUs delete", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith1Env("env1", "a"), - err: "", - }, - { - test: "Pod env:env1 change is forbidden", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "b", "mustKeepCPUs", "0"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:env1 add is forbidden", - old: mkPodWith1Env("mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:env1 delete is forbidden", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith1Env("mustKeepCPUs", "0"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:mustKeepCPUs delete", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1s2"), - err: "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed", - }, } for _, test := range tests { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 8160300e81ad3..7e23e488bf540 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -451,7 +451,7 @@ type cpuAccumulator struct { availableCPUSorter availableCPUSorter } -func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) *cpuAccumulator { +func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) *cpuAccumulator { acc := &cpuAccumulator{ logger: logger, topo: topo, @@ -472,18 +472,18 @@ func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, } // Decrease of CPU resources ( scale down ) - // Take delta from allocated CPUs, if mustKeepCPUsForScaleDown + // Take delta from allocated CPUs, if mustKeepCPUsForResize // is not nil, use explicetely those. If it is nil // take delta starting from lowest CoreId of CPUs ( TODO esotsal, perhaps not needed). if numCPUs < reusableCPUsForResize.Size() { - if mustKeepCPUsForScaleDown != nil { + if mustKeepCPUsForResize != nil { // If explicetely CPUs to keep // during scale down is given ( this requires // addition in container[].resources ... which // could be possible to patch ? Esotsal Note This means // modifying API code - if !(mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - acc.take(mustKeepCPUsForScaleDown.Clone()) + if !(mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + acc.take(mustKeepCPUsForResize.Clone()) } else { return acc } @@ -960,7 +960,7 @@ func (a *cpuAccumulator) takeRemainCpusForFullNUMANodes() { if !a.needsAtLeast(cpusInNUMANode.Size()) { continue } - klog.V(4).InfoS("takeRemainCpusForFullNUMANodes: claiming NUMA node", "numa", numa, "cpusInNUMANode", cpusInNUMANode) + a.logger.V(4).Info("takeRemainCpusForFullNUMANodes: claiming NUMA node", "numa", numa, "cpusInNUMANode", cpusInNUMANode) a.take(cpusInNUMANode) } } @@ -973,7 +973,7 @@ func (a *cpuAccumulator) takeRemainCpusForFullSockets() { if !a.needsAtLeast(cpusInSocket.Size()) { continue } - klog.V(4).InfoS("takeRemainCpusForFullSockets: claiming Socket", "socket", socket, "cpusInSocket", cpusInSocket) + a.logger.V(4).Info("takeRemainCpusForFullSockets: claiming Socket", "socket", socket, "cpusInSocket", cpusInSocket) a.take(cpusInSocket) } } @@ -986,7 +986,7 @@ func (a *cpuAccumulator) takeRemainCpusForFullCores() { if !a.needsAtLeast(cpusInCore.Size()) { continue } - klog.V(4).InfoS("takeRemainCpusForFullCores: claiming Core", "core", core, "cpusInCore", cpusInCore) + a.logger.V(4).Info("takeRemainCpusForFullCores: claiming Core", "core", core, "cpusInCore", cpusInCore) a.take(cpusInCore) } } @@ -994,7 +994,7 @@ func (a *cpuAccumulator) takeRemainCpusForFullCores() { func (a *cpuAccumulator) takeRemainingCPUsForResize() { for _, cpu := range a.availableCPUSorter.sortForResize() { - klog.V(4).InfoS("takeRemainingCPUsForResize: claiming CPU", "cpu", cpu) + a.logger.V(4).Info("takeRemainingCPUsForResize: claiming CPU", "cpu", cpu) a.take(cpuset.New(cpu)) if a.isSatisfied() { return @@ -1128,23 +1128,24 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. -func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested to be retained is not a subset // of reusableCPUs, then we fail early - if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { - if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) } } - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } + // Algorithm: topology-aware best-fit // 1. Acquire whole NUMA nodes and sockets, if available and the container // requires at least a NUMA node or socket's-worth of CPUs. If NUMA @@ -1269,32 +1270,33 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // of size 'cpuGroupSize' according to the algorithm described above. This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. -func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested cannot be handed out in chunks of // 'cpuGroupSize', then we just call out the packing algorithm since we // can't distribute CPUs in this chunk size. // PreferAlignByUncoreCache feature not implemented here yet and set to false. // Support for PreferAlignByUncoreCache to be done at beta release. if (numCPUs % cpuGroupSize) != 0 { - return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) } // If the number of CPUs requested to be retained is not a subset // of reusableCPUs, then we fail early - if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { - if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) } } // Otherwise build an accumulator to start allocating CPUs from. - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForResize) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } + // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. numas := acc.sortAvailableNUMANodes() reusableCPUsForResizeDetail := acc.topo.CPUDetails.KeepOnly(cpuset.New()) @@ -1488,7 +1490,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForResize) acc.take(cpus) } @@ -1503,7 +1505,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForResize) acc.take(cpus) remainder -= cpuGroupSize } @@ -1527,5 +1529,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. - return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) } diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index e242a34733a2f..f3c94066cf62e 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -116,7 +116,7 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeSockets() sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { @@ -217,7 +217,7 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeNUMANodes() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -267,7 +267,7 @@ func TestCPUAccumulatorFreeSocketsAndNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) resultNUMANodes := acc.freeNUMANodes() if !reflect.DeepEqual(resultNUMANodes, tc.expectNUMANodes) { t.Errorf("expected NUMA Nodes %v to equal %v", resultNUMANodes, tc.expectNUMANodes) @@ -340,7 +340,7 @@ func TestCPUAccumulatorFreeCores(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCores() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -397,7 +397,7 @@ func TestCPUAccumulatorFreeCPUs(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCPUs() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -484,7 +484,7 @@ func TestCPUAccumulatorTake(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked, nil, nil) totalTaken := 0 for _, cpus := range tc.takeCPUs { acc.take(cpus) @@ -758,7 +758,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) + result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) } @@ -860,7 +860,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) { if tc.opts.DistributeCPUsAcrossCores { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) + result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err.Error() != tc.expErr { t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err) } @@ -1063,7 +1063,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, nil, nil) + result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, nil, nil) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) @@ -1317,6 +1317,7 @@ func commonTakeByTopologyTestCasesForResize(t *testing.T) []takeByTopologyTestCa } func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { + logger, _ := ktesting.NewTestContext(t) testCases := commonTakeByTopologyTestCasesForResize(t) for _, tc := range testCases { @@ -1326,7 +1327,7 @@ func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) + result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) @@ -1525,12 +1526,13 @@ func commonTakeByTopologyExtendedTestCasesForResize(t *testing.T) []takeByTopolo } func TestTakeByTopologyNUMADistributedForResize(t *testing.T) { + logger, _ := ktesting.NewTestContext(t) testCases := commonTakeByTopologyExtendedTestCasesForResize(t) for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) + result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 9755add442e00..67e2cc0d36bbd 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -53,9 +53,17 @@ type mockState struct { defaultCPUSet cpuset.CPUSet } +func (s *mockState) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + res, exists := s.assignments[podUID][containerName] + return res.Original.Clone(), exists +} + func (s *mockState) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { - res, ok := s.assignments[podUID][containerName] - return res.Clone(), ok + res, exists := s.assignments[podUID][containerName] + if res.Resized.IsEmpty() { + return res.Original.Clone(), exists + } + return res.Resized.Clone(), exists } func (s *mockState) GetDefaultCPUSet() cpuset.CPUSet { @@ -71,9 +79,15 @@ func (s *mockState) GetCPUSetOrDefault(podUID string, containerName string) cpus func (s *mockState) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { if _, exists := s.assignments[podUID]; !exists { - s.assignments[podUID] = make(map[string]cpuset.CPUSet) + s.assignments[podUID] = make(map[string]state.ContainerCPUAssignment) + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + } else { + if entry, exists := s.assignments[podUID][containerName]; !exists { + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + } else { + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: entry.Original, Resized: cset} + } } - s.assignments[podUID][containerName] = cset } func (s *mockState) SetDefaultCPUSet(cset cpuset.CPUSet) { @@ -613,18 +627,18 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { testCase.description, containerIDs[i], err) } - cset, found := mockState.assignments[string(testCase.pod.UID)][containers[i].Name] + assignment, found := mockState.assignments[string(testCase.pod.UID)][containers[i].Name] if !expCSets[i].IsEmpty() && !found { t.Errorf("StaticPolicy AddContainer() error (%v). expected container %v to be present in assignments %v", testCase.description, containers[i].Name, mockState.assignments) } - if found && !cset.Equals(expCSets[i]) { + if found && !assignment.Original.Equals(expCSets[i]) { t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v for container %v but got %v", - testCase.description, expCSets[i], containers[i].Name, cset) + testCase.description, expCSets[i], containers[i].Name, assignment.Original) } - cumCSet = cumCSet.Union(cset) + cumCSet = cumCSet.Union(assignment.Original) } if !testCase.stDefaultCPUSet.Difference(cumCSet).Equals(mockState.defaultCPUSet) { @@ -873,16 +887,16 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), @@ -921,16 +935,16 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), @@ -1036,16 +1050,16 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), @@ -1084,16 +1098,16 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: fmt.Errorf("fake container update error"), stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), @@ -1132,19 +1146,19 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, - "secondfakePodUID": map[string]cpuset.CPUSet{ - "secondfakeContainerName": cpuset.New(3, 4), + "secondfakePodUID": map[string]state.ContainerCPUAssignment{ + "secondfakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), @@ -1183,20 +1197,20 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, lastUpdateStDefaultCPUSet: cpuset.New(5, 6, 7), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(5, 6, 7), @@ -1235,20 +1249,20 @@ func TestReconcileState(t *testing.T) { pspFound: true, updateErr: nil, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(3, 4), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, lastUpdateStDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index fc10c0c150618..3185ecb984417 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -34,7 +34,6 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/metrics" - "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/utils/cpuset" ) @@ -46,6 +45,14 @@ const ( PolicyStatic policyName = "static" // ErrorSMTAlignment represents the type of an SMTAlignmentError ErrorSMTAlignment = "SMTAlignmentError" + // ErrorIncosistendCPUAllocation represents the type of an incosistentCPUAllocationError + ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" + // ErrorProhibitedCPUAlloacation represents the type of an prohibitedCPUAllocationError + ErrorProhibitedCPUAllocation = "prohibitedCPUAllocationError" + // ErrorGetOriginalCPUSetError represents the type of an getOriginalCPUSetError + ErrorGetOriginalCPUSet = "getOriginalCPUSetError" + // ErrorResizeAllocateCPUs represents the type of a ResizeAllocateCPUsError + ErrorResizeAllocateCPUs = "ResizeAllocateCPUsError" ) // SMTAlignmentError represents an error due to SMT alignment @@ -68,6 +75,27 @@ func (e SMTAlignmentError) Type() string { return ErrorSMTAlignment } +// prohibitedCPUAllocationError represents an error due to an +// attempt to reduce container exclusively allocated +// pool below container exclusively original pool +// allocated when container was created. +type prohibitedCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + OriginalCPUs int + GuaranteedCPUs int +} + +func (e prohibitedCPUAllocationError) Error() string { + return fmt.Sprintf("prohibitedCPUAllocation Error: Skip resize, Not allowed to reduce container exclusively allocated pool below promised, (requested CPUs = %s, allocated CPUs = %s, promised CPUs = %d, guaranteed CPUs = %d)", e.RequestedCPUs, e.AllocatedCPUs, e.OriginalCPUs, e.GuaranteedCPUs) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e prohibitedCPUAllocationError) Type() string { + return ErrorProhibitedCPUAllocation +} + // inconsistentCPUAllocationError represents an error due to an // attempt to either move a container from exclusively allocated // pool to shared pool or move a container from shared pool to @@ -92,24 +120,43 @@ func (e inconsistentCPUAllocationError) Error() string { // Type returns human-readable type of this error. // Used in the HandlePodResourcesResize to populate Failure reason func (e inconsistentCPUAllocationError) Type() string { - return types.ErrorInconsistentCPUAllocation + return ErrorInconsistentCPUAllocation +} + +// getOriginalCPUSetError represents an error due to a +// failed attempt to GetOriginalCPUSet from state +type getOriginalCPUSetError struct { + PodUID string + ContainerName string +} + +func (e getOriginalCPUSetError) Error() string { + return fmt.Sprintf("getOriginalCPUSet Error: Skip resize, unable to get PromisedCPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e getOriginalCPUSetError) Type() string { + return ErrorGetOriginalCPUSet } -// getCPUSetError represents an error due to a -// failed attempt to GetCPUSet from state -type getCPUSetError struct { +// ResizeAllocateCPUsError represents an error during +// an attempt to allocate a container's CPU exclusive pool +// resize. +type ResizeAllocateCPUsError struct { PodUID string ContainerName string + TopologyError string } -func (e getCPUSetError) Error() string { - return fmt.Sprintf("getCPUSet Error: Skip resize, unable to get CPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +func (e ResizeAllocateCPUsError) Error() string { + return fmt.Sprintf("ResizeAllocateCPUs Error: Skip resize, unable to resize container exclusively allocated pool, (podUID = %s, containerName = %s, topologyError = %s)", e.PodUID, e.ContainerName, e.TopologyError) } // Type returns human-readable type of this error. // Used in the HandlePodResourcesResize to populate Failure reason -func (e getCPUSetError) Type() string { - return types.ErrorGetCPUSet +func (e ResizeAllocateCPUsError) Type() string { + return ErrorResizeAllocateCPUs } // staticPolicy is a CPU manager policy that does not change CPU @@ -212,7 +259,7 @@ func NewStaticPolicy(logger logr.Logger, topology *topology.CPUTopology, numRese // // For example: Given a system with 8 CPUs available and HT enabled, // if numReservedCPUs=2, then reserved={0,4} - reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs, nil, nil) + reserved, _ = policy.takeByTopology(logger, allCPUs, numReservedCPUs, nil, nil) } if reserved.Size() != numReservedCPUs { @@ -287,7 +334,14 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { // 2. Check if state for static policy is consistent for pod := range tmpAssignments { - for container, cset := range tmpAssignments[pod] { + for container, assignment := range tmpAssignments[pod] { + var cset cpuset.CPUSet + if assignment.Resized.IsEmpty() { + cset = assignment.Original + } else { + cset = assignment.Resized + } + // None of the cpu in DEFAULT cset should be in s.assignments if !tmpDefaultCPUset.Intersection(cset).IsEmpty() { return fmt.Errorf("pod: %s, container: %s cpuset: %q overlaps with default cpuset %q", @@ -306,7 +360,13 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { totalKnownCPUs := tmpDefaultCPUset.Clone() tmpCPUSets := []cpuset.CPUSet{} for pod := range tmpAssignments { - for _, cset := range tmpAssignments[pod] { + for _, assignment := range tmpAssignments[pod] { + var cset cpuset.CPUSet + if assignment.Resized.IsEmpty() { + cset = assignment.Original + } else { + cset = assignment.Resized + } tmpCPUSets = append(tmpCPUSets, cset) } } @@ -364,13 +424,17 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset) } -func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - numCPUs := p.guaranteedCPUs(pod, container) +func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) + logger.Info("Allocate start") // V=0 for backward compatibility + defer logger.V(2).Info("Allocate end") + + numCPUs := p.guaranteedCPUs(logger, pod, container) if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { // During a pod resize, handle corner cases - err := p.validateInPlacePodVerticalScaling(pod, container) + err := p.isFeasibleResize(logger, s, pod, container) if err != nil { - klog.ErrorS(err, "Static policy: Unable to resize allocated CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) return err } } @@ -425,7 +489,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { + if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) } @@ -443,46 +507,48 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } } } - if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - klog.InfoS("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) - if cpusInUseByPodContainerToResize, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - // Call Topology Manager to get the aligned socket affinity across all hint providers. - hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) - // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint - // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - mustKeepCPUsForResize := p.GetMustKeepCPUs(container, cpuset) - newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, mustKeepCPUsForResize) - if err != nil { - klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) - return err - } - // Allocation successful, update the current state - s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) - p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) - // Updated state to the checkpoint file will be stored during - // the reconcile loop. TODO is this a problem? I don't believe - // because if kubelet will be terminated now, anyhow it will be - // needed the state to be cleaned up, an error will appear requiring - // the node to be drained. I think we are safe. All computations are - // using state_mem and not the checkpoint. - return nil - } else { - return getCPUSetError{ - PodUID: string(pod.UID), - ContainerName: container.Name, - } + if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + err := getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, } - } else { - p.updateCPUsToReuse(pod, container, cpuset) - klog.InfoS("Static policy: InPlacePodVerticalScaling alognside CPU Static policy requires InPlacePodVerticalScaling to be enabled, skipping pod resize") - return nil + return err } + // Allocate CPUs according to the NUMA affinity contained in the hint. + newallocatedcpuset, witherr := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) + if witherr != nil { + err := ResizeAllocateCPUsError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + TopologyError: witherr.Error(), + } + return err + } + + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) + logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. + return nil } else { - p.updateCPUsToReuse(pod, container, cpuset) - klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) + p.updateCPUsToReuse(pod, container, cpusInUseByPodContainer) + logger.Info("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) return nil } } @@ -492,7 +558,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai logger.Info("Topology Affinity", "affinity", hint) // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) + cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) if err != nil { logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) return err @@ -506,43 +572,19 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai return nil } -func (p *staticPolicy) GetMustKeepCPUs(container *v1.Container, oldCpuset cpuset.CPUSet) *cpuset.CPUSet { - mustKeepCPUs := cpuset.New() - for _, envVar := range container.Env { - if envVar.Name == "mustKeepCPUs" { - mustKeepCPUsInEnv, err := cpuset.Parse(envVar.Value) - if err == nil && mustKeepCPUsInEnv.Size() != 0 { - mustKeepCPUs = oldCpuset.Intersection(mustKeepCPUsInEnv) - } - klog.InfoS("mustKeepCPUs ", "is", mustKeepCPUs) - if p.options.FullPhysicalCPUsOnly { - // mustKeepCPUs must be aligned to the physical core - if (mustKeepCPUs.Size() % 2) != 0 { - return nil - } - mustKeepCPUsDetail := p.topology.CPUDetails.KeepOnly(mustKeepCPUs) - mustKeepCPUsDetailCores := mustKeepCPUsDetail.Cores() - if (mustKeepCPUs.Size() / mustKeepCPUsDetailCores.Size()) != p.cpuGroupSize { - klog.InfoS("mustKeepCPUs is nil") - return nil - } - } - return &mustKeepCPUs - } - } - klog.InfoS("mustKeepCPUs is nil") - return nil -} - // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet { assignments := s.GetCPUAssignments() cset := cpuset.New() - for name, cpus := range assignments[podUID] { + for name, assignment := range assignments[podUID] { if containerName == name { continue } - cset = cset.Union(cpus) + if assignment.Resized.IsEmpty() { + cset = cset.Union(assignment.Original) + } else { + cset = cset.Union(assignment.Resized) + } } return cset } @@ -566,9 +608,19 @@ func (p *staticPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID return nil } -func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { - klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) +func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { + logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) allocatableCPUs := cpuset.New() + + if mustKeepCPUsForResize != nil { + if numCPUs >= mustKeepCPUsForResize.Size() { + allocatableCPUs = mustKeepCPUsForResize.Clone() + } + if numCPUs < mustKeepCPUsForResize.Size() { + return topology.EmptyAllocation(), fmt.Errorf("requested number of CPUs ( %d ) are less than number of retained CPUs ( %d )", numCPUs, mustKeepCPUsForResize.Size()) + } + } + if reusableCPUsForResize != nil { if numCPUs >= reusableCPUsForResize.Size() { allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUsForResize.Clone())) @@ -589,7 +641,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit numAlignedToAlloc = numCPUs } - allocatedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) + allocatedCPUs, err := p.takeByTopology(logger, alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -597,12 +649,20 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit result.CPUs = result.CPUs.Union(allocatedCPUs) } - // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. - remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) - if err != nil { - return topology.EmptyAllocation(), err + if numCPUs > result.CPUs.Size() { + // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. + remainingCPUs, err := p.takeByTopology(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) + if err != nil { + return topology.EmptyAllocation(), err + } + result.CPUs = result.CPUs.Union(remainingCPUs) + } + + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(result.CPUs) { + return topology.EmptyAllocation(), fmt.Errorf("requested CPUs to be retained %s are not a subset of resulted CPUs %s", mustKeepCPUsForResize.String(), result.CPUs.String()) + } } - result.CPUs = result.CPUs.Union(remainingCPUs) result.Aligned = p.topology.CheckAlignment(result.CPUs) // Remove allocated CPUs from the shared CPUSet. @@ -674,7 +734,25 @@ func (p *staticPolicy) podGuaranteedCPUs(logger logr.Logger, pod *v1.Pod) int { return requestedByLongRunningContainers } -func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // Protect against CPU leaks by failing early + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of available CPUs %s", mustKeepCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("reusable CPUs %s are not a subset of available CPUs %s", reusableCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(reusableCPUsForResize.Clone()) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + cpuSortingStrategy := CPUSortingStrategyPacked if p.options.DistributeCPUsAcrossCores { cpuSortingStrategy = CPUSortingStrategySpread @@ -685,9 +763,10 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, if p.options.FullPhysicalCPUsOnly { cpuGroupSize = p.cpuGroupSize } - return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMADistributed(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) } - return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForScaleDown) + + return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) } func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { @@ -709,23 +788,46 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * return nil } + reusable := cpuset.New() + // Short circuit to regenerate the same hints if there are already // guaranteed CPUs allocated to the Container. This might happen after a // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requested { - klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) - // An empty list of hints will be treated as a preference that cannot be satisfied. - // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. - // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if allocated.Size() < requested { + reusable = reusable.Union(allocated) + } else { + reusable = allocated + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + + // Generate hints. + cpuHints := p.generateCPUTopologyHints(cpuset.New(), reusable, requested) + logger.Info("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + } + } else { + logger.Info("CPUs already allocated to container with different number than request", "requestedSize", requested, "allocatedSize", allocated.Size()) + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } + } + } else { + logger.Info("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): {}, + string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.New(), requested), } } - logger.Info("Regenerating TopologyHints for CPUs already allocated") - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.New(), requested), - } } // Get a list of available CPUs. @@ -733,7 +835,7 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). // It should be an empty CPUSet for a newly created pod. - reusable := p.cpusToReuse[string(pod.UID)] + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) // Generate hints. cpuHints := p.generateCPUTopologyHints(available, reusable, requested) @@ -773,12 +875,14 @@ func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, po // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requestedByContainer { - klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) - // An empty list of hints will be treated as a preference that cannot be satisfied. - // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. - // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): {}, + logger_.Info("CPUs already allocated to container with different number than request", "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } } } // A set of CPUs already assigned to containers in this pod @@ -823,7 +927,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Iterate through all combinations of numa nodes bitmask and build hints from them. hints := []topologymanager.TopologyHint{} - bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().UnsortedList(), func(mask bitmask.BitMask) { + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { // First, update minAffinitySize for the current request size. cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() if cpusInMask >= request && mask.Count() < minAffinitySize { @@ -833,7 +937,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Then check to see if we have enough CPUs available on the current // numa node bitmask to satisfy the CPU request. numMatching := 0 - for _, c := range reusableCPUs.UnsortedList() { + for _, c := range reusableCPUs.List() { // Disregard this mask if its NUMANode isn't part of it. if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { return @@ -843,7 +947,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Finally, check to see if enough available CPUs remain on the current // NUMA node combination to satisfy the CPU request. - for _, c := range availableCPUs.UnsortedList() { + for _, c := range availableCPUs.List() { if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { numMatching++ } @@ -875,6 +979,12 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu if hints[i].NUMANodeAffinity.Count() == minAffinitySize { hints[i].Preferred = true } + + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if hints[i].NUMANodeAffinity.Count() == request { + hints[i].Preferred = true + } + } } return hints @@ -948,7 +1058,13 @@ func (p *staticPolicy) updateMetricsOnRelease(logger logr.Logger, s state.State, func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet { totalAssignedCPUs := cpuset.New() for _, assignment := range s.GetCPUAssignments() { - for _, cset := range assignment { + for _, assignment := range assignment { + var cset cpuset.CPUSet + if assignment.Resized.IsEmpty() { + cset = assignment.Original + } else { + cset = assignment.Resized + } totalAssignedCPUs = totalAssignedCPUs.Union(cset) } } @@ -975,46 +1091,67 @@ func updateAllocationPerNUMAMetric(logger logr.Logger, topo *topology.CPUTopolog } } -func (p *staticPolicy) validateInPlacePodVerticalScaling(pod *v1.Pod, container *v1.Container) error { +func (p *staticPolicy) isFeasibleResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { return nil } cpuQuantity := container.Resources.Requests[v1.ResourceCPU] - if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { - allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] - if allocatedCPUQuantity.Value() > 0 { - if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { - // container belongs in exclusive pool - if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { - // container move to shared pool not allowed - return inconsistentCPUAllocationError{ - RequestedCPUs: cpuQuantity.String(), - AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: false, - } - } - } else { - // container belongs in shared pool - if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { - // container move to exclusive pool not allowed - return inconsistentCPUAllocationError{ - RequestedCPUs: cpuQuantity.String(), - AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: true, - } - } - } - } else { - // container belongs in shared pool - if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { - // container move to exclusive pool not allowed + cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name) + if !ok { + return nil + } + // Policy static specific resize feasibility checks, to decide if it is capable of performing the resize + allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] + if allocatedCPUQuantity.Value() > 0 { + if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { + // container belongs in exclusive pool + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + // container move to shared pool not allowed return inconsistentCPUAllocationError{ RequestedCPUs: cpuQuantity.String(), AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: true, + Shared2Exclusive: false, + } + } + // Todo this is a good place to add a check with cpu manage + // state reading original / resized and check if allocated is + // up to date, this will be useful for troubleshooting and + // fine tune errors + mustKeepCPUsPromised, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + return getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, } } + numCPUs := p.guaranteedCPUs(logger, pod, container) + promisedCPUsQuantity := mustKeepCPUsPromised.Size() + if promisedCPUsQuantity <= numCPUs { + return nil + } + return prohibitedCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + OriginalCPUs: promisedCPUsQuantity, + GuaranteedCPUs: numCPUs, + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, } } return nil diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 5b509656eb8cb..6321937cfba3b 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -26,7 +26,6 @@ import ( "k8s.io/apimachinery/pkg/types" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" - "k8s.io/klog/v2" pkgfeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" @@ -96,8 +95,8 @@ func TestStaticPolicyStart(t *testing.T) { description: "non-corrupted state", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -132,8 +131,8 @@ func TestStaticPolicyStart(t *testing.T) { description: "assigned core 2 is still present in available cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -144,8 +143,8 @@ func TestStaticPolicyStart(t *testing.T) { topo: topoDualSocketHT, options: map[string]string{StrictCPUReservationOption: "true"}, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -155,9 +154,9 @@ func TestStaticPolicyStart(t *testing.T) { description: "core 12 is not present in topology but is in state cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), - "1": cpuset.New(3, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, + "1": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7, 8, 9, 10, 11, 12), @@ -167,9 +166,9 @@ func TestStaticPolicyStart(t *testing.T) { description: "core 11 is present in topology but is not in state cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), - "1": cpuset.New(3, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, + "1": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7, 8, 9, 10), @@ -240,8 +239,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2, 3, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2, 3, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 4, 5), @@ -255,8 +254,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -270,8 +269,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7, 8, 9, 10, 11), @@ -285,8 +284,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketNoHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7), @@ -300,8 +299,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketNoHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(4, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(4, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 6, 7), @@ -315,8 +314,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -354,8 +353,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocSock0", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(3, 11, 4, 5, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(3, 11, 4, 5, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: largeTopoCPUSet.Difference(cpuset.New(3, 11, 4, 5, 6, 7)), @@ -370,9 +369,9 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllFullCoresFromThreeSockets", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, - 53, 173, 113, 233, 54, 61)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, + 53, 173, 113, 233, 54, 61)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, 53, 173, 113, 233, 54, 61), @@ -387,9 +386,9 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllSock1+FullCore", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, - 173, 61, 181, 108, 228, 115, 235))), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, + 173, 61, 181, 108, 228, 115, 235))), Resized: cpuset.New()}, }, }, stDefaultCPUSet: largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, 173, 61, 181, 108, 228, @@ -420,8 +419,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocCPUs", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(10, 11, 53, 67, 52), @@ -446,8 +445,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 2, 5, 6), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 2, 5, 6), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 3, 4, 7), @@ -461,8 +460,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 2, 3), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 4, 5, 6, 7, 8, 9, 10, 11), @@ -476,8 +475,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 2, 3, 4, 5, 6), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 2, 3, 4, 5, 6), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 7), @@ -493,8 +492,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, NoAlloc", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(10, 11, 53, 37, 55, 67, 52), @@ -589,8 +588,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 2, 5, 6), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 2, 5, 6), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 3, 4, 7), @@ -607,8 +606,8 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), @@ -625,8 +624,8 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), @@ -643,8 +642,8 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), @@ -677,8 +676,8 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), @@ -783,20 +782,20 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { } if testCase.expCPUAlloc { - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !cset.Equals(testCase.expCSet) { + if !assignment.Original.Equals(testCase.expCSet) { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s but got %s", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } - if !cset.Intersection(st.defaultCPUSet).IsEmpty() { + if !assignment.Original.Intersection(st.defaultCPUSet).IsEmpty() { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s to be disoint from the shared cpuset %s", - testCase.description, cset, st.defaultCPUSet) + testCase.description, assignment.Original, st.defaultCPUSet) } } @@ -949,6 +948,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", OriginalCPUs: 4, GuaranteedCPUs: 2}, expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), expCSetAfterResizeSize: 4, expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), @@ -1084,11 +1084,12 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { } for _, testCase := range testCases { + logger, _ := ktesting.NewTestContext(t) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) t.Run(testCase.description, func(t *testing.T) { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) st := &mockState{ assignments: testCase.stAssignments, @@ -1099,7 +1100,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(st, pod, &container) + err := policy.Allocate(logger, st, pod, &container) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -1129,7 +1130,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { } podResized := pod for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { - err := policy.Allocate(st, podResized, &container) + err := policy.Allocate(logger, st, podResized, &container) if err != nil { if !reflect.DeepEqual(err, testCase.expAllocErr) { t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", @@ -1153,7 +1154,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { } // remove - err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + err := policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName) if err != nil { t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", testCase.description, err) @@ -1295,6 +1296,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", OriginalCPUs: 4, GuaranteedCPUs: 2}, containerName2: "appContainer-1", expCSetAfterAlloc: cpuset.New(), expCSetAfterResize: cpuset.New(), @@ -1420,11 +1422,12 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { } for _, testCase := range testCases { + logger, _ := ktesting.NewTestContext(t) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) t.Run(testCase.description, func(t *testing.T) { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) st := &mockState{ assignments: testCase.stAssignments, @@ -1435,7 +1438,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(st, pod, &container) + err := policy.Allocate(logger, st, pod, &container) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -1465,7 +1468,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { } podResized := pod for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { - err := policy.Allocate(st, podResized, &container) + err := policy.Allocate(logger, st, podResized, &container) if err != nil { if !reflect.DeepEqual(err, testCase.expAllocErr) { t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", @@ -1490,12 +1493,12 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { } // remove - err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + err := policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName) if err != nil { t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", testCase.description, err) } - err = policy.RemoveContainer(st, string(pod.UID), testCase.containerName2) + err = policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName2) if err != nil { t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", testCase.description, err) @@ -1520,8 +1523,8 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 2, 3), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(4, 5, 6, 7), @@ -1533,9 +1536,9 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 2, 3), - "fakeContainer2": cpuset.New(4, 5, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, + "fakeContainer2": {Original: cpuset.New(4, 5, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(), @@ -1547,9 +1550,9 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 3, 5), - "fakeContainer2": cpuset.New(2, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 3, 5), Resized: cpuset.New()}, + "fakeContainer2": {Original: cpuset.New(2, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(6, 7), @@ -1561,8 +1564,8 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer2", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 3, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 3, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 4, 6, 7), @@ -1681,7 +1684,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { continue } - cpuAlloc, err := policy.allocateCPUs(st, tc.numRequested, tc.socketMask, cpuset.New(), nil, nil) + cpuAlloc, err := policy.allocateCPUs(logger, st, tc.numRequested, tc.socketMask, cpuset.New(), nil, nil) if err != nil { t.Errorf("StaticPolicy allocateCPUs() error (%v). expected CPUSet %v not error %v", tc.description, tc.expCSet, err) @@ -1831,8 +1834,8 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { numReservedCPUs: 2, reserved: cpuset.New(0, 1), stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2, 3, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2, 3, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 4, 5), @@ -1863,20 +1866,20 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { } if testCase.expCPUAlloc { - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !cset.Equals(testCase.expCSet) { + if !assignment.Original.Equals(testCase.expCSet) { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s but got %s", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } - if !cset.Intersection(st.defaultCPUSet).IsEmpty() { + if !assignment.Original.Intersection(st.defaultCPUSet).IsEmpty() { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s to be disoint from the shared cpuset %s", - testCase.description, cset, st.defaultCPUSet) + testCase.description, assignment.Original, st.defaultCPUSet) } } @@ -2634,14 +2637,14 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { if testCase.expCPUAlloc { container := &testCase.pod.Spec.Containers[0] - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !testCase.expCSet.Equals(cset) { + if !testCase.expCSet.Equals(assignment.Original) { t.Errorf("StaticPolicy Allocate() error (%v). expected CPUSet %v but got %v", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } return } diff --git a/pkg/kubelet/cm/cpumanager/state/checkpoint.go b/pkg/kubelet/cm/cpumanager/state/checkpoint.go index c69d12998121f..fbad1fb0d8a64 100644 --- a/pkg/kubelet/cm/cpumanager/state/checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/checkpoint.go @@ -30,10 +30,26 @@ import ( var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV1{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV2{} +var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV3{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpoint{} -// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +// ContainerCPUs struct is used in a checkpoint in v3 format, +// to support In place update pod resources alongside Static CPU Manager policy +type ContainerCPUs struct { + Original string `json:"original"` + Resized string `json:"resized"` +} + +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v3 format type CPUManagerCheckpoint struct { + PolicyName string `json:"policyName"` + DefaultCPUSet string `json:"defaultCpuSet"` + Entries map[string]map[string]ContainerCPUs `json:"entries,omitempty"` + Checksum checksum.Checksum `json:"checksum"` +} + +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +type CPUManagerCheckpointV2 struct { PolicyName string `json:"policyName"` DefaultCPUSet string `json:"defaultCpuSet"` Entries map[string]map[string]string `json:"entries,omitempty"` @@ -48,13 +64,13 @@ type CPUManagerCheckpointV1 struct { Checksum checksum.Checksum `json:"checksum"` } -// CPUManagerCheckpointV2 struct is used to store cpu/pod assignments in a checkpoint in v2 format -type CPUManagerCheckpointV2 = CPUManagerCheckpoint +// CPUManagerCheckpointV3 struct is used to store cpu/pod assignments in a checkpoint in v3 format +type CPUManagerCheckpointV3 = CPUManagerCheckpoint // NewCPUManagerCheckpoint returns an instance of Checkpoint func NewCPUManagerCheckpoint() *CPUManagerCheckpoint { //nolint:staticcheck // unexported-type-in-api user-facing error message - return newCPUManagerCheckpointV2() + return newCPUManagerCheckpointV3() } func newCPUManagerCheckpointV1() *CPUManagerCheckpointV1 { @@ -69,6 +85,12 @@ func newCPUManagerCheckpointV2() *CPUManagerCheckpointV2 { } } +func newCPUManagerCheckpointV3() *CPUManagerCheckpointV3 { + return &CPUManagerCheckpointV3{ + Entries: make(map[string]map[string]ContainerCPUs), + } +} + // MarshalCheckpoint returns marshalled checkpoint in v1 format func (cp *CPUManagerCheckpointV1) MarshalCheckpoint() ([]byte, error) { // make sure checksum wasn't set before so it doesn't affect output checksum @@ -85,6 +107,14 @@ func (cp *CPUManagerCheckpointV2) MarshalCheckpoint() ([]byte, error) { return json.Marshal(*cp) } +// MarshalCheckpoint returns marshalled checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) MarshalCheckpoint() ([]byte, error) { + // make sure checksum wasn't set before so it doesn't affect output checksum + cp.Checksum = 0 + cp.Checksum = checksum.New(cp) + return json.Marshal(*cp) +} + // UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v1 format func (cp *CPUManagerCheckpointV1) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) @@ -95,6 +125,11 @@ func (cp *CPUManagerCheckpointV2) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) } +// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, cp) +} + // VerifyChecksum verifies that current checksum of checkpoint is valid in v1 format func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { if cp.Checksum == 0 { @@ -109,7 +144,9 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { cp.Checksum = ck hash := fnv.New32a() - fmt.Fprintf(hash, "%v", object) + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } actualCS := checksum.Checksum(hash.Sum32()) if cp.Checksum != actualCS { return &errors.CorruptCheckpointError{ @@ -123,6 +160,33 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { // VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format func (cp *CPUManagerCheckpointV2) VerifyChecksum() error { + if cp.Checksum == 0 { + // accept empty checksum for compatibility with old file backend + return nil + } + ck := cp.Checksum + cp.Checksum = 0 + object := dump.ForHash(cp) + object = strings.Replace(object, "CPUManagerCheckpointV2", "CPUManagerCheckpoint", 1) + cp.Checksum = ck + + hash := fnv.New32a() + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } + actualCS := checksum.Checksum(hash.Sum32()) + if cp.Checksum != actualCS { + return &errors.CorruptCheckpointError{ + ActualCS: uint64(actualCS), + ExpectedCS: uint64(cp.Checksum), + } + } + + return nil +} + +// VerifyChecksum verifies that current checksum of checkpoint is valid in v3 format +func (cp *CPUManagerCheckpointV3) VerifyChecksum() error { if cp.Checksum == 0 { // accept empty checksum for compatibility with old file backend return nil diff --git a/pkg/kubelet/cm/cpumanager/state/state.go b/pkg/kubelet/cm/cpumanager/state/state.go index 352fddfb9cdad..513a85010a2b0 100644 --- a/pkg/kubelet/cm/cpumanager/state/state.go +++ b/pkg/kubelet/cm/cpumanager/state/state.go @@ -18,25 +18,29 @@ package state import ( "k8s.io/utils/cpuset" + "maps" ) +type ContainerCPUAssignment struct { + Original cpuset.CPUSet + Resized cpuset.CPUSet +} + // ContainerCPUAssignments type used in cpu manager state -type ContainerCPUAssignments map[string]map[string]cpuset.CPUSet +type ContainerCPUAssignments map[string]map[string]ContainerCPUAssignment // Clone returns a copy of ContainerCPUAssignments func (as ContainerCPUAssignments) Clone() ContainerCPUAssignments { ret := make(ContainerCPUAssignments, len(as)) for pod := range as { - ret[pod] = make(map[string]cpuset.CPUSet, len(as[pod])) - for container, cset := range as[pod] { - ret[pod][container] = cset - } + ret[pod] = maps.Clone(as[pod]) } return ret } // Reader interface used to read current cpu/pod assignment state type Reader interface { + GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) GetDefaultCPUSet() cpuset.CPUSet GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go index 9774bf3728717..5540f1007a26c 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go @@ -17,6 +17,7 @@ limitations under the License. package state import ( + "errors" "fmt" "path/filepath" "sync" @@ -24,7 +25,7 @@ import ( "github.com/go-logr/logr" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" + checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/utils/cpuset" ) @@ -68,26 +69,24 @@ func NewCheckpointState(logger logr.Logger, stateDir, checkpointName, policyName return stateCheckpoint, nil } -// migrateV1CheckpointToV2Checkpoint() converts checkpoints from the v1 format to the v2 format -func (sc *stateCheckpoint) migrateV1CheckpointToV2Checkpoint(src *CPUManagerCheckpointV1, dst *CPUManagerCheckpointV2) error { +// migrateV2CheckpointToV3Checkpoint() converts checkpoints from the v2 format to the v3 format +func (sc *stateCheckpoint) migrateV2CheckpointToV3Checkpoint(src *CPUManagerCheckpointV2, dst *CPUManagerCheckpointV3) error { if src.PolicyName != "" { dst.PolicyName = src.PolicyName } if src.DefaultCPUSet != "" { dst.DefaultCPUSet = src.DefaultCPUSet } - for containerID, cset := range src.Entries { - podUID, containerName, err := sc.initialContainers.GetContainerRef(containerID) - if err != nil { - return fmt.Errorf("containerID '%v' not found in initial containers list", containerID) - } - if dst.Entries == nil { - dst.Entries = make(map[string]map[string]string) - } - if _, exists := dst.Entries[podUID]; !exists { - dst.Entries[podUID] = make(map[string]string) + for podUID := range src.Entries { + for containerName, cpuString := range src.Entries[podUID] { + if dst.Entries == nil { + dst.Entries = make(map[string]map[string]ContainerCPUs) + } + if _, exists := dst.Entries[podUID]; !exists { + dst.Entries[podUID] = make(map[string]ContainerCPUs) + } + dst.Entries[podUID][containerName] = ContainerCPUs{Original: cpuString} } - dst.Entries[podUID][containerName] = cset } return nil } @@ -100,39 +99,52 @@ func (sc *stateCheckpoint) restoreState() error { checkpointV1 := newCPUManagerCheckpointV1() checkpointV2 := newCPUManagerCheckpointV2() + checkpointV3 := newCPUManagerCheckpointV3() if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV1); err != nil { - checkpointV1 = &CPUManagerCheckpointV1{} // reset it back to 0 if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV2); err != nil { - if err == errors.ErrCheckpointNotFound { - return sc.storeState() + if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV3); err != nil { + if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) { + return sc.storeState() + } + return err + } + } else { + if err = sc.migrateV2CheckpointToV3Checkpoint(checkpointV2, checkpointV3); err != nil { + return fmt.Errorf("error migrating v2 checkpoint state to v3 checkpoint state: %w", err) } - return err } + } else { + return fmt.Errorf("error migrating v1 checkpoint state to v3 checkpoint state is not supported") } - if err = sc.migrateV1CheckpointToV2Checkpoint(checkpointV1, checkpointV2); err != nil { - return fmt.Errorf("error migrating v1 checkpoint state to v2 checkpoint state: %s", err) - } - - if sc.policyName != checkpointV2.PolicyName { - return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV2.PolicyName) + if sc.policyName != checkpointV3.PolicyName { + return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV3.PolicyName) } var tmpDefaultCPUSet cpuset.CPUSet - if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV2.DefaultCPUSet); err != nil { - return fmt.Errorf("could not parse default cpu set %q: %v", checkpointV2.DefaultCPUSet, err) + if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV3.DefaultCPUSet); err != nil { + return fmt.Errorf("could not parse default cpu set %q: %w", checkpointV3.DefaultCPUSet, err) } - var tmpContainerCPUSet cpuset.CPUSet + var tmpOriginal cpuset.CPUSet + var tmpResized cpuset.CPUSet tmpAssignments := ContainerCPUAssignments{} - for pod := range checkpointV2.Entries { - tmpAssignments[pod] = make(map[string]cpuset.CPUSet, len(checkpointV2.Entries[pod])) - for container, cpuString := range checkpointV2.Entries[pod] { - if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil { - return fmt.Errorf("could not parse cpuset %q for container %q in pod %q: %v", cpuString, container, pod, err) + for pod := range checkpointV3.Entries { + tmpAssignments[pod] = make(map[string]ContainerCPUAssignment, len(checkpointV3.Entries[pod])) + for container, containerCPUs := range checkpointV3.Entries[pod] { + if tmpOriginal, err = cpuset.Parse(containerCPUs.Original); err != nil { + return fmt.Errorf("could not parse original cpuset %q for container %q in pod %q: %w", containerCPUs.Original, container, pod, err) } - tmpAssignments[pod][container] = tmpContainerCPUSet + if tmpResized, err = cpuset.Parse(containerCPUs.Resized); err != nil { + return fmt.Errorf("could not parse resized cpuset %q for container %q in pod %q: %w", containerCPUs.Resized, container, pod, err) + } + if !tmpOriginal.IsEmpty() && !tmpResized.IsEmpty() { + if !tmpResized.IsSubsetOf(tmpOriginal) { + return fmt.Errorf("original cpuset %q for container %q in pod %q is not subset of resized cpuset %q", containerCPUs.Original, container, pod, containerCPUs.Resized) + } + } + tmpAssignments[pod][container] = ContainerCPUAssignment{Original: tmpOriginal, Resized: tmpResized} } } @@ -153,9 +165,9 @@ func (sc *stateCheckpoint) storeState() error { assignments := sc.cache.GetCPUAssignments() for pod := range assignments { - checkpoint.Entries[pod] = make(map[string]string, len(assignments[pod])) - for container, cset := range assignments[pod] { - checkpoint.Entries[pod][container] = cset.String() + checkpoint.Entries[pod] = make(map[string]ContainerCPUs, len(assignments[pod])) + for container, assignment := range assignments[pod] { + checkpoint.Entries[pod][container] = ContainerCPUs{Original: assignment.Original.String(), Resized: assignment.Resized.String()} } } @@ -167,6 +179,14 @@ func (sc *stateCheckpoint) storeState() error { return nil } +// GetOriginalCPUSet returns current CPU set +func (sc *stateCheckpoint) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + sc.mux.RLock() + defer sc.mux.RUnlock() + + return sc.cache.GetOriginalCPUSet(podUID, containerName) +} + // GetCPUSet returns current CPU set func (sc *stateCheckpoint) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { sc.mux.RLock() diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go index 458419fe1e43f..dfb988ef72476 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go @@ -50,7 +50,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore default cpu set", + "Restore default cpu set from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "4-6", @@ -65,7 +65,7 @@ func TestCheckpointStateRestore(t *testing.T) { }, }, { - "Restore valid checkpoint", + "Restore valid checkpoint from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -82,16 +82,22 @@ func TestCheckpointStateRestore(t *testing.T) { "", &stateMemory{ assignments: ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), + "pod": map[string]ContainerCPUAssignment{ + "container1": { + Original: cpuset.New(4, 5, 6), + Resized: cpuset.New(), + }, + "container2": { + Original: cpuset.New(1, 2, 3), + Resized: cpuset.New(), + }, }, }, defaultCPUSet: cpuset.New(1, 2, 3), }, }, { - "Restore checkpoint with invalid checksum", + "Restore checkpoint with invalid checksum from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "4-6", @@ -112,7 +118,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore checkpoint with invalid policy name", + "Restore checkpoint with invalid policy name from checkpoint with v2 checksum", `{ "policyName": "other", "defaultCPUSet": "1-3", @@ -125,7 +131,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore checkpoint with unparsable default cpu set", + "Restore checkpoint with unparsable default cpu set from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1.3", @@ -137,8 +143,8 @@ func TestCheckpointStateRestore(t *testing.T) { `could not parse default cpu set "1.3": strconv.Atoi: parsing "1.3": invalid syntax`, &stateMemory{}, }, - { - "Restore checkpoint with unparsable assignment entry", + /*Sotiris to fix for v3{ + "Restore checkpoint with unparsable assignment entry from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -154,7 +160,7 @@ func TestCheckpointStateRestore(t *testing.T) { containermap.ContainerMap{}, `could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`, &stateMemory{}, - }, + },*/ { "Restore checkpoint from checkpoint with v1 checksum", `{ @@ -164,13 +170,11 @@ func TestCheckpointStateRestore(t *testing.T) { }`, "none", containermap.ContainerMap{}, - "", - &stateMemory{ - defaultCPUSet: cpuset.New(1, 2, 3), - }, + "error migrating v1 checkpoint state to v3 checkpoint state is not supported", + &stateMemory{}, }, { - "Restore checkpoint with migration", + "Restore checkpoint with migration from checkpoint with v1 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -181,22 +185,9 @@ func TestCheckpointStateRestore(t *testing.T) { "checksum": 3680390589 }`, "none", - func() containermap.ContainerMap { - cm := containermap.NewContainerMap() - cm.Add("pod", "container1", "containerID1") - cm.Add("pod", "container2", "containerID2") - return cm - }(), - "", - &stateMemory{ - assignments: ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), - }, - }, - defaultCPUSet: cpuset.New(1, 2, 3), - }, + containermap.ContainerMap{}, + "error migrating v1 checkpoint state to v3 checkpoint state is not supported", + &stateMemory{}, }, } @@ -249,9 +240,12 @@ func TestCheckpointStateStore(t *testing.T) { { "Store assignments", &stateMemory{ - assignments: map[string]map[string]cpuset.CPUSet{ + assignments: map[string]map[string]ContainerCPUAssignment{ "pod": { - "container1": cpuset.New(1, 5, 8), + "container1": ContainerCPUAssignment{ + Original: cpuset.New(1, 5, 8), + Resized: cpuset.New(), + }, }, }, }, @@ -377,14 +371,17 @@ func TestCheckpointStateClear(t *testing.T) { testCases := []struct { description string defaultCPUset cpuset.CPUSet - assignments map[string]map[string]cpuset.CPUSet + assignments map[string]map[string]ContainerCPUAssignment }{ { "Valid state", cpuset.New(1, 5, 10), - map[string]map[string]cpuset.CPUSet{ + map[string]map[string]ContainerCPUAssignment{ "pod": { - "container1": cpuset.New(1, 4), + "container1": ContainerCPUAssignment{ + Original: cpuset.New(1, 4), + Resized: cpuset.New(), + }, }, }, }, diff --git a/pkg/kubelet/cm/cpumanager/state/state_mem.go b/pkg/kubelet/cm/cpumanager/state/state_mem.go index 73a77920a0720..6de4b97636b18 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_mem.go +++ b/pkg/kubelet/cm/cpumanager/state/state_mem.go @@ -46,12 +46,23 @@ func NewMemoryState(logger logr.Logger) State { } } +func (s *stateMemory) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + s.RLock() + defer s.RUnlock() + + entry, exists := s.assignments[podUID][containerName] + return entry.Original.Clone(), exists +} + func (s *stateMemory) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { s.RLock() defer s.RUnlock() - res, ok := s.assignments[podUID][containerName] - return res.Clone(), ok + entry, exists := s.assignments[podUID][containerName] + if entry.Resized.IsEmpty() { + return entry.Original.Clone(), exists + } + return entry.Resized.Clone(), exists } func (s *stateMemory) GetDefaultCPUSet() cpuset.CPUSet { @@ -79,11 +90,18 @@ func (s *stateMemory) SetCPUSet(podUID string, containerName string, cset cpuset defer s.Unlock() if _, ok := s.assignments[podUID]; !ok { - s.assignments[podUID] = make(map[string]cpuset.CPUSet) + s.assignments[podUID] = make(map[string]ContainerCPUAssignment) + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", cset, "Resized cpuSet", cpuset.New()) + } else { + if entry, ok := s.assignments[podUID][containerName]; !ok { + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", cset, "Resized cpuSet", cpuset.New()) + } else { + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: entry.Original, Resized: cset} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", entry.Original, "Resized cpuSet", cset) + } } - - s.assignments[podUID][containerName] = cset - s.logger.Info("Updated desired CPUSet", "podUID", podUID, "containerName", containerName, "cpuSet", cset) } func (s *stateMemory) SetDefaultCPUSet(cset cpuset.CPUSet) { diff --git a/pkg/kubelet/cm/cpumanager/state/state_test.go b/pkg/kubelet/cm/cpumanager/state/state_test.go index efe9ba1c611d5..6ce60439008cb 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_test.go @@ -25,9 +25,9 @@ import ( func TestClone(t *testing.T) { expect := ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), + "pod": map[string]ContainerCPUAssignment{ + "container1": {Original: cpuset.New(4, 5, 6), Resized: cpuset.New()}, + "container2": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, } actual := expect.Clone() diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index 166b86db0530d..937de282157d8 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -590,8 +590,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(0, 6), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(0, 6), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -611,8 +611,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(3, 9), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -632,8 +632,8 @@ func returnTestCases() []testCase { pod: *testPod4, container: *testContainer4, assignments: state.ContainerCPUAssignments{ - string(testPod4.UID): map[string]cpuset.CPUSet{ - testContainer4.Name: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer4.Name: {Original: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -649,8 +649,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(0, 6, 3, 9), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(0, 6, 3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -661,8 +661,8 @@ func returnTestCases() []testCase { pod: *testPod4, container: *testContainer4, assignments: state.ContainerCPUAssignments{ - string(testPod4.UID): map[string]cpuset.CPUSet{ - testContainer4.Name: cpuset.New(0, 6, 3, 9), + string(testPod4.UID): map[string]state.ContainerCPUAssignment{ + testContainer4.Name: {Original: cpuset.New(0, 6, 3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), diff --git a/pkg/kubelet/types/constants.go b/pkg/kubelet/types/constants.go index 6c032139b74a1..791052dbbcece 100644 --- a/pkg/kubelet/types/constants.go +++ b/pkg/kubelet/types/constants.go @@ -38,10 +38,3 @@ const ( LimitedSwap SwapBehavior = "LimitedSwap" NoSwap SwapBehavior = "NoSwap" ) - -// InPlacePodVerticalScaling types -const ( - // ErrorInconsistentCPUAllocation represent the type of an inconsistentCPUAllocationError - ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" - ErrorGetCPUSet = "getCPUSetError" -) diff --git a/pkg/registry/core/pod/strategy.go b/pkg/registry/core/pod/strategy.go index de33785d0e828..a77ad05142310 100644 --- a/pkg/registry/core/pod/strategy.go +++ b/pkg/registry/core/pod/strategy.go @@ -407,7 +407,6 @@ func dropNonResizeUpdatesForContainers(new, old []api.Container) []api.Container } oldCopyWithMergedResources[i].Resources = ctr.Resources oldCopyWithMergedResources[i].ResizePolicy = ctr.ResizePolicy - oldCopyWithMergedResources[i].Env = ctr.Env } return oldCopyWithMergedResources diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index c929750c330fc..802a6b3a67d7d 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -469,6 +469,39 @@ func ExpectPodResized(ctx context.Context, f *framework.Framework, resizedPod *v } } +func ExpectPodResizePending(ctx context.Context, f *framework.Framework, resizePendingPod *v1.Pod, expectedContainers []ResizableContainerInfo) { + ginkgo.GinkgoHelper() + + // Verify Pod Containers Cgroup Values + var errs []error + if cgroupErrs := VerifyPodContainersCgroupValues(ctx, f, resizePendingPod, expectedContainers); cgroupErrs != nil { + errs = append(errs, fmt.Errorf("container cgroup values don't match expected: %w", formatErrors(cgroupErrs))) + } + if resourceErrs := VerifyPodStatusResources(resizePendingPod, expectedContainers); resourceErrs != nil { + errs = append(errs, fmt.Errorf("container status resources don't match expected: %w", formatErrors(resourceErrs))) + } + if restartErrs := verifyPodRestarts(f, resizePendingPod, expectedContainers); restartErrs != nil { + errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) + } + + // Verify Pod Resize conditions are empty. + podResizePendingFound := false + for _, condition := range resizePendingPod.Status.Conditions { + if condition.Type == v1.PodResizePending { + podResizePendingFound = true + } + } + if !podResizePendingFound { + errs = append(errs, fmt.Errorf("resize condition type %s not found in pod status", v1.PodResizePending)) + } + + if len(errs) > 0 { + resizePendingPod.ManagedFields = nil // Suppress managed fields in error output. + framework.ExpectNoError(formatErrors(utilerrors.NewAggregate(errs)), + "Verifying pod resources resize state. Pod: %s", framework.PrettyPrintJSON(resizePendingPod)) + } +} + func MakeResizePatch(originalContainers, desiredContainers []ResizableContainerInfo, originPodResources, desiredPodResources *v1.ResourceRequirements) []byte { original, err := json.Marshal(MakePodWithResizableContainers("", "", "", originalContainers, originPodResources)) framework.ExpectNoError(err) diff --git a/test/e2e/feature/feature.go b/test/e2e/feature/feature.go index b0bf9feaf7dce..357a8fdfd4111 100644 --- a/test/e2e/feature/feature.go +++ b/test/e2e/feature/feature.go @@ -202,6 +202,16 @@ var ( // ImageVolume is used for testing the image volume source feature (https://kep.k8s.io/4639). ImageVolume = framework.WithFeature(framework.ValidFeatures.Add("ImageVolume")) + // Owner: sig-node + // Marks a test for InPlacePodVerticalScaling feature that requires + // InPlacePodVerticalScaling feature gate to be enabled. + InPlacePodVerticalScaling = framework.WithFeature(framework.ValidFeatures.Add("InPlacePodVerticalScaling")) + + // Owner: sig-node + // Marks a test for InPlacePodVerticalScalingExclusiveCPUs feature that requires + // InPlacePodVerticalScalingExclusiveCPUs feature gate to be enabled. + InPlacePodVerticalScalingExclusiveCPUs = framework.WithFeature(framework.ValidFeatures.Add("InPlacePodVerticalScalingExclusiveCPUs")) + // Owner: sig-network // Marks tests that require a conforming implementation of // Ingress.networking.k8s.io to be present. diff --git a/test/e2e_node/cpu_manager_metrics_test.go b/test/e2e_node/cpu_manager_metrics_test.go index 44e07943d33d7..acf7e85e73194 100644 --- a/test/e2e_node/cpu_manager_metrics_test.go +++ b/test/e2e_node/cpu_manager_metrics_test.go @@ -104,8 +104,6 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa enableCPUManagerOptions: true, options: cpuPolicyOptions, }, - false, - false, ) updateKubeletConfig(ctx, f, newCfg, true) }) @@ -404,7 +402,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -444,7 +442,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, ) updateKubeletConfig(ctx, f, newCfg, true) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 4ac1c4d91b82e..be1d5c5615e92 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -35,17 +35,22 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + helpers "k8s.io/component-helpers/resource" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/features" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" + "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/utils/cpuset" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" ) @@ -2101,1692 +2106,3633 @@ var _ = SIGDescribe("CPU Manager Incompatibility Pod Level Resources", ginkgo.Or }) }) -// Matching helpers - -func HaveStatusReasonMatchingRegex(expr string) types.GomegaMatcher { - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - re, err := regexp.Compile(expr) - if err != nil { - return false, err - } - return re.MatchString(actual.Status.Reason), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} reason {{.Actual.Status.Reason}} does not match regexp {{.Data}}", expr) -} - -type msgData struct { - Name string - CurrentCPUs string - ExpectedCPUs string - MismatchedCPUs string - UncoreCacheAlign string - Count int - Aligned int - CurrentQuota string - ExpectedQuota string -} - -func HaveContainerCPUsCount(ctnName string, val int) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - Count: val, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - return cpus.Size() == val, nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching expected count <{{.Data.Count}}> for container {{.Data.Name}}", md) -} - -func HaveContainerCPUsAlignedTo(ctnName string, val int) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - Aligned: val, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - return cpus.Size()%val == 0, nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not aligned to value <{{.Data.Aligned}}> for container {{.Data.Name}}", md) -} - -func HaveContainerCPUsOverlapWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - ExpectedCPUs: ref.String(), - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err +var _ = SIGDescribe("CPU Manager with InPlacePodVerticalScalingExclusiveCPUs disabled", + ginkgo.Ordered, + ginkgo.ContinueOnFailure, + framework.WithSerial(), + feature.CPUManager, + feature.InPlacePodVerticalScaling, + feature.InPlacePodVerticalScalingExclusiveCPUs, + framework.WithFeatureGate(features.InPlacePodVerticalScaling), + framework.WithFeatureGate(features.InPlacePodVerticalScalingExclusiveCPUs), + func() { + + type containerCPUInfo struct { + Name string + cpuCount int } - sharedCPUs := cpus.Intersection(ref) - return sharedCPUs.Size() > 0, nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> overlapping with expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) -} -func HaveContainerCPUsASubsetOf(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - ExpectedCPUs: ref.String(), - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - return cpus.IsSubsetOf(ref), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not a subset of expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) -} + f := framework.NewDefaultFramework("cpu-manager-pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged -func HaveContainerCPUsEqualTo(ctnName string, expectedCPUs cpuset.CPUSet) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - ExpectedCPUs: expectedCPUs.String(), - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - return cpus.Equals(expectedCPUs), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching the expected value <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) -} + // original kubeletconfig before the context start, to be restored + var oldCfg *kubeletconfig.KubeletConfiguration + var reservedCPUs cpuset.CPUSet + var onlineCPUs cpuset.CPUSet + var smtLevel int + var uncoreGroupSize int + // tracks all the pods created by a It() block. Best would be a namespace per It block + // TODO: move to a namespace per It block? + var podMap map[string]*v1.Pod -func HaveSandboxQuotaWithPeriod(expectedQuota, cfsPeriod string) types.GomegaMatcher { - md := &msgData{} - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - md.Name = klog.KObj(actual).String() - quota, err := getSandboxCFSQuota(actual) - md.CurrentQuota = quota - if err != nil { - framework.Logf("getSandboxCFSQuota() failed: %v", err) - return false, err - } - md.ExpectedQuota = fmt.Sprintf("^%s %s$", expectedQuota, cfsPeriod) - re, err := regexp.Compile(md.ExpectedQuota) - if err != nil { - return false, err - } - return re.MatchString(quota), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}>", md) -} + // closure just and only to not carry around awkwardly `f` and `onlineCPUs` only for logging purposes + var skipIfAllocatableCPUsLessThan func(node *v1.Node, cpuReq int) -func HaveContainerQuotaWithPeriod(ctnName, expectedQuota, cfsPeriod string) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - quota, err := getContainerCFSQuota(actual, ctnName, false) - md.CurrentQuota = quota - if err != nil { - framework.Logf("getContainerCFSQuota(%s) failed: %v", ctnName, err) - return false, err - } - md.ExpectedQuota = fmt.Sprintf("^%s %s$", expectedQuota, cfsPeriod) - re, err := regexp.Compile(md.ExpectedQuota) - if err != nil { - return false, err - } - return re.MatchString(quota), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}> for container {{.Data.Name}}", md) -} + ginkgo.BeforeAll(func(ctx context.Context) { + var err error + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) -func HaveSandboxQuota(expectedQuota string) types.GomegaMatcher { - return HaveSandboxQuotaWithPeriod(expectedQuota, defaultCFSPeriod) -} + onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime + framework.ExpectNoError(err) + framework.Logf("Online CPUs: %s", onlineCPUs) -func HaveContainerQuota(ctnName, expectedQuota string) types.GomegaMatcher { - return HaveContainerQuotaWithPeriod(ctnName, expectedQuota, defaultCFSPeriod) -} + smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime + framework.Logf("SMT level: %d", smtLevel) -func HaveContainerCPUsThreadSiblings(ctnName string) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - expectedCPUs := makeThreadSiblingCPUSet(cpus) - md.ExpectedCPUs = expectedCPUs.String() - return cpus.Equals(expectedCPUs), nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}>) for container {{.Data.Name}}", md) -} + uncoreGroupSize = getUncoreCPUGroupSize() + framework.Logf("Uncore Group Size: %d", uncoreGroupSize) -func HaveContainerCPUsQuasiThreadSiblings(ctnName string, toleration int) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - Count: toleration, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - md.CurrentCPUs = cpus.String() - if err != nil { - framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) - return false, err - } - // this is by construction >= cpus (extreme case: cpus is made by all non-thread-siblings) - expectedCPUs := makeThreadSiblingCPUSet(cpus) - md.ExpectedCPUs = expectedCPUs.String() - mismatchedCPUs := expectedCPUs.Difference(cpus) - md.MismatchedCPUs = mismatchedCPUs.String() - return mismatchedCPUs.Size() <= toleration, nil - }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}> mismatched <{{.Data.MismatchedCPUs}}> toleration <{{.Data.Count}}>) for container {{.Data.Name}}", md) -} + e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode() + framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled) -func HaveContainerCPUsWithSameUncoreCacheID(ctnName string) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - cpus, err := getContainerAllowedCPUs(actual, ctnName, false) - if err != nil { - return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err) - } - md.CurrentCPUs = cpus.String() + e2enodeCgroupDriver = oldCfg.CgroupDriver + framework.Logf("cgroup driver: %s", e2enodeCgroupDriver) - var commonCacheID *int64 + runtime, _, err := getCRIClient() + framework.ExpectNoError(err, "Failed to get CRI client") - for _, cpu := range cpus.List() { - // determine the Uncore Cache ID for each cpu - uncoreID, err := uncoreCacheIDFromSysFS(cpu) - if err != nil { - return false, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpu, err) - } + version, err := runtime.Version(context.Background(), "") + framework.ExpectNoError(err, "Failed to get runtime version") - // if this the first CPU we check, set the Uncore Cache ID as the reference - // for subsequent CPUs, compare the Uncore Cache ID to the reference - if commonCacheID == nil { - commonCacheID = &uncoreID - } else if *commonCacheID != uncoreID { - md.UncoreCacheAlign = fmt.Sprintf("shared uncoreID mismatch: CPU %d has uncoreID %d, CPUSet has uncoreID %d", cpu, uncoreID, *commonCacheID) - return false, nil - } - } + e2enodeRuntimeName = version.GetRuntimeName() + framework.Logf("runtime: %s", e2enodeRuntimeName) + }) - // All CPUs matched the same cache ID - md.UncoreCacheAlign = fmt.Sprintf("all CPUs share cache ID %d", *commonCacheID) - return true, nil - }).WithTemplate( - "Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> where not all CPUs share the same uncore cache ID: {{.Data.UncoreCacheAlign}}", - md, - ) -} + ginkgo.AfterAll(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) -func HaveContainerCPUsShareUncoreCacheWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { - md := &msgData{ - Name: ctnName, - ExpectedCPUs: ref.String(), - } - return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { - containerCPUs, err := getContainerAllowedCPUs(actual, ctnName, false) - if err != nil { - return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err) - } - md.CurrentCPUs = containerCPUs.String() + ginkgo.BeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + podMap = make(map[string]*v1.Pod) + }) - // Build set of uncore cache IDs from the reference cpuset - refUncoreIDs := sets.New[int64]() - for _, cpu := range ref.UnsortedList() { - uncoreID, err := uncoreCacheIDFromSysFS(cpu) - if err != nil { - return false, fmt.Errorf("failed to read uncore cache ID for reference CPU %d: %w", cpu, err) + ginkgo.JustBeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + + // use a closure to minimize the arguments, to make the usage more straightforward + skipIfAllocatableCPUsLessThan = func(node *v1.Node, val int) { + ginkgo.GinkgoHelper() + cpuReq := int64(val + reservedCPUs.Size()) // reserved CPUs are not usable, need to account them + // the framework is initialized using an injected BeforeEach node, so the + // earliest we can do is to initialize the other objects here + nodeCPUDetails := cpuDetailsFromNode(node) + + msg := fmt.Sprintf("%v full CPUs (detected=%v requested=%v reserved=%v online=%v smt=%v)", cpuReq, nodeCPUDetails.Allocatable, val, reservedCPUs.Size(), onlineCPUs.Size(), smtLevel) + ginkgo.By("Checking if allocatable: " + msg) + if nodeCPUDetails.Allocatable < cpuReq { + e2eskipper.Skipf("Skipping CPU Manager test: not allocatable %s", msg) + } } - refUncoreIDs.Insert(uncoreID) - } + }) - // Check if any container CPUs share an uncore ID with the reference set - for _, cpu := range containerCPUs.UnsortedList() { - uncoreID, err := uncoreCacheIDFromSysFS(cpu) - if err != nil { - return false, fmt.Errorf("failed to read uncore cache ID for container CPU %d: %w", cpu, err) - } - if refUncoreIDs.Has(uncoreID) { - md.UncoreCacheAlign = fmt.Sprintf("%d", uncoreID) - return true, nil - } - } + ginkgo.AfterEach(func(ctx context.Context) { + deletePodsAsync(ctx, f, podMap) + }) - return false, nil - }).WithTemplate( - "Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> sharing uncoreCache ID <{{.Data.UncoreCacheAlign}}> with reference CPUSet <{{.Data.ExpectedCPUs}}>", - md, - ) -} - -// Custom matcher for checking packed CPUs. -func BePackedCPUs() types.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - // This assumption holds true if there are enough CPUs on a single NUMA node. - // We are intentionally limiting the CPU request to 2 to minimize the number - // of CPUs required to fulfill this case and therefore maximize the chances - // of correctly validating this case. - if count == allocatedCPUs.Size() { - return true, nil - } - } - return false, nil - }).WithMessage("expected CPUs to be packed") -} + ginkgo.When("resizing a Guaranteed QoS single container pod with integer CPU requests", ginkgo.Label("guaranteed single container pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + expectedCpuInfo []containerCPUInfo, + wantError string, + ) { + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCpuInfo[0].cpuCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: false, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", originalCpuInfo[0].cpuCount)) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } + }, + ginkgo.Entry("neither should increase the CPU request/limit nor decrease the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("neither should increase the CPU request/limit nor increase the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not increase the exclusively CPUs, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not decrease the allocated exclusively CPUs below promised cpuset", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "1000m", CPULim: "1000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not increase the allocated exclusively CPUs beyond available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000000m", CPULim: "2000000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ) + }) + }, +) -// Custom matcher for checking distributed CPUs. -func BeDistributedCPUs(expectedSpread int) types.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - if count != expectedSpread { - return false, nil - } +var _ = SIGDescribe("CPU Manager with InPlacePodVerticalScalingExclusiveCPUs enabled", + ginkgo.Ordered, + ginkgo.ContinueOnFailure, + framework.WithSerial(), + feature.CPUManager, + feature.InPlacePodVerticalScaling, + feature.InPlacePodVerticalScalingExclusiveCPUs, + framework.WithFeatureGate(features.InPlacePodVerticalScaling), + framework.WithFeatureGate(features.InPlacePodVerticalScalingExclusiveCPUs), + func() { + + type containerCPUInfo struct { + Name string + cpuCount int } - return true, nil - }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) -} - -// Other helpers -func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { - framework.Logf("got pod logs: <%v>", logs) - cpus, err := cpuset.Parse(strings.TrimSpace(logs)) - framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) - return cpus -} + f := framework.NewDefaultFramework("cpu-manager-pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged -// computeNUMADistribution calculates CPU distribution per NUMA node. -func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { - numaCPUs, err := getNumaNodeCPUs() - framework.ExpectNoError(err, "Error retrieving NUMA nodes") - framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) + // original kubeletconfig before the context start, to be restored + var oldCfg *kubeletconfig.KubeletConfiguration + var reservedCPUs cpuset.CPUSet + var onlineCPUs cpuset.CPUSet + var smtLevel int + var uncoreGroupSize int + // tracks all the pods created by a It() block. Best would be a namespace per It block + // TODO: move to a namespace per It block? + var podMap map[string]*v1.Pod - distribution := make(map[int]int) - for node, cpus := range numaCPUs { - distribution[node] = cpus.Intersection(allocatedCPUs).Size() - } + // closure just and only to not carry around awkwardly `f` and `onlineCPUs` only for logging purposes + var skipIfAllocatableCPUsLessThan func(node *v1.Node, cpuReq int) - framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) - return distribution -} + ginkgo.BeforeAll(func(ctx context.Context) { + var err error + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) -func getContainerAllowedCPUs(pod *v1.Pod, ctnName string, isInit bool) (cpuset.CPUSet, error) { - cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, e2enodeCgroupV2Enabled) - if err != nil { - return cpuset.New(), err - } - cgPath = filepath.Join(cgPath, cpusetFileNameFromVersion(e2enodeCgroupV2Enabled)) - framework.Logf("pod %s/%s cnt %s qos=%s path %q", pod.Namespace, pod.Name, ctnName, pod.Status.QOSClass, cgPath) - data, err := os.ReadFile(cgPath) - if err != nil { - return cpuset.New(), err - } - cpus := strings.TrimSpace(string(data)) - framework.Logf("pod %s/%s cnt %s cpuset %q", pod.Namespace, pod.Name, ctnName, cpus) - return cpuset.Parse(cpus) -} + onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime + framework.ExpectNoError(err) + framework.Logf("Online CPUs: %s", onlineCPUs) -func getSandboxCFSQuota(pod *v1.Pod) (string, error) { - if !e2enodeCgroupV2Enabled { - return "", fmt.Errorf("only Cgroup V2 is supported") - } - cgPath := filepath.Join(makeCgroupPathForPod(pod, true), "cpu.max") - data, err := os.ReadFile(cgPath) - if err != nil { - return "", err - } - quota := strings.TrimSpace(string(data)) - framework.Logf("pod %s/%s qos=%s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, cgPath, quota) - return quota, nil -} + smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime + framework.Logf("SMT level: %d", smtLevel) -func getContainerCFSQuota(pod *v1.Pod, ctnName string, isInit bool) (string, error) { - if !e2enodeCgroupV2Enabled { - return "", fmt.Errorf("only Cgroup V2 is supported") - } - cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, true) - if err != nil { - return "", err - } - data, err := os.ReadFile(filepath.Join(cgPath, "cpu.max")) - if err != nil { - return "", err - } - quota := strings.TrimSpace(string(data)) - framework.Logf("pod %s/%s qos=%s cnt %s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, ctnName, cgPath, quota) - return quota, nil -} + uncoreGroupSize = getUncoreCPUGroupSize() + framework.Logf("Uncore Group Size: %d", uncoreGroupSize) -const ( - kubeCgroupRoot = "/sys/fs/cgroup" -) + e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode() + framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled) -// example path (systemd, crio, v2): -// /sys/fs/cgroup/ kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod0b7632a2_a56e_4278_987a_22de18008dbe.slice/ crio-conmon-0bc5eac79e3ae7a0c2651f14722aa10fa333eb2325c2ca97da33aa284cda81b0.scope -// example path (cgroup, containerd, v1): -// /sys/fs/cgroup/cpuset kubepods/burstable pod8e414e92-17c2-41de-81c7-0045bba9103b b5791f89a6971bb4a751ffbebf533399c91630aa2906d7c6b5e239f405f3b97a + e2enodeCgroupDriver = oldCfg.CgroupDriver + framework.Logf("cgroup driver: %s", e2enodeCgroupDriver) -func makeCgroupPathForPod(pod *v1.Pod, isV2 bool) string { - components := []string{defaultNodeAllocatableCgroup} - if pod.Status.QOSClass != v1.PodQOSGuaranteed { - components = append(components, strings.ToLower(string(pod.Status.QOSClass))) - } - components = append(components, "pod"+string(pod.UID)) + runtime, _, err := getCRIClient() + framework.ExpectNoError(err, "Failed to get CRI client") - cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...) - cgroupFsName := "" - // it's quite ugly to use a global, but it saves us to pass a parameter all across the stack many times - if e2enodeCgroupDriver == "systemd" { - cgroupFsName = cgroupName.ToSystemd() - } else { - cgroupFsName = cgroupName.ToCgroupfs() - } - if !isV2 { - cgroupFsName = filepath.Join("cpuset", cgroupFsName) - } - return filepath.Join(kubeCgroupRoot, cgroupFsName) -} + version, err := runtime.Version(context.Background(), "") + framework.ExpectNoError(err, "Failed to get runtime version") -func makeCgroupPathForContainer(pod *v1.Pod, ctnName string, isInit, isV2 bool) (string, error) { - fullCntID, ok := findContainerIDByName(pod, ctnName, isInit) - if !ok { - return "", fmt.Errorf("cannot find status for container %q", ctnName) - } - cntID, err := parseContainerID(fullCntID) - if err != nil { - return "", err - } - cntPath := "" - if e2enodeCgroupDriver == "systemd" { - cntPath = containerCgroupPathPrefixFromDriver(e2enodeRuntimeName) + "-" + cntID + ".scope" - } else { - cntPath = cntID - } + e2enodeRuntimeName = version.GetRuntimeName() + framework.Logf("runtime: %s", e2enodeRuntimeName) + }) - return filepath.Join(makeCgroupPathForPod(pod, isV2), cntPath), nil -} + ginkgo.AfterAll(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) -func cpusetFileNameFromVersion(isV2 bool) string { - if isV2 { - return "cpuset.cpus.effective" - } - return "cpuset.cpus" -} + ginkgo.BeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + podMap = make(map[string]*v1.Pod) + }) -func containerCgroupPathPrefixFromDriver(runtimeName string) string { - if runtimeName == "cri-o" { - return "crio" - } - return "cri-containerd" -} + ginkgo.JustBeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + + // use a closure to minimize the arguments, to make the usage more straightforward + skipIfAllocatableCPUsLessThan = func(node *v1.Node, val int) { + ginkgo.GinkgoHelper() + cpuReq := int64(val + reservedCPUs.Size()) // reserved CPUs are not usable, need to account them + // the framework is initialized using an injected BeforeEach node, so the + // earliest we can do is to initialize the other objects here + nodeCPUDetails := cpuDetailsFromNode(node) + + msg := fmt.Sprintf("%v full CPUs (detected=%v requested=%v reserved=%v online=%v smt=%v)", cpuReq, nodeCPUDetails.Allocatable, val, reservedCPUs.Size(), onlineCPUs.Size(), smtLevel) + ginkgo.By("Checking if allocatable: " + msg) + if nodeCPUDetails.Allocatable < cpuReq { + e2eskipper.Skipf("Skipping CPU Manager test: not allocatable %s", msg) + } + } + }) -func parseContainerID(fullID string) (string, error) { - _, cntID, found := strings.Cut(fullID, "://") - if !found { - return "", fmt.Errorf("unsupported containerID: %q", fullID) - } - // TODO: should we validate the kind? - return cntID, nil -} + ginkgo.AfterEach(func(ctx context.Context) { + deletePodsAsync(ctx, f, podMap) + }) -func findContainerIDByName(pod *v1.Pod, ctnName string, isInit bool) (string, bool) { - cntStatuses := pod.Status.ContainerStatuses - if isInit { - cntStatuses = pod.Status.InitContainerStatuses - } - for idx := range cntStatuses { - if cntStatuses[idx].Name == ctnName { - return cntStatuses[idx].ContainerID, true - } - } - return "", false -} + ginkgo.When("resizing a Burstable single container Pod", ginkgo.Label("burstable single container pod resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should increase the CPU request/limit & the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "400m", CPULim: "2000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "400m", CPULim: "2000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request/limit and the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "3000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "1000m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "1000m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Guaranteed single container Pod without integer CPU requests", ginkgo.Label("guaranteed single container pod resize"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should increase CPU & memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease CPU & memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "500Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease CPU request/limit, increase memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Burstable single container Pod", ginkgo.Label("burstable single container pod resize"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should decrease the memory request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request and increase the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request and the decrease CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request and increase the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request and decrease the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request and increase the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request and decrease the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request and increase the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request and decrease the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Guaranteed Pod with a single container and integer CPU requests", ginkgo.Label("single container guaranteed pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + expectedCpuInfo []containerCPUInfo, + wantError string, + ) { + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCpuInfo[0].cpuCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + podClient := e2epod.NewPodClient(f) + nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) + framework.ExpectNoError(err, "failed to get running nodes") + gomega.Expect(nodes.Items).ShouldNot(gomega.BeEmpty()) + framework.Logf("Found %d schedulable nodes", len(nodes.Items)) + + ginkgo.By("Find node CPU resources available for allocation!") + node := nodes.Items[0] + nodeAllocatableCPU, nodeAvailableCPU, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPU.MilliValue(), nodeAvailableCPU.MilliValue()) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + e2epod.SetNodeAffinity(&testPod1.Spec, node.Name) + + ginkgo.By("creating pod") + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", originalCpuInfo[0].cpuCount)) + + nodeAllocatableCPUAfterPodCreate, nodeAvailableCPUAfterPodCreate, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPUAfterPodCreate.MilliValue(), nodeAvailableCPUAfterPodCreate.MilliValue()) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + + nodeAllocatableCPUAfterPodResize, nodeAvailableCPUAfterPodResize, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPUAfterPodResize.MilliValue(), nodeAvailableCPUAfterPodResize.MilliValue()) + + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } + }, + ginkgo.Entry("should increase the CPU request/limit, decrease memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request/limit, increase memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should increase exclusively CPUs, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + }, + "", + ), + ginkgo.Entry("should not decrease allocated exclusively CPUs, below promised cpuset", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "1000m", CPULim: "1000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should not increase allocated exclusively CPUs, beyond available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200000m", CPULim: "200000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + }) + + ginkgo.When("topologyManagerPolicy is set to none, resizing a Guaranteed multiple containers Pod with integer CPU requests", ginkgo.Label("guaranteed multiple container pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "none", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") -func makeThreadSiblingCPUSet(cpus cpuset.CPUSet) cpuset.CPUSet { - siblingsCPUs := cpuset.New() - for _, cpuID := range cpus.UnsortedList() { - siblingsCPUs = siblingsCPUs.Union(cpuSiblingListFromSysFS(int64(cpuID))) - } - return siblingsCPUs -} + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) -func updateKubeletConfigIfNeeded(ctx context.Context, f *framework.Framework, desiredCfg *kubeletconfig.KubeletConfiguration) *v1.Node { - curCfg, err := getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) - if equalKubeletConfiguration(curCfg, desiredCfg) { - framework.Logf("Kubelet configuration already compliant, nothing to do") - return getLocalNode(ctx, f) - } + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - framework.Logf("Updating Kubelet configuration") - updateKubeletConfig(ctx, f, desiredCfg, true) - framework.Logf("Updated Kubelet configuration") + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - return getLocalNode(ctx, f) -} + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") -func equalKubeletConfiguration(cfgA, cfgB *kubeletconfig.KubeletConfiguration) bool { - cfgA = cfgA.DeepCopy() - cfgB = cfgB.DeepCopy() - // we care only about the payload, force metadata to be uniform - cfgA.TypeMeta = metav1.TypeMeta{} - cfgB.TypeMeta = metav1.TypeMeta{} - return reflect.DeepEqual(cfgA, cfgB) -} + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) -type nodeCPUDetails struct { - Capacity int64 - Allocatable int64 - Reserved int64 -} + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) -func cpuDetailsFromNode(node *v1.Node) nodeCPUDetails { - localNodeCap := node.Status.Capacity - cpuCap := localNodeCap[v1.ResourceCPU] - localNodeAlloc := node.Status.Allocatable - cpuAlloc := localNodeAlloc[v1.ResourceCPU] - cpuRes := cpuCap.DeepCopy() - cpuRes.Sub(cpuAlloc) - // RoundUp reserved CPUs to get only integer cores. - cpuRes.RoundUp(0) - return nodeCPUDetails{ - Capacity: cpuCap.Value(), - Allocatable: cpuCap.Value() - cpuRes.Value(), - Reserved: cpuRes.Value(), - } -} + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") -// smtLevelFromSysFS returns the number of symmetrical multi-thread (SMT) execution units the processor provides. -// The most common value on x86_64 is 2 (2 virtual threads/cores per physical core), that would be smtLevel == 2. -// The following are all synonyms: threadsPerCore, smtLevel -// Note: can't find a good enough yet not overly long name, "threadSiblingCount", "smtLevel", "threadsPerCore" are all questionable. -func smtLevelFromSysFS() int { - cpuID := int64(0) // this is just the most likely cpu to be present in a random system. No special meaning besides this. - cpus := cpuSiblingListFromSysFS(cpuID) - return cpus.Size() -} + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) -func cpuSiblingListFromSysFS(cpuID int64) cpuset.CPUSet { - data, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuID)) - framework.ExpectNoError(err) - // how many thread sibling you have = SMT level - // example: 2-way SMT means 2 threads sibling for each thread - cpus, err := cpuset.Parse(strings.TrimSpace(string(data))) - framework.ExpectNoError(err) - return cpus -} + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase CPU (gu-container-1) request and limit, afterwards decrease CPU (gu-container-1) request and limit within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + }) + + ginkgo.When("topologyManagerPolicOption is set to best-effort, resizing a Guaranteed multiple containers Pod with integer CPU request", ginkgo.Label("guaranteed pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "best-effort", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + }) + + ginkgo.When("topologyManagerPolicy option is set to restricted, resizing a Guaranteed multiple containers Pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "restricted", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + }) + + ginkgo.When("topology manager policy option is set to single-numa-node, resizing a Guaranteed multiple container pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "single-numa-node", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) -func uncoreCacheIDFromSysFS(cpuID int) (int64, error) { - // expect sysfs path for Uncore Cache ID for each CPU to be: - // /sys/devices/system/cpu/cpu#/cache/index3/id - cacheIDPath := filepath.Join("/sys/devices/system/cpu", fmt.Sprintf("cpu%d", cpuID), "cache", "index3", "id") - cacheIDBytes, err := os.ReadFile(cacheIDPath) - if err != nil { - return 0, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpuID, err) - } + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") - cacheIDStr := strings.TrimSpace(string(cacheIDBytes)) - cacheID, err := strconv.ParseInt(cacheIDStr, 10, 64) - if err != nil { - return 0, fmt.Errorf("failed to parse cache ID for CPU %d: %w", cpuID, err) - } + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - return cacheID, nil -} + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") -func makeCPUManagerBEPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { - var containers []v1.Container - for _, ctnAttr := range ctnAttributes { - ctn := v1.Container{ - Name: ctnAttr.ctnName, - Image: busyboxImage, - Command: []string{"sh", "-c", ctnAttr.ctnCommand}, - VolumeMounts: []v1.VolumeMount{ - { - Name: "sysfscgroup", - MountPath: "/sysfscgroup", - }, - { - Name: "podinfo", - MountPath: "/podinfo", - }, - }, - } - containers = append(containers, ctn) - } + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) - return &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - RestartPolicy: v1.RestartPolicyNever, - Containers: containers, - Volumes: []v1.Volume{ - { - Name: "sysfscgroup", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"}, - }, - }, - { - Name: "podinfo", - VolumeSource: v1.VolumeSource{ - DownwardAPI: &v1.DownwardAPIVolumeSource{ - Items: []v1.DownwardAPIVolumeFile{ - { - Path: "uid", - FieldRef: &v1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "metadata.uid", - }, - }, - }, - }, - }, - }, - }, - }, - } -} + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) -func requireCGroupV2() { - if e2enodeCgroupV2Enabled { - return - } - e2eskipper.Skipf("Skipping since CgroupV2 not used") + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") -} + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) -const ( - minSMTLevel = 2 - minCPUCapacity = 2 -) + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) -// Helper for makeCPUManagerPod(). -type ctnAttribute struct { - ctnName string - ctnCommand string - cpuRequest string - cpuLimit string - restartPolicy *v1.ContainerRestartPolicy -} + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") -// makeCPUMangerPod returns a pod with the provided ctnAttributes. -func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { - var containers []v1.Container - for _, ctnAttr := range ctnAttributes { - cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d" - ctn := v1.Container{ - Name: ctnAttr.ctnName, - Image: busyboxImage, - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(ctnAttr.cpuRequest), - v1.ResourceMemory: resource.MustParse("100Mi"), - }, - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(ctnAttr.cpuLimit), - v1.ResourceMemory: resource.MustParse("100Mi"), - }, - }, - Command: []string{"sh", "-c", cpusetCmd}, - VolumeMounts: []v1.VolumeMount{ - { - Name: "sysfscgroup", - MountPath: "/sysfscgroup", - }, - { - Name: "podinfo", - MountPath: "/podinfo", - }, - }, - } - containers = append(containers, ctn) - } + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - return &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - RestartPolicy: v1.RestartPolicyNever, - Containers: containers, - Volumes: []v1.Volume{ - { - Name: "sysfscgroup", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"}, + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - { - Name: "podinfo", - VolumeSource: v1.VolumeSource{ - DownwardAPI: &v1.DownwardAPIVolumeSource{ - Items: []v1.DownwardAPIVolumeFile{ - { - Path: "uid", - FieldRef: &v1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "metadata.uid", - }, - }, - }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, }, }, - }, - }, - }, - } -} - -// makeCPUMangerInitContainersPod returns a pod with init containers with the -// provided ctnAttributes. -func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { - var containers []v1.Container - cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" - cpusetAndSleepCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d" - for _, ctnAttr := range ctnAttributes { - ctn := v1.Container{ - Name: ctnAttr.ctnName, - Image: busyboxImage, - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(ctnAttr.cpuRequest), - v1.ResourceMemory: resource.MustParse("100Mi"), - }, - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse(ctnAttr.cpuLimit), - v1.ResourceMemory: resource.MustParse("100Mi"), - }, - }, - Command: []string{"sh", "-c", cpusetCmd}, - RestartPolicy: ctnAttr.restartPolicy, - } - if ctnAttr.restartPolicy != nil && *ctnAttr.restartPolicy == v1.ContainerRestartPolicyAlways { - ctn.Command = []string{"sh", "-c", cpusetAndSleepCmd} - } - containers = append(containers, ctn) - } - - return &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - RestartPolicy: v1.RestartPolicyNever, - InitContainers: containers, - Containers: []v1.Container{ - { - Name: "regular", - Image: busyboxImage, - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("1000m"), - v1.ResourceMemory: resource.MustParse("100Mi"), + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, }, - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("1000m"), - v1.ResourceMemory: resource.MustParse("100Mi"), + { + Name: "gu-container-2", + cpuCount: 2, }, }, - Command: []string{"sh", "-c", cpusetAndSleepCmd}, - }, - }, - }, - } -} - -type cpuManagerKubeletArguments struct { - policyName string - enableCPUManagerOptions bool - disableCPUQuotaWithExclusiveCPUs bool - enablePodLevelResources bool - customCPUCFSQuotaPeriod time.Duration - reservedSystemCPUs cpuset.CPUSet - options map[string]string -} - -func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) *kubeletconfig.KubeletConfiguration { - newCfg := oldCfg.DeepCopy() - if newCfg.FeatureGates == nil { - newCfg.FeatureGates = make(map[string]bool) - } + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + }) + }, +) - newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions - newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions - newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs - newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources - newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = isInPlacePodVerticalScalingExclusiveCPUsEnabled - newCfg.FeatureGates["InPlacePodVerticalScalingAllocatedStatus"] = isInPlacePodVerticalScalingAllocatedStatusEnabled +// Matching helpers +func WaitForPodResizePending(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as pending", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + for _, condition := range pod.Status.Conditions { - if kubeletArguments.customCPUCFSQuotaPeriod != 0 { - newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = true - newCfg.CPUCFSQuotaPeriod.Duration = kubeletArguments.customCPUCFSQuotaPeriod - } else { - newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = false - } + if condition.Type == v1.PodResizePending { + return true, nil + } + } + return false, nil + })) +} - newCfg.CPUManagerPolicy = kubeletArguments.policyName - newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} +func WaitForPodResizeDeferred(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as deferred", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeDeferred(pod), nil + })) +} - if kubeletArguments.options != nil { - newCfg.CPUManagerPolicyOptions = kubeletArguments.options - } +func WaitForPodResizeInfeasible(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as infeasible", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeInfeasible(pod), nil + })) +} - if kubeletArguments.reservedSystemCPUs.Size() > 0 { - cpus := kubeletArguments.reservedSystemCPUs.String() - framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus) - newCfg.ReservedSystemCPUs = cpus - } else { - // The Kubelet panics if either kube-reserved or system-reserved is not set - // when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that - // kubelet doesn't panic. - if newCfg.KubeReserved == nil { - newCfg.KubeReserved = map[string]string{} +func HaveStatusConditionsMatchingRegex(expr string) types.GomegaMatcher { + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + re, err := regexp.Compile(expr) + if err != nil { + return false, err } - - if _, ok := newCfg.KubeReserved["cpu"]; !ok { - newCfg.KubeReserved["cpu"] = "200m" + for _, condition := range actual.Status.Conditions { + if re.MatchString(fmt.Sprintf("%v", condition)) { + return true, nil + } } - } - - return newCfg + return false, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} conditions {{.Actual.Status.Conditions}} does not match regexp {{.Data}}", expr) } -func runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx context.Context, f *framework.Framework) { - var cpu1 int - var ctnAttrs []ctnAttribute - var pod *v1.Pod - var cpuList []int - var expAllowedCPUsListRegex string - var err error - // First running a Gu Pod, - // second disable cpu manager in kubelet, - // then delete the Gu Pod, - // then enable cpu manager in kubelet, - // at last wait for the reconcile process cleaned up the state file, if the assignments map is empty, - // it proves that the automatic cleanup in the reconcile process is in effect. - ginkgo.By("running a Gu pod for test remove") - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-testremove", - cpuRequest: "1000m", - cpuLimit: "1000m", - }, - } - pod = makeCPUManagerPod("gu-pod-testremove", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - ginkgo.By("checking if the expected cpuset was assigned") - cpu1 = 1 - if isHTEnabled() { - cpuList = mustParseCPUSet(getCPUSiblingList(0)).List() - cpu1 = cpuList[1] - } else if isMultiNUMA() { - cpuList = mustParseCPUSet(getCoreSiblingList(0)).List() - if len(cpuList) > 1 { - cpu1 = cpuList[1] +func HaveStatusReasonMatchingRegex(expr string) types.GomegaMatcher { + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + re, err := regexp.Compile(expr) + if err != nil { + return false, err } - } - expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod.Spec.Containers[0].Name, pod.Name) - - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with the - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) - + return re.MatchString(actual.Status.Reason), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} reason {{.Actual.Status.Reason}} does not match regexp {{.Data}}", expr) } -func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool, cpuAlloc int64) { - var err error - var ctnAttrs []ctnAttribute - var pod1, pod2, pod3 *v1.Pod - podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod - - framework.Logf("runCfsQuotaGuPods: disableQuota=%v, CPU Allocatable=%v", disabledCPUQuotaWithExclusiveCPUs, cpuAlloc) - - deleteTestPod := func(pod *v1.Pod) { - // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a - // 'deadline expired' message and the cleanup aborts, which we don't want. - // So let's use a separate and more generous timeout (determined by trial and error) - ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - deletePodSyncAndWait(ctx2, f, pod.Namespace, pod.Name) - delete(podsToClean, string(pod.UID)) - } - - // cleanup leftovers on test failure. The happy path is covered by `deleteTestPod` calls - ginkgo.DeferCleanup(func() { - ginkgo.By("by deleting the pods and waiting for container removal") - // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a - // 'deadline expired' message and the cleanup aborts, which we don't want. - // So let's use a separate and more generous timeout (determined by trial and error) - ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - deletePodsAsync(ctx2, f, podsToClean) - }) - - podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep -E "($(cat /podinfo/uid)|$(cat /podinfo/uid | sed 's/-/_/g'))(/|\.slice/)cpu.max$") && sleep 1d`} - cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"} - defaultPeriod := "100000" - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-cfsquota-disabled", - cpuRequest: "1", - cpuLimit: "1", - }, - } - pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs) - pod1.Spec.Containers[0].Command = cfsCheckCommand - pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1) - podsToClean[string(pod1.UID)] = pod1 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, exclusive CPUs, unlimited)") +type msgData struct { + Name string + CurrentCPUs string + ExpectedCPUs string + MismatchedCPUs string + UncoreCacheAlign string + Count int + Aligned int + CurrentQuota string + ExpectedQuota string +} - expectedQuota := "100000" - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" - } - expCFSQuotaRegex := fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod1.Spec.Containers[0].Name, pod1.Name) - deleteTestPod(pod1) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-cfsquota-enabled", - cpuRequest: "500m", - cpuLimit: "500m", - }, - } - pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs) - pod2.Spec.Containers[0].Command = cfsCheckCommand - pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2) - podsToClean[string(pod2.UID)] = pod2 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod2.Spec.Containers[0].Name, pod2.Name) - deleteTestPod(pod2) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "non-gu-container", - cpuRequest: "100m", - cpuLimit: "500m", - }, +func HaveContainerCPUsCount(ctnName string, val int) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + Count: val, } - pod3 = makeCPUManagerPod("non-gu-pod3", ctnAttrs) - pod3.Spec.Containers[0].Command = cfsCheckCommand - pod3 = e2epod.NewPodClient(f).CreateSync(ctx, pod3) - podsToClean[string(pod3.UID)] = pod3 - - ginkgo.By("checking if the expected cfs quota was assigned (BU pod, limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod3.Name, pod3.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod3.Spec.Containers[0].Name, pod3.Name) - deleteTestPod(pod3) - - if cpuAlloc >= 2 { - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-non-int-values", - cpuRequest: "500m", - cpuLimit: "500m", - }, - { - ctnName: "gu-container-int-values", - cpuRequest: "1", - cpuLimit: "1", - }, - } - pod4 := makeCPUManagerPod("gu-pod4", ctnAttrs) - pod4.Spec.Containers[0].Command = cfsCheckCommand - pod4.Spec.Containers[1].Command = cfsCheckCommand - pod4 = e2epod.NewPodClient(f).CreateSync(ctx, pod4) - podsToClean[string(pod4.UID)] = pod4 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, container 0 exclusive CPUs unlimited, container 1 limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod4.Spec.Containers[0].Name, pod4.Name) - expectedQuota = "100000" - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" - } - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[1].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod4.Spec.Containers[1].Name, pod4.Name) - deleteTestPod(pod4) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-non-int-values", - cpuRequest: "500m", - cpuLimit: "500m", - }, - { - ctnName: "gu-container-int-values", - cpuRequest: "1", - cpuLimit: "1", - }, - } - - pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs) - pod5.Spec.Containers[0].Command = podCFSCheckCommand - pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5) - podsToClean[string(pod5.UID)] = pod5 - - ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, unlimited)") - - expectedQuota = "150000" - - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err } + return cpus.Size() == val, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching expected count <{{.Data.Count}}> for container {{.Data.Name}}", md) +} - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name) - deleteTestPod(pod5) - } else { - ginkgo.By(fmt.Sprintf("some cases SKIPPED - requests at least %d allocatable cores, got %d", 2, cpuAlloc)) +func HaveContainerCPUsAlignedTo(ctnName string, val int) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + Aligned: val, } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err + } + return cpus.Size()%val == 0, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not aligned to value <{{.Data.Aligned}}> for container {{.Data.Name}}", md) +} - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container", - cpuRequest: "100m", - cpuLimit: "100m", - }, +func HaveContainerCPUsOverlapWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + ExpectedCPUs: ref.String(), } - - pod6 := makeCPUManagerPod("gu-pod6", ctnAttrs) - pod6.Spec.Containers[0].Command = podCFSCheckCommand - pod6 = e2epod.NewPodClient(f).CreateSync(ctx, pod6) - podsToClean[string(pod6.UID)] = pod6 - - ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, limited)") - - expectedQuota = "10000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod6.Name, pod6.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod6.Spec.Containers[0].Name, pod6.Name) - deleteTestPod(pod6) + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err + } + sharedCPUs := cpus.Intersection(ref) + return sharedCPUs.Size() > 0, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> overlapping with expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) } -func runCPUManagerTests(f *framework.Framework) { - var cpuCap, cpuAlloc int64 - var oldCfg *kubeletconfig.KubeletConfiguration - - ginkgo.BeforeEach(func(ctx context.Context) { - var err error - if oldCfg == nil { - oldCfg, err = getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) +func HaveContainerCPUsASubsetOf(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + ExpectedCPUs: ref.String(), + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err } - }) - - ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) { - cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + return cpus.IsSubsetOf(ref), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not a subset of expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) +} - // Skip CPU Manager tests altogether if the CPU capacity < minCPUCapacity. - if cpuCap < minCPUCapacity { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < %d", minCPUCapacity) +func HaveContainerCPUsEqualTo(ctnName string, expectedCPUs cpuset.CPUSet) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + ExpectedCPUs: expectedCPUs.String(), + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err } + return cpus.Equals(expectedCPUs), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching the expected value <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md) +} - // Enable CPU Manager in the kubelet. - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.CPUSet{}, - }, false, false) - updateKubeletConfig(ctx, f, newCfg, true) - - ginkgo.By("running a non-Gu pod") - runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) - - ginkgo.By("running a Gu pod") - runGuPodTest(ctx, f, 1, cpuset.New()) - - ginkgo.By("running multiple Gu and non-Gu pods") - runMultipleGuNonGuPods(ctx, f, cpuCap, cpuAlloc) - - // Skip rest of the tests if CPU capacity < 3. - if cpuCap < 3 { - e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3") +func HaveSandboxQuotaWithPeriod(expectedQuota, cfsPeriod string) types.GomegaMatcher { + md := &msgData{} + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + md.Name = klog.KObj(actual).String() + quota, err := getSandboxCFSQuota(actual) + md.CurrentQuota = quota + if err != nil { + framework.Logf("getSandboxCFSQuota() failed: %v", err) + return false, err } - - ginkgo.By("running a Gu pod requesting multiple CPUs") - runMultipleCPUGuPod(ctx, f) - - ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs") - runMultipleCPUContainersGuPod(ctx, f) - - ginkgo.By("running multiple Gu pods") - runMultipleGuPods(ctx, f) - - ginkgo.By("test for automatically remove inactive pods from cpumanager state file.") - runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx, f) - }) - - ginkgo.It("reservedSystemCPUs are excluded only for Gu pods (strict-cpu-reservation option not enabled by default)", func(ctx context.Context) { - cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - - // Skip CPU Manager tests altogether if the CPU capacity < 2. - if cpuCap < 2 { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") + md.ExpectedQuota = fmt.Sprintf("^%s %s$", expectedQuota, cfsPeriod) + re, err := regexp.Compile(md.ExpectedQuota) + if err != nil { + return false, err } + return re.MatchString(quota), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}>", md) +} - reservedSystemCPUs := cpuset.New(0) - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") - runGuPodTest(ctx, f, 1, reservedSystemCPUs) - - ginkgo.By("running a non-Gu pod - it can use reserved system CPUs") - runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) - - }) - - ginkgo.It("reservedSystemCPUs are excluded for both Gu and non-Gu pods (strict-cpu-reservation option enabled)", func(ctx context.Context) { - cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - - // Skip CPU Manager tests altogether if the CPU capacity < 2. - if cpuCap < 2 { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") +func HaveContainerQuotaWithPeriod(ctnName, expectedQuota, cfsPeriod string) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + quota, err := getContainerCFSQuota(actual, ctnName, false) + md.CurrentQuota = quota + if err != nil { + framework.Logf("getContainerCFSQuota(%s) failed: %v", ctnName, err) + return false, err } - - reservedSystemCPUs := cpuset.New(0) - cpuPolicyOptions := map[string]string{ - cpumanager.StrictCPUReservationOption: "true", + md.ExpectedQuota = fmt.Sprintf("^%s %s$", expectedQuota, cfsPeriod) + re, err := regexp.Compile(md.ExpectedQuota) + if err != nil { + return false, err } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") - runGuPodTest(ctx, f, 1, reservedSystemCPUs) + return re.MatchString(quota), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}> for container {{.Data.Name}}", md) +} - ginkgo.By("running a non-Gu pod - it shouldn't use reserved system CPUs with strict-cpu-reservation option enabled") - runNonGuPodTest(ctx, f, cpuCap, reservedSystemCPUs) - }) +func HaveSandboxQuota(expectedQuota string) types.GomegaMatcher { + return HaveSandboxQuotaWithPeriod(expectedQuota, defaultCFSPeriod) +} - ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() +func HaveContainerQuota(ctnName, expectedQuota string) types.GomegaMatcher { + return HaveContainerQuotaWithPeriod(ctnName, expectedQuota, defaultCFSPeriod) +} - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) +func HaveContainerCPUsThreadSiblings(ctnName string) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err } + expectedCPUs := makeThreadSiblingCPUSet(cpus) + md.ExpectedCPUs = expectedCPUs.String() + return cpus.Equals(expectedCPUs), nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}>) for container {{.Data.Name}}", md) +} - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) +func HaveContainerCPUsQuasiThreadSiblings(ctnName string, toleration int) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + Count: toleration, + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + md.CurrentCPUs = cpus.String() + if err != nil { + framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err) + return false, err } + // this is by construction >= cpus (extreme case: cpus is made by all non-thread-siblings) + expectedCPUs := makeThreadSiblingCPUSet(cpus) + md.ExpectedCPUs = expectedCPUs.String() + mismatchedCPUs := expectedCPUs.Difference(cpus) + md.MismatchedCPUs = mismatchedCPUs.String() + return mismatchedCPUs.Size() <= toleration, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}> mismatched <{{.Data.MismatchedCPUs}}> toleration <{{.Data.Count}}>) for container {{.Data.Name}}", md) +} - framework.Logf("SMT level %d", smtLevel) - - // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably - // check what we do have in the node. - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", +func HaveContainerCPUsWithSameUncoreCacheID(ctnName string) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + cpus, err := getContainerAllowedCPUs(actual, ctnName, false) + if err != nil { + return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err) } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, false, false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - // the order between negative and positive doesn't really matter - runSMTAlignmentNegativeTests(ctx, f) - runSMTAlignmentPositiveTests(ctx, f, smtLevel, cpuset.New()) - }) + md.CurrentCPUs = cpus.String() - ginkgo.It("should assign CPUs as expected based on strict SMT alignment, reservedSystemCPUs should be excluded (both strict-cpu-reservation and full-pcpus-only options enabled)", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() + var commonCacheID *int64 - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < 2 { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } + for _, cpu := range cpus.List() { + // determine the Uncore Cache ID for each cpu + uncoreID, err := uncoreCacheIDFromSysFS(cpu) + if err != nil { + return false, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpu, err) + } - // our tests want to allocate a full core, so we need at last smtLevel*2 virtual cpus - if cpuAlloc < int64(smtLevel*2) { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, smtLevel*2) + // if this the first CPU we check, set the Uncore Cache ID as the reference + // for subsequent CPUs, compare the Uncore Cache ID to the reference + if commonCacheID == nil { + commonCacheID = &uncoreID + } else if *commonCacheID != uncoreID { + md.UncoreCacheAlign = fmt.Sprintf("shared uncoreID mismatch: CPU %d has uncoreID %d, CPUSet has uncoreID %d", cpu, uncoreID, *commonCacheID) + return false, nil + } } - framework.Logf("SMT level %d", smtLevel) + // All CPUs matched the same cache ID + md.UncoreCacheAlign = fmt.Sprintf("all CPUs share cache ID %d", *commonCacheID) + return true, nil + }).WithTemplate( + "Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> where not all CPUs share the same uncore cache ID: {{.Data.UncoreCacheAlign}}", + md, + ) +} - reservedSystemCPUs := cpuset.New(0) - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.StrictCPUReservationOption: "true", +func HaveContainerCPUsShareUncoreCacheWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher { + md := &msgData{ + Name: ctnName, + ExpectedCPUs: ref.String(), + } + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + containerCPUs, err := getContainerAllowedCPUs(actual, ctnName, false) + if err != nil { + return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err) } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - // the order between negative and positive doesn't really matter - runSMTAlignmentNegativeTests(ctx, f) - runSMTAlignmentPositiveTests(ctx, f, smtLevel, reservedSystemCPUs) - }) + md.CurrentCPUs = containerCPUs.String() - ginkgo.It("should not enforce CFS quota for containers with static CPUs assigned", func(ctx context.Context) { - if !IsCgroup2UnifiedMode() { - e2eskipper.Skipf("Skipping since CgroupV2 not used") + // Build set of uncore cache IDs from the reference cpuset + refUncoreIDs := sets.New[int64]() + for _, cpu := range ref.UnsortedList() { + uncoreID, err := uncoreCacheIDFromSysFS(cpu) + if err != nil { + return false, fmt.Errorf("failed to read uncore cache ID for reference CPU %d: %w", cpu, err) + } + refUncoreIDs.Insert(uncoreID) } - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - if cpuAlloc < 1 { // save expensive kubelet restart - e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + + // Check if any container CPUs share an uncore ID with the reference set + for _, cpu := range containerCPUs.UnsortedList() { + uncoreID, err := uncoreCacheIDFromSysFS(cpu) + if err != nil { + return false, fmt.Errorf("failed to read uncore cache ID for container CPU %d: %w", cpu, err) + } + if refUncoreIDs.Has(uncoreID) { + md.UncoreCacheAlign = fmt.Sprintf("%d", uncoreID) + return true, nil + } } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - disableCPUQuotaWithExclusiveCPUs: true, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU - runCfsQuotaGuPods(ctx, f, true, cpuAlloc) - }) + return false, nil + }).WithTemplate( + "Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> sharing uncoreCache ID <{{.Data.UncoreCacheAlign}}> with reference CPUSet <{{.Data.ExpectedCPUs}}>", + md, + ) +} - ginkgo.It("should keep enforcing the CFS quota for containers with static CPUs assigned and feature gate disabled", func(ctx context.Context) { - if !IsCgroup2UnifiedMode() { - e2eskipper.Skipf("Skipping since CgroupV2 not used") +// Custom matcher for checking packed CPUs. +func BePackedCPUs() types.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + // This assumption holds true if there are enough CPUs on a single NUMA node. + // We are intentionally limiting the CPU request to 2 to minimize the number + // of CPUs required to fulfill this case and therefore maximize the chances + // of correctly validating this case. + if count == allocatedCPUs.Size() { + return true, nil + } } - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - if cpuAlloc < 1 { // save expensive kubelet restart - e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + return false, nil + }).WithMessage("expected CPUs to be packed") +} + +// Custom matcher for checking distributed CPUs. +func BeDistributedCPUs(expectedSpread int) types.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + if count != expectedSpread { + return false, nil + } } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - disableCPUQuotaWithExclusiveCPUs: false, - }, - false, - false, - ) + return true, nil + }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) +} - updateKubeletConfig(ctx, f, newCfg, true) +// Other helpers - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU - runCfsQuotaGuPods(ctx, f, false, cpuAlloc) - }) +func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { + framework.Logf("got pod logs: <%v>", logs) + cpus, err := cpuset.Parse(strings.TrimSpace(logs)) + framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) + return cpus +} - f.It("should not reuse CPUs of restartable init containers", feature.SidecarContainers, func(ctx context.Context) { - cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) +// computeNUMADistribution calculates CPU distribution per NUMA node. +func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { + numaCPUs, err := getNumaNodeCPUs() + framework.ExpectNoError(err, "Error retrieving NUMA nodes") + framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) - // Skip rest of the tests if CPU capacity < 3. - if cpuCap < 3 { - e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3, got %d", cpuCap) - } + distribution := make(map[int]int) + for node, cpus := range numaCPUs { + distribution[node] = cpus.Intersection(allocatedCPUs).Size() + } - // Enable CPU Manager in the kubelet. - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.CPUSet{}, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - ginkgo.By("running a Gu pod with a regular init container and a restartable init container") - ctrAttrs := []ctnAttribute{ - { - ctnName: "gu-init-container1", - cpuRequest: "1000m", - cpuLimit: "1000m", - }, - { - ctnName: "gu-restartable-init-container2", - cpuRequest: "1000m", - cpuLimit: "1000m", - restartPolicy: &containerRestartPolicyAlways, - }, - } - pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) + return distribution +} - ginkgo.By("checking if the expected cpuset was assigned") - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[0].Name) - framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name) +func getContainerAllowedCPUs(pod *v1.Pod, ctnName string, isInit bool) (cpuset.CPUSet, error) { + cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, e2enodeCgroupV2Enabled) + if err != nil { + return cpuset.New(), err + } + cgPath = filepath.Join(cgPath, cpusetFileNameFromVersion(e2enodeCgroupV2Enabled)) + framework.Logf("pod %s/%s cnt %s qos=%s path %q", pod.Namespace, pod.Name, ctnName, pod.Status.QOSClass, cgPath) + data, err := os.ReadFile(cgPath) + if err != nil { + return cpuset.New(), err + } + cpus := strings.TrimSpace(string(data)) + framework.Logf("pod %s/%s cnt %s cpuset %q", pod.Namespace, pod.Name, ctnName, cpus) + return cpuset.Parse(cpus) +} - reusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[0].Name, logs) +func getSandboxCFSQuota(pod *v1.Pod) (string, error) { + if !e2enodeCgroupV2Enabled { + return "", fmt.Errorf("only Cgroup V2 is supported") + } + cgPath := filepath.Join(makeCgroupPathForPod(pod, true), "cpu.max") + data, err := os.ReadFile(cgPath) + if err != nil { + return "", err + } + quota := strings.TrimSpace(string(data)) + framework.Logf("pod %s/%s qos=%s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, cgPath, quota) + return quota, nil +} - gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String()) +func getContainerCFSQuota(pod *v1.Pod, ctnName string, isInit bool) (string, error) { + if !e2enodeCgroupV2Enabled { + return "", fmt.Errorf("only Cgroup V2 is supported") + } + cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, true) + if err != nil { + return "", err + } + data, err := os.ReadFile(filepath.Join(cgPath, "cpu.max")) + if err != nil { + return "", err + } + quota := strings.TrimSpace(string(data)) + framework.Logf("pod %s/%s qos=%s cnt %s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, ctnName, cgPath, quota) + return quota, nil +} - logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[1].Name) - framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name) +const ( + kubeCgroupRoot = "/sys/fs/cgroup" +) - nonReusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[1].Name, logs) +// example path (systemd, crio, v2): +// /sys/fs/cgroup/ kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod0b7632a2_a56e_4278_987a_22de18008dbe.slice/ crio-conmon-0bc5eac79e3ae7a0c2651f14722aa10fa333eb2325c2ca97da33aa284cda81b0.scope +// example path (cgroup, containerd, v1): +// /sys/fs/cgroup/cpuset kubepods/burstable pod8e414e92-17c2-41de-81c7-0045bba9103b b5791f89a6971bb4a751ffbebf533399c91630aa2906d7c6b5e239f405f3b97a - gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String()) +func makeCgroupPathForPod(pod *v1.Pod, isV2 bool) string { + components := []string{defaultNodeAllocatableCgroup} + if pod.Status.QOSClass != v1.PodQOSGuaranteed { + components = append(components, strings.ToLower(string(pod.Status.QOSClass))) + } + components = append(components, "pod"+string(pod.UID)) - logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name) + cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...) + cgroupFsName := "" + // it's quite ugly to use a global, but it saves us to pass a parameter all across the stack many times + if e2enodeCgroupDriver == "systemd" { + cgroupFsName = cgroupName.ToSystemd() + } else { + cgroupFsName = cgroupName.ToCgroupfs() + } + if !isV2 { + cgroupFsName = filepath.Join("cpuset", cgroupFsName) + } + return filepath.Join(kubeCgroupRoot, cgroupFsName) +} - cpus := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.Containers[0].Name, logs) +func makeCgroupPathForContainer(pod *v1.Pod, ctnName string, isInit, isV2 bool) (string, error) { + fullCntID, ok := findContainerIDByName(pod, ctnName, isInit) + if !ok { + return "", fmt.Errorf("cannot find status for container %q", ctnName) + } + cntID, err := parseContainerID(fullCntID) + if err != nil { + return "", err + } + cntPath := "" + if e2enodeCgroupDriver == "systemd" { + cntPath = containerCgroupPathPrefixFromDriver(e2enodeRuntimeName) + "-" + cntID + ".scope" + } else { + cntPath = cntID + } - gomega.Expect(cpus.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", cpus.String()) + return filepath.Join(makeCgroupPathForPod(pod, isV2), cntPath), nil +} - gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrueBecause("expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String())) - gomega.Expect(nonReusableCPUs.Intersection(cpus).IsEmpty()).To(gomega.BeTrueBecause("expected non-reusable cpuset [%s] to be disjoint from cpuset [%s]", nonReusableCPUs.String(), cpus.String())) +func cpusetFileNameFromVersion(isV2 bool) string { + if isV2 { + return "cpuset.cpus.effective" + } + return "cpuset.cpus" +} - ginkgo.By("by deleting the pods and waiting for container removal") - deletePods(ctx, f, []string{pod.Name}) - waitForContainerRemoval(ctx, pod.Spec.InitContainers[0].Name, pod.Name, pod.Namespace) - waitForContainerRemoval(ctx, pod.Spec.InitContainers[1].Name, pod.Name, pod.Namespace) - waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) - }) +func containerCgroupPathPrefixFromDriver(runtimeName string) string { + if runtimeName == "cri-o" { + return "crio" + } + return "cri-containerd" +} - ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() +func parseContainerID(fullID string) (string, error) { + _, cntID, found := strings.Cut(fullID, "://") + if !found { + return "", fmt.Errorf("unsupported containerID: %q", fullID) + } + // TODO: should we validate the kind? + return cntID, nil +} - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) +func findContainerIDByName(pod *v1.Pod, ctnName string, isInit bool) (string, bool) { + cntStatuses := pod.Status.ContainerStatuses + if isInit { + cntStatuses = pod.Status.InitContainerStatuses + } + for idx := range cntStatuses { + if cntStatuses[idx].Name == ctnName { + return cntStatuses[idx].ContainerID, true } + } + return "", false +} - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) - } +func makeThreadSiblingCPUSet(cpus cpuset.CPUSet) cpuset.CPUSet { + siblingsCPUs := cpuset.New() + for _, cpuID := range cpus.UnsortedList() { + siblingsCPUs = siblingsCPUs.Union(cpuSiblingListFromSysFS(int64(cpuID))) + } + return siblingsCPUs +} - framework.Logf("SMT level %d", smtLevel) +func updateKubeletConfigIfNeeded(ctx context.Context, f *framework.Framework, desiredCfg *kubeletconfig.KubeletConfiguration) *v1.Node { + curCfg, err := getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - ctnAttrs := []ctnAttribute{ - { - ctnName: "test-gu-container-distribute-cpus-across-numa-disabled", - cpuRequest: "2000m", - cpuLimit: "2000m", - }, - } - pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + if equalKubeletConfiguration(curCfg, desiredCfg) { + framework.Logf("Kubelet configuration already compliant, nothing to do") + return getLocalNode(ctx, f) + } - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + framework.Logf("Updating Kubelet configuration") + updateKubeletConfig(ctx, f, desiredCfg, true) + framework.Logf("Updated Kubelet configuration") - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + return getLocalNode(ctx, f) +} - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) +func equalKubeletConfiguration(cfgA, cfgB *kubeletconfig.KubeletConfiguration) bool { + cfgA = cfgA.DeepCopy() + cfgB = cfgB.DeepCopy() + // we care only about the payload, force metadata to be uniform + cfgA.TypeMeta = metav1.TypeMeta{} + cfgB.TypeMeta = metav1.TypeMeta{} + return reflect.DeepEqual(cfgA, cfgB) +} - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - gomega.Expect(cpus).To(BePackedCPUs()) - } - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) - }) +type nodeCPUDetails struct { + Capacity int64 + Allocatable int64 + Reserved int64 +} - ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) { - var cpusNumPerNUMA, numaNodeNum int +func cpuDetailsFromNode(node *v1.Node) nodeCPUDetails { + localNodeCap := node.Status.Capacity + cpuCap := localNodeCap[v1.ResourceCPU] + localNodeAlloc := node.Status.Allocatable + cpuAlloc := localNodeAlloc[v1.ResourceCPU] + cpuRes := cpuCap.DeepCopy() + cpuRes.Sub(cpuAlloc) + // RoundUp reserved CPUs to get only integer cores. + cpuRes.RoundUp(0) + return nodeCPUDetails{ + Capacity: cpuCap.Value(), + Allocatable: cpuCap.Value() - cpuRes.Value(), + Reserved: cpuRes.Value(), + } +} - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - framework.Logf("SMT level %d", smtLevel) +// smtLevelFromSysFS returns the number of symmetrical multi-thread (SMT) execution units the processor provides. +// The most common value on x86_64 is 2 (2 virtual threads/cores per physical core), that would be smtLevel == 2. +// The following are all synonyms: threadsPerCore, smtLevel +// Note: can't find a good enough yet not overly long name, "threadSiblingCount", "smtLevel", "threadsPerCore" are all questionable. +func smtLevelFromSysFS() int { + cpuID := int64(0) // this is just the most likely cpu to be present in a random system. No special meaning besides this. + cpus := cpuSiblingListFromSysFS(cpuID) + return cpus.Size() +} - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } +func cpuSiblingListFromSysFS(cpuID int64) cpuset.CPUSet { + data, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuID)) + framework.ExpectNoError(err) + // how many thread sibling you have = SMT level + // example: 2-way SMT means 2 threads sibling for each thread + cpus, err := cpuset.Parse(strings.TrimSpace(string(data))) + framework.ExpectNoError(err) + return cpus +} - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) - } +func uncoreCacheIDFromSysFS(cpuID int) (int64, error) { + // expect sysfs path for Uncore Cache ID for each CPU to be: + // /sys/devices/system/cpu/cpu#/cache/index3/id + cacheIDPath := filepath.Join("/sys/devices/system/cpu", fmt.Sprintf("cpu%d", cpuID), "cache", "index3", "id") + cacheIDBytes, err := os.ReadFile(cacheIDPath) + if err != nil { + return 0, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpuID, err) + } + + cacheIDStr := strings.TrimSpace(string(cacheIDBytes)) + cacheID, err := strconv.ParseInt(cacheIDStr, 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse cache ID for CPU %d: %w", cpuID, err) + } - // this test is intended to be run on a multi-node NUMA system and - // a system with at least 4 cores per socket, hostcheck skips test - // if above requirements are not satisfied - numaNodeNum, _, _, cpusNumPerNUMA = hostCheck() + return cacheID, nil +} - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, +func makeCPUManagerBEPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { + var containers []v1.Container + for _, ctnAttr := range ctnAttributes { + ctn := v1.Container{ + Name: ctnAttr.ctnName, + Image: busyboxImage, + Command: []string{"sh", "-c", ctnAttr.ctnCommand}, + VolumeMounts: []v1.VolumeMount{ + { + Name: "sysfscgroup", + MountPath: "/sysfscgroup", + }, + { + Name: "podinfo", + MountPath: "/podinfo", + }, }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed - // across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation. - // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single - // NUMA node. We have to pick cpuRequest such that: - // 1. CPURequest > cpusNumPerNUMA - // 2. Not occupy all the CPUs on the node ande leave room for reserved CPU - // 3. CPURequest is a multiple if number of NUMA nodes to allow equal CPU distribution across NUMA nodes - // - // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNuma * numaNodeNum) - reservedCPUscount) - // Considering all these constraints we select: CPURequest= (cpusNumPerNUMA-smtLevel)*numaNodeNum - - cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum - ctnAttrs := []ctnAttribute{ - { - ctnName: "test-gu-container-distribute-cpus-across-numa", - cpuRequest: fmt.Sprintf("%d", cpuReq), - cpuLimit: fmt.Sprintf("%d", cpuReq), + } + containers = append(containers, ctn) + } + + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: containers, + Volumes: []v1.Volume{ + { + Name: "sysfscgroup", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"}, + }, + }, + { + Name: "podinfo", + VolumeSource: v1.VolumeSource{ + DownwardAPI: &v1.DownwardAPIVolumeSource{ + Items: []v1.DownwardAPIVolumeFile{ + { + Path: "uid", + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "metadata.uid", + }, + }, + }, + }, + }, + }, }, - } - pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) - - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) - - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) - - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - // We expect a perfectly even spilit i.e. equal distribution across NUMA Node as the CPU Request is 4*smtLevel*numaNodeNum. - expectedSpread := cpus.Size() / numaNodeNum - gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread)) - } - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) - }) - - ginkgo.AfterEach(func(ctx context.Context) { - updateKubeletConfig(ctx, f, oldCfg, true) - }) -} - -func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) { - // negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection - ctnAttrs := []ctnAttribute{ - { - ctnName: "gu-container-neg", - cpuRequest: "1000m", - cpuLimit: "1000m", }, } - pod := makeCPUManagerPod("gu-pod", ctnAttrs) - // CreateSync would wait for pod to become Ready - which will never happen if production code works as intended! - pod = e2epod.NewPodClient(f).Create(ctx, pod) - - err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) { - if pod.Status.Phase != v1.PodPending { - return true, nil - } - return false, nil - }) - framework.ExpectNoError(err) - pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - - if pod.Status.Phase != v1.PodFailed { - framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) - } - if !isSMTAlignmentError(pod) { - framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason) - } - - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) } -func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int, strictReservedCPUs cpuset.CPUSet) { - // positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores - // 1. are core siblings - // 2. take a full core - // WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels. - // this means on more-than-2-way SMT systems this test will prove nothing - ctnAttrs := []ctnAttribute{ - { - ctnName: "gu-container-pos", - cpuRequest: "2000m", - cpuLimit: "2000m", - }, +func requireCGroupV2() { + if e2enodeCgroupV2Enabled { + return } - pod := makeCPUManagerPod("gu-pod", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) - - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + e2eskipper.Skipf("Skipping since CgroupV2 not used") - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) +} - gomega.Expect(cpus.Intersection(strictReservedCPUs).IsEmpty()).To(gomega.BeTrueBecause("cpuset %q should not contain strict reserved cpus %q", cpus.String(), strictReservedCPUs.String())) - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - } +const ( + minSMTLevel = 2 + minCPUCapacity = 2 +) - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) +// Helper for makeCPUManagerPod(). +type ctnAttribute struct { + ctnName string + ctnCommand string + cpuRequest string + cpuLimit string + restartPolicy *v1.ContainerRestartPolicy } -func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) { - framework.Logf("validating cpus: %v", cpus) +// makeCPUMangerPod returns a pod with the provided ctnAttributes. +func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { + var containers []v1.Container + for _, ctnAttr := range ctnAttributes { + cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d" + ctn := v1.Container{ + Name: ctnAttr.ctnName, + Image: busyboxImage, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuRequest), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuLimit), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + Command: []string{"sh", "-c", cpusetCmd}, + VolumeMounts: []v1.VolumeMount{ + { + Name: "sysfscgroup", + MountPath: "/sysfscgroup", + }, + { + Name: "podinfo", + MountPath: "/podinfo", + }, + }, + } + containers = append(containers, ctn) + } - if cpus.Size()%smtLevel != 0 { - framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: containers, + Volumes: []v1.Volume{ + { + Name: "sysfscgroup", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"}, + }, + }, + { + Name: "podinfo", + VolumeSource: v1.VolumeSource{ + DownwardAPI: &v1.DownwardAPIVolumeSource{ + Items: []v1.DownwardAPIVolumeFile{ + { + Path: "uid", + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "metadata.uid", + }, + }, + }, + }, + }, + }, + }, + }, } +} - // now check all the given cpus are thread siblings. - // to do so the easiest way is to rebuild the expected set of siblings from all the cpus we got. - // if the expected set matches the given set, the given set was good. - siblingsCPUs := cpuset.New() - for _, cpuID := range cpus.UnsortedList() { - threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID)))) - framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name) - siblingsCPUs = siblingsCPUs.Union(threadSiblings) +// makeCPUMangerInitContainersPod returns a pod with init containers with the +// provided ctnAttributes. +func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { + var containers []v1.Container + cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" + cpusetAndSleepCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d" + for _, ctnAttr := range ctnAttributes { + ctn := v1.Container{ + Name: ctnAttr.ctnName, + Image: busyboxImage, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuRequest), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuLimit), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + Command: []string{"sh", "-c", cpusetCmd}, + RestartPolicy: ctnAttr.restartPolicy, + } + if ctnAttr.restartPolicy != nil && *ctnAttr.restartPolicy == v1.ContainerRestartPolicyAlways { + ctn.Command = []string{"sh", "-c", cpusetAndSleepCmd} + } + containers = append(containers, ctn) } - framework.Logf("siblings cpus: %v", siblingsCPUs) - if !siblingsCPUs.Equals(cpus) { - framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + InitContainers: containers, + Containers: []v1.Container{ + { + Name: "regular", + Image: busyboxImage, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000m"), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000m"), + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + Command: []string{"sh", "-c", cpusetAndSleepCmd}, + }, + }, + }, } } -func isSMTAlignmentError(pod *v1.Pod) bool { - re := regexp.MustCompile(`SMT.*Alignment.*Error`) - return re.MatchString(pod.Status.Reason) +type cpuManagerKubeletArguments struct { + policyName string + enableCPUManagerOptions bool + disableCPUQuotaWithExclusiveCPUs bool + enablePodLevelResources bool + customCPUCFSQuotaPeriod time.Duration + reservedSystemCPUs cpuset.CPUSet + options map[string]string + enableInPlacePodVerticalScalingExclusiveCPUs bool + topologyManagerPolicyName string + topologyManagerScopeName string + topologyManagerPolicyOptions map[string]string } -// getNumaNodeCPUs retrieves CPUs for each NUMA node. -func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { - numaNodes := make(map[int]cpuset.CPUSet) - nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist") - if err != nil { - return nil, err +func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { + newCfg := oldCfg.DeepCopy() + if newCfg.FeatureGates == nil { + newCfg.FeatureGates = make(map[string]bool) } - for _, nodePath := range nodePaths { - data, err := os.ReadFile(nodePath) - framework.ExpectNoError(err, "Error obtaning CPU information from the node") - cpuSet := strings.TrimSpace(string(data)) - cpus, err := cpuset.Parse(cpuSet) - framework.ExpectNoError(err, "Error parsing CPUset") + newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions + newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions + newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs + newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources + newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = kubeletArguments.enableInPlacePodVerticalScalingExclusiveCPUs - // Extract node ID from path (e.g., "node0" -> 0) - base := filepath.Base(filepath.Dir(nodePath)) - nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node")) - if err != nil { - continue - } - numaNodes[nodeID] = cpus + if kubeletArguments.customCPUCFSQuotaPeriod != 0 { + newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = true + newCfg.CPUCFSQuotaPeriod.Duration = kubeletArguments.customCPUCFSQuotaPeriod + } else { + newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = false } - return numaNodes, nil -} - -func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { - framework.Logf("got pod logs: <%v>", logs) - cpus, err := cpuset.Parse(strings.TrimSpace(logs)) - framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) - return cpus -} + newCfg.CPUManagerPolicy = kubeletArguments.policyName + newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} -// computeNUMADistribution calculates CPU distribution per NUMA node. -func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { - numaCPUs, err := getNumaNodeCPUs() - framework.ExpectNoError(err, "Error retrieving NUMA nodes") - framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) + newCfg.TopologyManagerPolicy = kubeletArguments.topologyManagerPolicyName + newCfg.TopologyManagerScope = kubeletArguments.topologyManagerScopeName - distribution := make(map[int]int) - for node, cpus := range numaCPUs { - distribution[node] = cpus.Intersection(allocatedCPUs).Size() + if kubeletArguments.topologyManagerPolicyOptions != nil { + newCfg.TopologyManagerPolicyOptions = kubeletArguments.topologyManagerPolicyOptions } - framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) - return distribution -} + if kubeletArguments.options != nil { + newCfg.CPUManagerPolicyOptions = kubeletArguments.options + } -// Custom matcher for checking packed CPUs. -func BePackedCPUs() gomegatypes.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - // This assumption holds true if there are enough CPUs on a single NUMA node. - // We are intentionally limiting the CPU request to 2 to minimize the number - // of CPUs required to fulfill this case and therefore maximize the chances - // of correctly validating this case. - if count == allocatedCPUs.Size() { - return true, nil - } + if kubeletArguments.reservedSystemCPUs.Size() > 0 { + cpus := kubeletArguments.reservedSystemCPUs.String() + framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus) + newCfg.ReservedSystemCPUs = cpus + } else { + // The Kubelet panics if either kube-reserved or system-reserved is not set + // when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that + // kubelet doesn't panic. + if newCfg.KubeReserved == nil { + newCfg.KubeReserved = map[string]string{} } - return false, nil - }).WithMessage("expected CPUs to be packed") -} -// Custom matcher for checking distributed CPUs. -func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - if count != expectedSpread { - return false, nil - } + if _, ok := newCfg.KubeReserved["cpu"]; !ok { + newCfg.KubeReserved["cpu"] = "200m" } - return true, nil - }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) -} - -// Serial because the test updates kubelet configuration. -var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() { - f := framework.NewDefaultFramework("cpu-manager-test") - f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + } - ginkgo.Context("With kubeconfig updated with static CPU Manager policy run the CPU Manager tests", func() { - runCPUManagerTests(f) - }) -}) + return newCfg +} diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go deleted file mode 100644 index b0d5ccfc53a75..0000000000000 --- a/test/e2e_node/pod_resize_test.go +++ /dev/null @@ -1,2471 +0,0 @@ -/* -Copyright 2024 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package e2enode - -import ( - "context" - "encoding/json" - "fmt" - "strconv" - "strings" - "time" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/strategicpatch" - clientset "k8s.io/client-go/kubernetes" - kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" - "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" - "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" - "k8s.io/kubernetes/test/e2e/framework" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" - e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" - testutils "k8s.io/kubernetes/test/utils" - admissionapi "k8s.io/pod-security-admission/api" - "k8s.io/utils/cpuset" -) - -const ( - fakeExtendedResource = "dummy.com/dummy" -) - -func patchNode(ctx context.Context, client clientset.Interface, old *v1.Node, new *v1.Node) error { - oldData, err := json.Marshal(old) - if err != nil { - return err - } - - newData, err := json.Marshal(new) - if err != nil { - return err - } - patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{}) - if err != nil { - return fmt.Errorf("failed to create merge patch for node %q: %w", old.Name, err) - } - _, err = client.CoreV1().Nodes().Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status") - return err -} - -func addExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string, extendedResourceQuantity resource.Quantity) { - extendedResource := v1.ResourceName(extendedResourceName) - - ginkgo.By("Adding a custom resource") - OriginalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - - node := OriginalNode.DeepCopy() - node.Status.Capacity[extendedResource] = extendedResourceQuantity - node.Status.Allocatable[extendedResource] = extendedResourceQuantity - err = patchNode(context.Background(), clientSet, OriginalNode.DeepCopy(), node) - framework.ExpectNoError(err) - - gomega.Eventually(func() error { - node, err = clientSet.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - - fakeResourceCapacity, exists := node.Status.Capacity[extendedResource] - if !exists { - return fmt.Errorf("node %s has no %s resource capacity", node.Name, extendedResourceName) - } - if expectedResource := resource.MustParse("123"); fakeResourceCapacity.Cmp(expectedResource) != 0 { - return fmt.Errorf("node %s has resource capacity %s, expected: %s", node.Name, fakeResourceCapacity.String(), expectedResource.String()) - } - - return nil - }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) -} - -func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string) { - extendedResource := v1.ResourceName(extendedResourceName) - - ginkgo.By("Removing a custom resource") - originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - - node := originalNode.DeepCopy() - delete(node.Status.Capacity, extendedResource) - delete(node.Status.Allocatable, extendedResource) - err = patchNode(context.Background(), clientSet, originalNode.DeepCopy(), node) - framework.ExpectNoError(err) - - gomega.Eventually(func() error { - node, err = clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - - if _, exists := node.Status.Capacity[extendedResource]; exists { - return fmt.Errorf("node %s has resource capacity %s which is expected to be removed", node.Name, extendedResourceName) - } - - return nil - }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) -} - -func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, cpuManagerPolicyName string, cpuManagerPolicyOptions map[string]string, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { - if cpuManagerPolicyName != "" { - if cpuManagerPolicyOptions != nil { - func() { - var cpuAlloc int64 - for policyOption, policyOptionValue := range cpuManagerPolicyOptions { - if policyOption == cpumanager.FullPCPUsOnlyOption && policyOptionValue == "true" { - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < 2 { - e2eskipper.Skipf("Skipping Pod Resize along side CPU Manager %s tests since SMT disabled", policyOption) - } - - // our tests want to allocate a full core, so we need at last 2*2=4 virtual cpus - if cpuAlloc < int64(smtLevel*2) { - e2eskipper.Skipf("Skipping Pod resize along side CPU Manager %s tests since the CPU capacity < 4", policyOption) - } - - framework.Logf("SMT level %d", smtLevel) - return - } - } - }() - - // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably - // check what we do have in the node. - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: cpuManagerPolicyName, - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuManagerPolicyOptions, - }, - isInPlacePodVerticalScalingAllocatedStatusEnabled, - isInPlacePodVerticalScalingExclusiveCPUsEnabled, - ) - updateKubeletConfig(ctx, f, newCfg, true) - } else { - var cpuCap int64 - cpuCap, _, _ = getLocalNodeCPUDetails(ctx, f) - // Skip CPU Manager tests altogether if the CPU capacity < 2. - if cpuCap < 2 { - e2eskipper.Skipf("Skipping Pod Resize alongside CPU Manager tests since the CPU capacity < 2") - } - // Enable CPU Manager in the kubelet. - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: cpuManagerPolicyName, - reservedSystemCPUs: cpuset.CPUSet{}, - }, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) - updateKubeletConfig(ctx, f, newCfg, true) - } - } -} - -type cpuManagerPolicyConfig struct { - name string - title string - options map[string]string -} - -func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { - f := framework.NewDefaultFramework("pod-resize-test") - f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged - var podClient *e2epod.PodClient - var oldCfg *kubeletconfig.KubeletConfiguration - ginkgo.BeforeEach(func(ctx context.Context) { - var err error - node := getLocalNode(ctx, f) - if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { - e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") - } - podClient = e2epod.NewPodClient(f) - if oldCfg == nil { - oldCfg, err = getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) - } - }) - - type testCase struct { - name string - containers []podresize.ResizableContainerInfo - patchString string - expected []podresize.ResizableContainerInfo - addExtendedResource bool - } - - noRestart := v1.NotRequired - doRestart := v1.RestartContainer - tests := []testCase{ - { - name: "Guaranteed QoS pod, one container - increase CPU & memory", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - decrease CPU & memory", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "500Mi", MemLim: "500Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"100m","memory":"250Mi"},"limits":{"cpu":"100m","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & decrease memory", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"100Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "100Mi", MemLim: "100Mi"}, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, - }, - }, - }, - { - name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase: CPU (c1,c3), memory (c2) ; decrease: CPU (c2), memory (c1,c3)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"200Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory limits only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"300Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory limits only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"memory":"600Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"100m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU limits only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"cpu":"300m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"150m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU limits only", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"cpu":"500m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"200m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"400m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase CPU limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"500m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease CPU limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"300m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"300Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"500Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase memory limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"500Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and decrease memory limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"300Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase memory limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"memory":"500Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease memory limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase CPU limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"cpu":"300m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and decrease CPU limits", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"cpu":"300m"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, - }, - }, - }, - { - name: "Burstable QoS pod, one container with cpu & memory requests - decrease memory request", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "500Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - RestartCount: 1, - }, - }, - }, - { - name: "Burstable QoS pod, one container - decrease CPU (RestartContainer) & memory (NotRequired)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"100Mi"},"limits":{"cpu":"100m","memory":"200Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "100m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - RestartCount: 1, - }, - }, - }, - { - name: "Burstable QoS pod, three containers - increase c1 resources, no change for c2, decrease c3 resources (no net change for pod)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"150m","memory":"150Mi"},"limits":{"cpu":"250m","memory":"250Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "250m", MemReq: "150Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Burstable QoS pod, three containers - decrease c1 resources, increase c2 resources, no change for c3 (net increase for pod)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"50Mi"},"limits":{"cpu":"150m","memory":"150Mi"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"350m","memory":"350Mi"},"limits":{"cpu":"450m","memory":"450Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "150m", MemReq: "50Mi", MemLim: "150Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "350m", CPULim: "450m", MemReq: "350Mi", MemLim: "450Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - RestartCount: 1, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net decrease for pod)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - RestartCount: 1, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - RestartCount: 1, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & memory with an extended resource", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi", - ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi", - ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - addExtendedResource: true, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - }, - { - name: "Burstable QoS pod, three containers - no change for c1, decrease c2 resources, decrease c3 (net decrease for pod)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"1","memory":"150Mi"},"limits":{"cpu":"1","memory":"250Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "150Mi", MemLim: "250Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - RestartCount: 1, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - RestartCount: 1, - }, - }, - }, - { - name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net increase for pod)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "300Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"350Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &doRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "350Mi"}, - CPUPolicy: &doRestart, - MemPolicy: &noRestart, - RestartCount: 1, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - RestartCount: 1, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, - CPUsAllowedListValue: "4", - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, - CPUsAllowedListValue: "4", - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - CPUsAllowedListValue: "4", - RestartCount: 1, - }, - }, - }, - { - name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &doRestart, - CPUsAllowedListValue: "4", - RestartCount: 1, - }, - }, - }, - { - name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3)", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - }, - }, - }, - { - name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3), with integer CPU requests", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "100Mi", MemLim: "100Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "300Mi", MemLim: "300Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"50Mi"},"limits":{"cpu":"4","memory":"50Mi"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"240Mi"},"limits":{"cpu":"2","memory":"240Mi"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"250Mi"}}} - ]}}`, - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "50Mi", MemLim: "50Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - { - Name: "c2", - Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "240Mi", MemLim: "240Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - { - Name: "c3", - Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "250Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - }, - } - - timeouts := framework.NewTimeoutContext() - - for idx := range tests { - tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) - - var testPod, patchedPod *v1.Pod - var pErr error - - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) - testPod.GenerateName = "resize-test-" - testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) - - if tc.addExtendedResource { - nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) - framework.ExpectNoError(err) - - for _, node := range nodes.Items { - addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) - } - defer func() { - for _, node := range nodes.Items { - removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) - } - }() - } - - ginkgo.By("creating pod") - newPod := podClient.CreateSync(ctx, testPod) - - ginkgo.By("verifying initial pod resources, allocations are as expected") - podresize.VerifyPodResources(newPod, tc.containers) - ginkgo.By("verifying initial pod resize policy is as expected") - podresize.VerifyPodResizePolicy(newPod, tc.containers) - - ginkgo.By("verifying initial pod status resources are as expected") - framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) - ginkgo.By("verifying initial cgroup config are as expected") - framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By("verifying initial pod Cpus allowed list value") - gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, newPod, tc.containers). - Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") - } - - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { - ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) - expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) - - ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - podresize.VerifyPodResources(patchedPod, expected) - - ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expected) - podresize.ExpectPodResized(ctx, f, resizedPod, expected) - - // Check cgroup values only for containerd versions before 1.6.9 - ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) - framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expected)) - - ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) - podresize.VerifyPodResources(resizedPod, expected) - - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By("verifying pod Cpus allowed list value after resize") - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { - gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, tc.expected). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, tc.containers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") - } - } - } - - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") - - rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) - framework.ExpectNoError(err) - // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") - - ginkgo.By("deleting pod") - deletePodSyncByName(ctx, f, newPod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with the - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) - }) - } - - ginkgo.AfterEach(func(ctx context.Context) { - if oldCfg != nil { - updateKubeletConfig(ctx, f, oldCfg, true) - } - }) - -} - -func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { - f := framework.NewDefaultFramework("pod-resize-errors") - f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged - var podClient *e2epod.PodClient - var oldCfg *kubeletconfig.KubeletConfiguration - ginkgo.BeforeEach(func(ctx context.Context) { - var err error - node := getLocalNode(ctx, f) - if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { - e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") - } - podClient = e2epod.NewPodClient(f) - if oldCfg == nil { - oldCfg, err = getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) - } - }) - - type testCase struct { - name string - containers []podresize.ResizableContainerInfo - patchString string - patchError string - expected []podresize.ResizableContainerInfo - } - - tests := []testCase{ - { - name: "BestEffort QoS pod, one container - try requesting memory, expect error", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} - ]}}`, - patchError: "Pod QoS is immutable", - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - }, - }, - }, - { - name: "BestEffort QoS pod, three containers - try requesting memory for c1, expect error", - containers: []podresize.ResizableContainerInfo{ - { - Name: "c1", - }, - { - Name: "c2", - }, - { - Name: "c3", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} - ]}}`, - patchError: "Pod QoS is immutable", - expected: []podresize.ResizableContainerInfo{ - { - Name: "c1", - }, - { - Name: "c2", - }, - { - Name: "c3", - }, - }, - }, - } - - timeouts := framework.NewTimeoutContext() - - for idx := range tests { - tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - var testPod, patchedPod *v1.Pod - var pErr error - - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) - testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) - - ginkgo.By("creating pod") - newPod := podClient.CreateSync(ctx, testPod) - - perr := e2epod.WaitForPodCondition(ctx, f.ClientSet, newPod.Namespace, newPod.Name, "Ready", timeouts.PodStartSlow, testutils.PodRunningReady) - framework.ExpectNoError(perr, "pod %s/%s did not go running", newPod.Namespace, newPod.Name) - framework.Logf("pod %s/%s running", newPod.Namespace, newPod.Name) - - ginkgo.By("verifying initial pod resources, allocations, and policy are as expected") - podresize.VerifyPodResources(newPod, tc.containers) - podresize.VerifyPodResizePolicy(newPod, tc.containers) - - ginkgo.By("verifying initial pod status resources and cgroup config are as expected") - framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) - - ginkgo.By("patching pod for resize") - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(tc.patchString), metav1.PatchOptions{}) - if tc.patchError == "" { - framework.ExpectNoError(pErr, "failed to patch pod for resize") - } else { - gomega.Expect(pErr).To(gomega.HaveOccurred(), tc.patchError) - patchedPod = newPod - } - - ginkgo.By("verifying pod resources after patch") - podresize.VerifyPodResources(patchedPod, tc.expected) - - deletePodSyncByName(ctx, f, newPod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with the - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) - - }) - } - - ginkgo.AfterEach(func(ctx context.Context) { - if oldCfg != nil { - updateKubeletConfig(ctx, f, oldCfg, true) - } - }) - -} - -// NOTE: Pod resize scheduler resource quota tests are out of scope in e2e_node tests, -// because in e2e_node tests -// a) scheduler and controller manager is not running by the Node e2e -// b) api-server in services doesn't start with --enable-admission-plugins=ResourceQuota -// and is not possible to start it from TEST_ARGS -// Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() -// in test/e2e/node/pod_resize.go - -var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func() { - - policiesGeneralAvailability := []cpuManagerPolicyConfig{ - { - name: string(cpumanager.PolicyNone), - title: "", - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with no options", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - } - - policiesBeta := []cpuManagerPolicyConfig{ - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - } - - /*policiesAlpha := []cpuManagerPolicyConfig{ - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with AlignBySocketOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "true", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, AlignBySocketOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "true", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption, AlignBySocketOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - cpumanager.AlignBySocketOption: "true", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption, AlignBySocketOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - cpumanager.AlignBySocketOption: "true", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "true", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption, AlignBySocketOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "true", - cpumanager.DistributeCPUsAcrossCoresOption: "true", - }, - }, - }*/ - - for idp := range policiesGeneralAvailability { - doPodResizeTests(policiesGeneralAvailability[idp], false, false) - doPodResizeTests(policiesGeneralAvailability[idp], true, false) - doPodResizeTests(policiesGeneralAvailability[idp], false, true) - doPodResizeTests(policiesGeneralAvailability[idp], true, true) - doPodResizeErrorTests(policiesGeneralAvailability[idp], false, false) - doPodResizeErrorTests(policiesGeneralAvailability[idp], true, false) - doPodResizeErrorTests(policiesGeneralAvailability[idp], false, true) - doPodResizeErrorTests(policiesGeneralAvailability[idp], true, true) - } - - for idp := range policiesBeta { - doPodResizeTests(policiesBeta[idp], false, false) - doPodResizeTests(policiesBeta[idp], true, false) - doPodResizeTests(policiesBeta[idp], false, true) - doPodResizeTests(policiesBeta[idp], true, true) - doPodResizeErrorTests(policiesBeta[idp], false, false) - doPodResizeErrorTests(policiesBeta[idp], true, false) - doPodResizeErrorTests(policiesBeta[idp], false, true) - doPodResizeErrorTests(policiesBeta[idp], true, true) - } - - /*for idp := range policiesAlpha { - doPodResizeTests(policiesAlpha[idp], true, false) - doPodResizeTests(policiesAlpha[idp], true, true) - doPodResizeErrorTests(policiesAlpha[idp], true, false) - doPodResizeErrorTests(policiesAlpha[idp], true, true) - }*/ - -}) - -func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { - f := framework.NewDefaultFramework("pod-resize-test") - f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged - var podClient *e2epod.PodClient - var oldCfg *kubeletconfig.KubeletConfiguration - ginkgo.BeforeEach(func(ctx context.Context) { - var err error - node := getLocalNode(ctx, f) - if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { - e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") - } - if isMultiNUMA() { - e2eskipper.Skipf("For simple test, only test one NUMA, multi NUMA -- skipping") - } - podClient = e2epod.NewPodClient(f) - if oldCfg == nil { - oldCfg, err = getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) - } - }) - - type testCase struct { - name string - containers []e2epod.ResizableContainerInfo - patchString string - expected []e2epod.ResizableContainerInfo - addExtendedResource bool - skipFlag bool - } - - setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { - cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) - firstContainerCpuset := cpuset.New() - firstAdditionCpuset := cpuset.New() - firstExpectedCpuset := cpuset.New() - secondContainerCpuset := cpuset.New() - secondAdditionCpuset := cpuset.New() - secondExpectedCpuset := cpuset.New() - - if tests.name == "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false" { - if cpuCap < 2 { - tests.skipFlag = true - } - firstContainerCpuset = cpuset.New(1) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() - firstContainerCpuset = cpuset.New(cpuList[1]) - } - tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() - - firstAdditionCpuset = cpuset.New(2) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() - firstAdditionCpuset = cpuset.New(cpuList[0]) - } - firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) - tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - } else if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { - if cpuCap < 4 { - tests.skipFlag = true - } - firstContainerCpuset = cpuset.New(1) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() - firstContainerCpuset = cpuset.New(cpuList[1]) - } - tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() - - secondContainerCpuset = cpuset.New(1) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() - secondContainerCpuset = cpuset.New(cpuList[0]) - } - tests.containers[1].CPUsAllowedList = secondContainerCpuset.String() - - firstAdditionCpuset = cpuset.New(2) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() - firstAdditionCpuset = cpuset.New(cpuList[1]) - } - firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) - tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - - secondAdditionCpuset = cpuset.New(2) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() - secondAdditionCpuset = cpuset.New(cpuList[0]) - } - secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) - tests.expected[1].CPUsAllowedList = secondExpectedCpuset.String() - } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false") { - if cpuCap < 2 { - tests.skipFlag = true - } - firstContainerCpuset = cpuset.New(2, 3) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() - if cpuList[1] != 1 { - firstContainerCpuset = mustParseCPUSet(getCPUSiblingList(1)) - } - } - tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() - - firstExpectedCpuset = cpuset.New(firstContainerCpuset.List()[0]) - tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false" { - startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) - endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex - tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] - ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) - } - } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true") { - if cpuCap < 4 { - tests.skipFlag = true - } - firstContainerCpuset = cpuset.New(2, 3, 4, 5) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() - if cpuList[1] != 1 { - firstContainerCpuset = mustParseCPUSet(getCPUSiblingList(1)) - firstContainerCpuset = firstContainerCpuset.Union(mustParseCPUSet(getCPUSiblingList(2))) - } - } - tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() - - firstExpectedCpuset = mustParseCPUSet(getCPUSiblingList(1)) - tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true" { - startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) - endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex - tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] - ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) - } - } - - ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) - ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) - } - - noRestart := v1.NotRequired - testsWithFalseFullCPUs := []testCase{ - { - name: "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - { - Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - { - Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"1","memory":"200Mi"},"limits":{"cpu":"1","memory":"200Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "11"}], "resources":{"requests":{"cpu":"1","memory":"400Mi"},"limits":{"cpu":"1","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - }, - } - - testsWithTrueFullCPUs := []testCase{ - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"200Mi"},"limits":{"cpu":"2","memory":"200Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "2,12"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - // Abnormal case, CPUs in mustKeepCPUs not full PCPUs, the mustKeepCPUs will be ignored - { - name: "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "1,2"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - } - - timeouts := framework.NewTimeoutContext() - - var tests []testCase - if policy.options[cpumanager.FullPCPUsOnlyOption] == "false" { - tests = testsWithFalseFullCPUs - } else if policy.options[cpumanager.FullPCPUsOnlyOption] == "true" { - tests = testsWithTrueFullCPUs - } - - for idx := range tests { - tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) - - setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) - if tc.skipFlag { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") - } - - var testPod, patchedPod *v1.Pod - var pErr error - - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) - testPod.GenerateName = "resize-test-" - testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) - - if tc.addExtendedResource { - nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) - framework.ExpectNoError(err) - - for _, node := range nodes.Items { - addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) - } - defer func() { - for _, node := range nodes.Items { - removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) - } - }() - } - - ginkgo.By("creating pod") - newPod := podClient.CreateSync(ctx, testPod) - - ginkgo.By("verifying initial pod resources, allocations are as expected") - e2epod.VerifyPodResources(newPod, tc.containers) - ginkgo.By("verifying initial pod resize policy is as expected") - e2epod.VerifyPodResizePolicy(newPod, tc.containers) - - ginkgo.By("verifying initial pod status resources are as expected") - framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, tc.containers)) - ginkgo.By("verifying initial cgroup config are as expected") - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By("verifying initial pod Cpus allowed list value") - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, newPod, tc.containers). - Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") - } - - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { - ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) - - ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - e2epod.VerifyPodResources(patchedPod, expectedContainers) - - ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) - e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) - - // Check cgroup values only for containerd versions before 1.6.9 - ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) - - ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) - e2epod.VerifyPodResources(resizedPod, expectedContainers) - - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, expectedContainers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, tc.containers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") - } - } - } - - ginkgo.By("First patch") - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") - - rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) - framework.ExpectNoError(err) - // Resize has been actuated, test rollback - ginkgo.By("Second patch for rollback") - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") - - ginkgo.By("deleting pod") - deletePodSyncByName(ctx, f, newPod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with the - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) - }) - } - - ginkgo.AfterEach(func(ctx context.Context) { - if oldCfg != nil { - updateKubeletConfig(ctx, f, oldCfg, true) - } - }) - -} - -func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { - f := framework.NewDefaultFramework("pod-resize-test") - f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged - var podClient *e2epod.PodClient - var oldCfg *kubeletconfig.KubeletConfiguration - ginkgo.BeforeEach(func(ctx context.Context) { - var err error - node := getLocalNode(ctx, f) - if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { - e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") - } - podClient = e2epod.NewPodClient(f) - if oldCfg == nil { - oldCfg, err = getCurrentKubeletConfig(ctx) - framework.ExpectNoError(err) - } - }) - - type testPod struct { - containers []e2epod.ResizableContainerInfo - patchString string - expected []e2epod.ResizableContainerInfo - } - - type testCase struct { - name string - testPod1 testPod - testPod2 testPod - skipFlag bool - } - - setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { - cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) - firstContainerCpuset := cpuset.New() - firstAdditionCpuset := cpuset.New() - firstExpectedCpuset := cpuset.New() - secondContainerCpuset := cpuset.New() - secondAdditionCpuset := cpuset.New() - secondExpectedCpuset := cpuset.New() - - if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { - if cpuCap < 4 { - tests.skipFlag = true - } - firstContainerCpuset = cpuset.New(1) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() - firstContainerCpuset = cpuset.New(cpuList[1]) - } - tests.testPod1.containers[0].CPUsAllowedList = firstContainerCpuset.String() - - secondContainerCpuset = cpuset.New(1) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() - secondContainerCpuset = cpuset.New(cpuList[0]) - } - tests.testPod2.containers[1].CPUsAllowedList = secondContainerCpuset.String() - - firstAdditionCpuset = cpuset.New(2) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() - firstAdditionCpuset = cpuset.New(cpuList[1]) - } - firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) - tests.testPod1.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - - secondAdditionCpuset = cpuset.New(2) - if isHTEnabled() { - cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() - secondAdditionCpuset = cpuset.New(cpuList[0]) - } - secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) - tests.testPod2.expected[1].CPUsAllowedList = secondExpectedCpuset.String() - } - ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) - ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) - } - - noRestart := v1.NotRequired - tests := []testCase{ - { - name: "2 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", - testPod1: testPod{ - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - testPod2: testPod{ - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - }, - } - - timeouts := framework.NewTimeoutContext() - - for idx := range tests { - tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) - - setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) - if tc.skipFlag { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") - } - - var patchedPod *v1.Pod - var pErr error - - createAndVerify := func(podName string, podClient *e2epod.PodClient, testContainers []e2epod.ResizableContainerInfo) (newPod *v1.Pod) { - var testPod *v1.Pod - - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, fmt.Sprintf("resizepod-%s", podName), tStamp, testContainers) - testPod.GenerateName = "resize-test-" - testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) - - ginkgo.By("creating pod") - newPod = podClient.CreateSync(ctx, testPod) - - ginkgo.By("verifying initial pod resources, allocations are as expected") - e2epod.VerifyPodResources(newPod, testContainers) - ginkgo.By("verifying initial pod resize policy is as expected") - e2epod.VerifyPodResizePolicy(newPod, testContainers) - - ginkgo.By("verifying initial pod status resources are as expected") - framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, testContainers)) - ginkgo.By("verifying initial cgroup config are as expected") - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, testContainers)) - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By("verifying initial pod Cpus allowed list value") - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, newPod, testContainers). - Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") - } - return newPod - } - - newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) - newPod2 := createAndVerify("testpod2", podClient, tc.testPod2.containers) - - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, newPod *v1.Pod) { - ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) - - ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - e2epod.VerifyPodResources(patchedPod, expectedContainers) - - ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) - e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) - - // Check cgroup values only for containerd versions before 1.6.9 - ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) - - ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) - e2epod.VerifyPodResources(resizedPod, expectedContainers) - - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, expectedContainers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, initialContainers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") - } - } - } - - patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", newPod1) - patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", newPod2) - - rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) - framework.ExpectNoError(err1) - rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) - framework.ExpectNoError(err2) - // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", newPod1) - patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", newPod2) - - ginkgo.By("deleting pod") - deletePodSyncByName(ctx, f, newPod1.Name) - deletePodSyncByName(ctx, f, newPod2.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. - // this is in turn needed because we will have an unavoidable (in the current framework) race with the - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, newPod1.Name, newPod1.Namespace) - waitForAllContainerRemoval(ctx, newPod2.Name, newPod2.Namespace) - }) - } - - ginkgo.AfterEach(func(ctx context.Context) { - if oldCfg != nil { - updateKubeletConfig(ctx, f, oldCfg, true) - } - }) -} - -var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.WithSerial(), func() { - - policiesGeneralAvailability := []cpuManagerPolicyConfig{ - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with no options", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - { - name: string(cpumanager.PolicyStatic), - title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", - options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", - }, - }, - } - - doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) - doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) - doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) -}) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 27f961e6e425b..8b51be319cd56 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -184,7 +184,7 @@ func waitForKubeletToStart(ctx context.Context, f *framework.Framework) { // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) - }, 5*time.Minute, 2*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) // Wait for the Kubelet to be ready. gomega.Eventually(ctx, func(ctx context.Context) error { @@ -506,7 +506,7 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) { return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. Containers: %+v", len(containers), containers) } return nil - }, 5*time.Minute, 2*time.Second).Should(gomega.Succeed()) + }, 2*time.Minute, 1*time.Second).Should(gomega.Succeed()) } func getPidsForProcess(name, pidFile string) ([]int, error) { diff --git a/test/e2e_node/util_machineinfo_unsupported.go b/test/e2e_node/util_machineinfo_unsupported.go index 7863d27846392..1dd7ca1e9cd80 100644 --- a/test/e2e_node/util_machineinfo_unsupported.go +++ b/test/e2e_node/util_machineinfo_unsupported.go @@ -53,3 +53,7 @@ func getCoreSiblingList(cpuRes int64) string { func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { return nil, errors.New("not implemented") } + +func getSMTLevel() int { + return 1 +} From 37cd3f372b873fc7a60d9207f64ea555b9ef47ac Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Tue, 3 Mar 2026 18:06:11 +0100 Subject: [PATCH 09/15] Refactor using lifecycle.Operation To implement the design approved by KEP, update admit handler for topology/cpu manager to perform the appropriate feasibility checks on lifecycle.ResizeOperation. --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 19 +- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 15 +- pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 7 +- pkg/kubelet/cm/cpumanager/policy.go | 7 +- pkg/kubelet/cm/cpumanager/policy_none.go | 7 +- pkg/kubelet/cm/cpumanager/policy_none_test.go | 3 +- pkg/kubelet/cm/cpumanager/policy_static.go | 350 ++++++++++++------ .../cm/cpumanager/policy_static_test.go | 19 +- .../cm/cpumanager/topology_hints_test.go | 7 +- pkg/kubelet/cm/devicemanager/manager.go | 4 +- pkg/kubelet/cm/devicemanager/manager_test.go | 24 +- .../cm/devicemanager/topology_hints.go | 5 +- .../cm/devicemanager/topology_hints_test.go | 5 +- pkg/kubelet/cm/devicemanager/types.go | 6 +- .../cm/memorymanager/fake_memory_manager.go | 7 +- .../cm/memorymanager/memory_manager.go | 19 +- .../cm/memorymanager/memory_manager_test.go | 15 +- pkg/kubelet/cm/memorymanager/policy.go | 7 +- .../cm/memorymanager/policy_best_effort.go | 13 +- pkg/kubelet/cm/memorymanager/policy_none.go | 7 +- pkg/kubelet/cm/memorymanager/policy_static.go | 7 +- .../cm/memorymanager/policy_static_test.go | 15 +- pkg/kubelet/cm/topologymanager/policy_test.go | 3 +- pkg/kubelet/cm/topologymanager/scope.go | 10 +- .../cm/topologymanager/scope_container.go | 22 +- .../topologymanager/scope_container_test.go | 5 +- pkg/kubelet/cm/topologymanager/scope_none.go | 4 +- pkg/kubelet/cm/topologymanager/scope_pod.go | 18 +- .../cm/topologymanager/scope_pod_test.go | 5 +- .../cm/topologymanager/topology_manager.go | 8 +- .../topologymanager/topology_manager_test.go | 6 +- 31 files changed, 392 insertions(+), 257 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index bb87a7f067555..f3536f1a2a393 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/utils/cpuset" ) @@ -62,7 +63,7 @@ type Manager interface { // Called to trigger the allocation of CPUs to a container. This must be // called at some point prior to the AddContainer() call for a container, // e.g. at pod admission time. - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // AddContainer adds the mapping between container ID to pod UID and the container name // The mapping used to remove the CPU allocation during the container removal @@ -79,7 +80,7 @@ type Manager interface { // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetExclusiveCPUs implements the podresources.CPUsProvider interface to provide // exclusively allocated cpus for the container @@ -88,7 +89,7 @@ type Manager interface { // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment per Pod // among this and other resource controllers. - GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableCPUs returns the total set of CPUs available for allocation. GetAllocatableCPUs() cpuset.CPUSet @@ -256,7 +257,7 @@ func (m *manager) Start(ctx context.Context, activePods ActivePodsFunc, sourcesR return nil } -func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error { +func (m *manager) Allocate(p *v1.Pod, c *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() // until we move topology manager to contextual logging // Garbage collect any stranded resources before allocating CPUs. @@ -266,7 +267,7 @@ func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error { defer m.Unlock() // Call down into the policy to assign this container CPUs if required. - err := m.policy.Allocate(logger, m.state, p, c) + err := m.policy.Allocate(logger, m.state, p, c, operation) if err != nil { logger.Error(err, "policy error") return err @@ -327,20 +328,20 @@ func (m *manager) State() state.Reader { return m.state } -func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(logger) // Delegate to active policy - return m.policy.GetTopologyHints(logger, m.state, pod, container) + return m.policy.GetTopologyHints(logger, m.state, pod, container, operation) } -func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *manager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(logger) // Delegate to active policy - return m.policy.GetPodTopologyHints(logger, m.state, pod) + return m.policy.GetPodTopologyHints(logger, m.state, pod, operation) } func (m *manager) GetAllocatableCPUs() cpuset.CPUSet { diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 67e2cc0d36bbd..df46ff1697de7 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -45,6 +45,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -126,7 +127,7 @@ func (p *mockPolicy) Start(_ logr.Logger, s state.State) error { return p.err } -func (p *mockPolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *mockPolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return p.err } @@ -134,11 +135,11 @@ func (p *mockPolicy) RemoveContainer(_ logr.Logger, s state.State, podUID string return p.err } -func (p *mockPolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *mockPolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } @@ -373,7 +374,7 @@ func TestCPUManagerAdd(t *testing.T) { container := &pod.Spec.Containers[0] mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} } - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expAllocateErr) { t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v", testCase.description, testCase.expAllocateErr, err) @@ -614,7 +615,7 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { cumCSet := cpuset.New() for i := range containers { - err := mgr.Allocate(testCase.pod, &containers[i]) + err := mgr.Allocate(testCase.pod, &containers[i], lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). unexpected error for container id: %v: %v", testCase.description, containerIDs[i], err) @@ -1402,7 +1403,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { container := &pod.Spec.Containers[0] mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} } - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expAllocateErr) { t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v", testCase.description, testCase.expAllocateErr, err) @@ -1552,7 +1553,7 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) { pod := makePod("fakePod", "fakeContainer", "2", "2") container := &pod.Spec.Containers[0] - _ = mgr.Allocate(pod, container) + _ = mgr.Allocate(pod, container, lifecycle.AddOperation) if !mgr.GetAllocatableCPUs().Equals(testCase.expAllocatableCPUs) { t.Errorf("Policy GetAllocatableCPUs() error (%v). expected cpuset %v for container %v but got %v", diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go index 791f2e5bec0d1..40608edd40c64 100644 --- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/utils/cpuset" ) @@ -47,7 +48,7 @@ func (m *fakeManager) Policy() Policy { return pol } -func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) return nil @@ -62,13 +63,13 @@ func (m *fakeManager) RemoveContainer(logger logr.Logger, containerID string) er return nil } -func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get container topology hints") return map[string][]topologymanager.TopologyHint{} } -func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get pod topology hints") return map[string][]topologymanager.TopologyHint{} diff --git a/pkg/kubelet/cm/cpumanager/policy.go b/pkg/kubelet/cm/cpumanager/policy.go index 628b0254af16a..d5c3b9d18bbc6 100644 --- a/pkg/kubelet/cm/cpumanager/policy.go +++ b/pkg/kubelet/cm/cpumanager/policy.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -30,17 +31,17 @@ type Policy interface { Name() string Start(logger logr.Logger, s state.State) error // Allocate call is idempotent - Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error + Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer call is idempotent RemoveContainer(logger logr.Logger, s state.State, podUID string, containerName string) error // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment per Pod // among this and other resource controllers. - GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableCPUs returns the total set of CPUs available for allocation. GetAllocatableCPUs(m state.State) cpuset.CPUSet } diff --git a/pkg/kubelet/cm/cpumanager/policy_none.go b/pkg/kubelet/cm/cpumanager/policy_none.go index e29d0598a36f0..19c0d86fb9f3b 100644 --- a/pkg/kubelet/cm/cpumanager/policy_none.go +++ b/pkg/kubelet/cm/cpumanager/policy_none.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -51,7 +52,7 @@ func (p *nonePolicy) Start(logger logr.Logger, s state.State) error { return nil } -func (p *nonePolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *nonePolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return nil } @@ -59,11 +60,11 @@ func (p *nonePolicy) RemoveContainer(_ logr.Logger, s state.State, podUID string return nil } -func (p *nonePolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *nonePolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *nonePolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *nonePolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } diff --git a/pkg/kubelet/cm/cpumanager/policy_none_test.go b/pkg/kubelet/cm/cpumanager/policy_none_test.go index bba7908fe5dce..749045df68a60 100644 --- a/pkg/kubelet/cm/cpumanager/policy_none_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_none_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -46,7 +47,7 @@ func TestNonePolicyAllocate(t *testing.T) { testPod := makePod("fakePod", "fakeContainer", "1000m", "1000m") container := &testPod.Spec.Containers[0] - err := policy.Allocate(logger, st, testPod, container) + err := policy.Allocate(logger, st, testPod, container, lifecycle.AddOperation) if err != nil { t.Errorf("NonePolicy Allocate() error. expected no error but got: %v", err) } diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 3185ecb984417..f712a240a5a70 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -33,6 +33,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/utils/cpuset" ) @@ -53,8 +54,22 @@ const ( ErrorGetOriginalCPUSet = "getOriginalCPUSetError" // ErrorResizeAllocateCPUs represents the type of a ResizeAllocateCPUsError ErrorResizeAllocateCPUs = "ResizeAllocateCPUsError" + // ErrorUnsupportedLifecycleOperation represents the type of a UnsupportedLifecycleOperationError + ErrorUnsupportedLifecycleOperation = "UnsupportedLifecycleOperationError" ) +type UnsupportedLifecycleOperationError struct { + Operation lifecycle.Operation +} + +func (e UnsupportedLifecycleOperationError) Error() string { + return fmt.Sprintf("Unsupported Lifecycle Operation Error: %s is neither AddOperation nor ResizeOperation", e.Operation) +} + +func (e UnsupportedLifecycleOperationError) Type() string { + return ErrorUnsupportedLifecycleOperation +} + // SMTAlignmentError represents an error due to SMT alignment type SMTAlignmentError struct { RequestedCPUs int @@ -424,152 +439,251 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset) } -func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) +func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "operation", operation) logger.Info("Allocate start") // V=0 for backward compatibility defer logger.V(2).Info("Allocate end") - numCPUs := p.guaranteedCPUs(logger, pod, container) - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - // During a pod resize, handle corner cases - err := p.isFeasibleResize(logger, s, pod, container) + if operation == lifecycle.AddOperation { + + numCPUs := p.guaranteedCPUs(logger, pod, container) + + if numCPUs == 0 { + // container belongs in the shared pool (nothing to do; use default cpuset) + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } + + logger.Info("Static policy: Allocate") + + // container belongs in an exclusively allocated pool + metrics.CPUManagerPinningRequestsTotal.Inc() + defer func() { + if rerr != nil { + metrics.CPUManagerPinningErrorsTotal.Inc() + if p.options.FullPhysicalCPUsOnly { + metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + return + } + // TODO: move in updateMetricsOnAllocate + if p.options.FullPhysicalCPUsOnly { + // increment only if we know we allocate aligned resources + metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + }() + + if p.options.FullPhysicalCPUsOnly { + if (numCPUs % p.cpuGroupSize) != 0 { + // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted + // if the CPU requested is a multiple of the number of virtual cpus per physical cores. + // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put + // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores + // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, + // the pod would be placed on a node where there are enough physical cores available to be allocated. + // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket + // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all + // CPUs on a physical core. Allocation of individual threads would never have to occur. + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, + } + } + + availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + + // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores + // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider + // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. + // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. + if numCPUs > availablePhysicalCPUs { + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, + } + } + } + if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + p.updateCPUsToReuse(pod, container, cset) + logger.Info("Static policy: container already present in state, skipping") + return nil + } + + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "affinity", hint) + + // Allocate CPUs according to the NUMA affinity contained in the hint. + cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) if err != nil { - logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) return err } - } - if numCPUs == 0 { - // container belongs in the shared pool (nothing to do; use default cpuset) - return nil - } + s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) + p.updateMetricsOnAllocate(logger, s, cpuAllocation) - if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { - logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) return nil + } - logger.Info("Static policy: Allocate") + if operation == lifecycle.ResizeOperation { - // container belongs in an exclusively allocated pool - metrics.CPUManagerPinningRequestsTotal.Inc() - defer func() { - if rerr != nil { - metrics.CPUManagerPinningErrorsTotal.Inc() - if p.options.FullPhysicalCPUsOnly { - metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + numCPUs := p.guaranteedCPUs(logger, pod, container) + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // During a pod resize, handle corner cases + err := p.isFeasibleResize(logger, s, pod, container) + if err != nil { + logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err } - return } - // TODO: move in updateMetricsOnAllocate - if p.options.FullPhysicalCPUsOnly { - // increment only if we know we allocate aligned resources - metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + + if numCPUs == 0 { + // container belongs in the shared pool (nothing to do; use default cpuset) + return nil } - }() - - if p.options.FullPhysicalCPUsOnly { - if (numCPUs % p.cpuGroupSize) != 0 { - // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted - // if the CPU requested is a multiple of the number of virtual cpus per physical cores. - // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put - // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores - // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, - // the pod would be placed on a node where there are enough physical cores available to be allocated. - // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket - // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all - // CPUs on a physical core. Allocation of individual threads would never have to occur. - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - CausedByPhysicalCPUs: false, - } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil } - availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + logger.Info("Static policy: Allocate") - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { - cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] - availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) + // container belongs in an exclusively allocated pool + metrics.CPUManagerPinningRequestsTotal.Inc() + defer func() { + if rerr != nil { + metrics.CPUManagerPinningErrorsTotal.Inc() + if p.options.FullPhysicalCPUsOnly { + metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + return } - } - // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores - // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider - // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. - // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. - if numCPUs > availablePhysicalCPUs { - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - AvailablePhysicalCPUs: availablePhysicalCPUs, - CausedByPhysicalCPUs: true, + // TODO: move in updateMetricsOnAllocate + if p.options.FullPhysicalCPUsOnly { + // increment only if we know we allocate aligned resources + metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() } - } - } - if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) - // Call Topology Manager to get the aligned socket affinity across all hint providers. - hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) - // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint - // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) - if !ok { - err := getOriginalCPUSetError{ - PodUID: string(pod.UID), - ContainerName: container.Name, + }() + + if p.options.FullPhysicalCPUsOnly { + if (numCPUs % p.cpuGroupSize) != 0 { + // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted + // if the CPU requested is a multiple of the number of virtual cpus per physical cores. + // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put + // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores + // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, + // the pod would be placed on a node where there are enough physical cores available to be allocated. + // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket + // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all + // CPUs on a physical core. Allocation of individual threads would never have to occur. + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, } - return err } - // Allocate CPUs according to the NUMA affinity contained in the hint. - newallocatedcpuset, witherr := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) - if witherr != nil { - err := ResizeAllocateCPUsError{ - PodUID: string(pod.UID), - ContainerName: container.Name, - TopologyError: witherr.Error(), + + availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { + cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] + availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) + } + } + // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores + // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider + // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. + // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. + if numCPUs > availablePhysicalCPUs { + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, } - return err } + } + if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + err := getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + return err + } + // Allocate CPUs according to the NUMA affinity contained in the hint. + newallocatedcpuset, witherr := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) + if witherr != nil { + err := ResizeAllocateCPUsError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + TopologyError: witherr.Error(), + } + return err + } - // Allocation successful, update the current state - s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) - p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) - p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) - logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) - // Updated state to the checkpoint file will be stored during - // the reconcile loop. TODO is this a problem? I don't believe - // because if kubelet will be terminated now, anyhow it will be - // needed the state to be cleaned up, an error will appear requiring - // the node to be drained. I think we are safe. All computations are - // using state_mem and not the checkpoint. - return nil - } else { - p.updateCPUsToReuse(pod, container, cpusInUseByPodContainer) - logger.Info("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) - return nil + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) + logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. + return nil + } else { + p.updateCPUsToReuse(pod, container, cpusInUseByPodContainer) + logger.Info("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) + return nil + } } - } - // Call Topology Manager to get the aligned socket affinity across all hint providers. - hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - logger.Info("Topology Affinity", "affinity", hint) + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "affinity", hint) - // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) - if err != nil { - logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) - return err - } + // Allocate CPUs according to the NUMA affinity contained in the hint. + cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) + if err != nil { + logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) + return err + } - s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) - p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) - p.updateMetricsOnAllocate(logger, s, cpuAllocation) + s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) + p.updateMetricsOnAllocate(logger, s, cpuAllocation) + + logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + return nil + } + return UnsupportedLifecycleOperationError{ + Operation: operation, + } - logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) - return nil } // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. @@ -769,7 +883,7 @@ func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.C return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) } -func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) // Get a count of how many guaranteed CPUs have been requested. @@ -846,7 +960,7 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * } } -func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID) // Get a count of how many guaranteed CPUs have been requested by Pod. diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 6321937cfba3b..33eafd56f2788 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/utils/cpuset" ) @@ -775,7 +776,7 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { } container := &testCase.pod.Spec.Containers[0] - err = policy.Allocate(logger, st, testCase.pod, container) + err = policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %q but got: %q", testCase.description, testCase.expErr, err) @@ -858,7 +859,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - _ = policy.Allocate(logger, st, pod, &container) + _ = policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) } if !st.defaultCPUSet.Equals(testCase.expCSetAfterAlloc) { t.Errorf("StaticPolicy Allocate() error (%v). expected default cpuset %s but got %s", @@ -1100,7 +1101,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(logger, st, pod, &container) + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -1130,7 +1131,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { } podResized := pod for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { - err := policy.Allocate(logger, st, podResized, &container) + err := policy.Allocate(logger, st, podResized, &container, lifecycle.ResizeOperation) if err != nil { if !reflect.DeepEqual(err, testCase.expAllocErr) { t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", @@ -1208,7 +1209,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(logger, st, pod, &container) + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -1438,7 +1439,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(logger, st, pod, &container) + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -1468,7 +1469,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { } podResized := pod for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { - err := policy.Allocate(logger, st, podResized, &container) + err := policy.Allocate(logger, st, podResized, &container, lifecycle.ResizeOperation) if err != nil { if !reflect.DeepEqual(err, testCase.expAllocErr) { t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", @@ -1859,7 +1860,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { } container := &testCase.pod.Spec.Containers[0] - err = policy.Allocate(logger, st, testCase.pod, container) + err = policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %v but got: %v", testCase.description, testCase.expErr, err) @@ -2629,7 +2630,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { for idx := range testCase.pod.Spec.Containers { container := &testCase.pod.Spec.Containers[idx] - err := policy.Allocate(logger, st, testCase.pod, container) + err := policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if err != nil { t.Fatalf("Allocate failed: pod=%q container=%q", testCase.pod.UID, container.Name) } diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index 937de282157d8..755a75b7b3378 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/utils/cpuset" ) @@ -246,7 +247,7 @@ func TestGetTopologyHints(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - hints := m.GetTopologyHints(&tc.pod, &tc.container)[string(v1.ResourceCPU)] + hints := m.GetTopologyHints(&tc.pod, &tc.container, lifecycle.AddOperation)[string(v1.ResourceCPU)] if len(tc.expectedHints) == 0 && len(hints) == 0 { continue } @@ -297,7 +298,7 @@ func TestGetPodTopologyHints(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - podHints := m.GetPodTopologyHints(&tc.pod)[string(v1.ResourceCPU)] + podHints := m.GetPodTopologyHints(&tc.pod, lifecycle.AddOperation)[string(v1.ResourceCPU)] if len(tc.expectedHints) == 0 && len(podHints) == 0 { continue } @@ -479,7 +480,7 @@ func TestGetPodTopologyHintsWithPolicyOptions(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - podHints := m.GetPodTopologyHints(&testCase.pod)[string(v1.ResourceCPU)] + podHints := m.GetPodTopologyHints(&testCase.pod, lifecycle.AddOperation)[string(v1.ResourceCPU)] sort.SliceStable(podHints, func(i, j int) bool { return podHints[i].LessThan(podHints[j]) }) diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 0019ae5d0d9a1..db9cc723aeb4b 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -363,7 +363,7 @@ func (m *ManagerImpl) Stop(logger klog.Logger) error { // Allocate is the call that you can use to allocate a set of devices // from the registered device plugins. -func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { // Use context.TODO() because we currently do not have a proper context to pass in. // Replace this with an appropriate context when refactoring this function to accept a context parameter. ctx := context.TODO() @@ -984,7 +984,7 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(ctx context.Context, pod *v1. } if needsReAllocate { logger.V(2).Info("Needs to re-allocate device plugin resources for pod", "pod", klog.KObj(pod), "containerName", container.Name) - if err := m.Allocate(pod, container); err != nil { + if err := m.Allocate(pod, container, lifecycle.AddOperation); err != nil { return nil, err } } diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go index 38add8678e7d0..bdf19a9935f52 100644 --- a/pkg/kubelet/cm/devicemanager/manager_test.go +++ b/pkg/kubelet/cm/devicemanager/manager_test.go @@ -1087,7 +1087,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) { pod := testCase.testPod activePods = append(activePods, pod) podsStub.updateActivePods(activePods) - err := testManager.Allocate(pod, &pod.Spec.Containers[0]) + err := testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("DevicePluginManager error (%v). expected error: %v but got: %v", testCase.description, testCase.expErr, err) @@ -1320,9 +1320,9 @@ func TestGetDeviceRunContainerOptions(t *testing.T) { activePods := []*v1.Pod{pod1, pod2} podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod1, &pod1.Spec.Containers[0]) + err = testManager.Allocate(pod1, &pod1.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) - err = testManager.Allocate(pod2, &pod2.Spec.Containers[0]) + err = testManager.Allocate(pod2, &pod2.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) // when pod is in activePods, GetDeviceRunContainerOptions should return @@ -1418,10 +1418,10 @@ func TestInitContainerDeviceAllocation(t *testing.T) { } podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInInitContainers}) for _, container := range podWithPluginResourcesInInitContainers.Spec.InitContainers { - err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container, lifecycle.AddOperation) } for _, container := range podWithPluginResourcesInInitContainers.Spec.Containers { - err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container, lifecycle.AddOperation) } as.NoError(err) podUID := string(podWithPluginResourcesInInitContainers.UID) @@ -1528,10 +1528,10 @@ func TestRestartableInitContainerDeviceAllocation(t *testing.T) { } podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInRestartableInitContainers}) for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers { - err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container, lifecycle.AddOperation) } for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.Containers { - err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container, lifecycle.AddOperation) } as.NoError(err) podUID := string(podWithPluginResourcesInRestartableInitContainers.UID) @@ -1661,7 +1661,7 @@ func TestDevicePreStartContainer(t *testing.T) { activePods := []*v1.Pod{} activePods = append(activePods, pod) podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod, &pod.Spec.Containers[0]) + err = testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) runContainerOpts, err := testManager.GetDeviceRunContainerOptions(tCtx, pod, &pod.Spec.Containers[0]) as.NoError(err) @@ -1689,7 +1689,7 @@ func TestDevicePreStartContainer(t *testing.T) { v1.ResourceName(res1.resourceName): *resource.NewQuantity(int64(0), resource.DecimalSI)}) activePods = append(activePods, pod2) podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod2, &pod2.Spec.Containers[0]) + err = testManager.Allocate(pod2, &pod2.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) _, err = testManager.GetDeviceRunContainerOptions(tCtx, pod2, &pod2.Spec.Containers[0]) as.NoError(err) @@ -1842,7 +1842,7 @@ func TestGetTopologyHintsWithUpdates(t *testing.T) { count: 10, devices: devs, testfunc: func(manager *wrappedManagerImpl) { - manager.GetTopologyHints(testPod, &testPod.Spec.Containers[0]) + manager.GetTopologyHints(testPod, &testPod.Spec.Containers[0], lifecycle.AddOperation) }, }, { @@ -1850,7 +1850,7 @@ func TestGetTopologyHintsWithUpdates(t *testing.T) { count: 10, devices: devs, testfunc: func(manager *wrappedManagerImpl) { - manager.GetPodTopologyHints(testPod) + manager.GetPodTopologyHints(testPod, lifecycle.AddOperation) }, }, } @@ -2132,7 +2132,7 @@ func TestAdmitPodWithDRAResources(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - err := testManager.Allocate(pod, &pod.Spec.Containers[0]) + err := testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) test.checkError(t, err) }) } diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index 8846122a3d769..3f2aca102a090 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -25,12 +25,13 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // GetTopologyHints implements the TopologyManager HintProvider Interface which // ensures the Device Manager is consulted when Topology Aware Hints for each // container are created. -func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use klog.TODO() because we currently do not have a proper logger to pass in. // Replace this with an appropriate logger when refactoring this function to accept a logger parameter. logger := klog.TODO() @@ -85,7 +86,7 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map // GetPodTopologyHints implements the topologymanager.HintProvider Interface which // ensures the Device Manager is consulted when Topology Aware Hints for Pod are created. -func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use klog.TODO() because we currently do not have a proper logger to pass in. // Replace this with an appropriate logger when refactoring this function to accept a logger parameter. logger := klog.TODO() diff --git a/pkg/kubelet/cm/devicemanager/topology_hints_test.go b/pkg/kubelet/cm/devicemanager/topology_hints_test.go index 53ea645a3af68..567503c19de76 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints_test.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints_test.go @@ -29,6 +29,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -93,7 +94,7 @@ func TestGetTopologyHints(t *testing.T) { } } - hints := m.GetTopologyHints(tc.pod, &tc.pod.Spec.Containers[0]) + hints := m.GetTopologyHints(tc.pod, &tc.pod.Spec.Containers[0], lifecycle.AddOperation) for r := range tc.expectedHints { sort.SliceStable(hints[r], func(i, j int) bool { @@ -960,7 +961,7 @@ func TestGetPodTopologyHints(t *testing.T) { } } - hints := m.GetPodTopologyHints(tc.pod) + hints := m.GetPodTopologyHints(tc.pod, lifecycle.AddOperation) for r := range tc.expectedHints { sort.SliceStable(hints[r], func(i, j int) bool { diff --git a/pkg/kubelet/cm/devicemanager/types.go b/pkg/kubelet/cm/devicemanager/types.go index b377443c434c2..cb2e11fd26c44 100644 --- a/pkg/kubelet/cm/devicemanager/types.go +++ b/pkg/kubelet/cm/devicemanager/types.go @@ -44,7 +44,7 @@ type Manager interface { // owning device plugin to allow setup procedures to take place, and for // the device plugin to provide runtime settings to use the device // (environment variables, mount points and device files). - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // UpdatePluginResources updates node resources based on devices already // allocated to pods. The node object is provided for the device manager to @@ -83,11 +83,11 @@ type Manager interface { // TopologyManager HintProvider provider indicates the Device Manager implements the Topology Manager Interface // and is consulted to make Topology aware resource alignments - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // TopologyManager HintProvider provider indicates the Device Manager implements the Topology Manager Interface // and is consulted to make Topology aware resource alignments per Pod - GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // UpdateAllocatedDevices frees any Devices that are bound to terminated pods. UpdateAllocatedDevices() diff --git a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go index e589c78f573d3..f0df61673c409 100644 --- a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" ) @@ -44,7 +45,7 @@ func (m *fakeManager) Policy(logger klog.Logger) Policy { return NewPolicyNone(logger) } -func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) return nil @@ -64,13 +65,13 @@ func (m *fakeManager) RemoveContainer(logger klog.Logger, containerID string) er return nil } -func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get Topology Hints", "pod", klog.KObj(pod), "containerName", container.Name) return map[string][]topologymanager.TopologyHint{} } -func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get Pod Topology Hints", "pod", klog.KObj(pod)) return map[string][]topologymanager.TopologyHint{} diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 585ebff101390..658d337692bf8 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" ) @@ -65,7 +66,7 @@ type Manager interface { // Allocate is called to pre-allocate memory resources during Pod admission. // This must be called at some point prior to the AddContainer() call for a container, e.g. at pod admission time. - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer is called after Kubelet decides to kill or delete a // container. After this call, any memory allocated to the container is freed. @@ -77,12 +78,12 @@ type Manager interface { // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(*v1.Pod, *v1.Container, lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(*v1.Pod, lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetMemoryNUMANodes provides NUMA nodes that are used to allocate the container memory GetMemoryNUMANodes(logger klog.Logger, pod *v1.Pod, container *v1.Container) sets.Set[int] @@ -261,7 +262,7 @@ func (m *manager) GetMemoryNUMANodes(logger klog.Logger, pod *v1.Pod, container } // Allocate is called to pre-allocate memory resources during Pod admission. -func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *manager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() m.removeStaleState(logger) @@ -269,7 +270,7 @@ func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error { defer m.Unlock() // Call down into the policy to assign this container memory if required. - if err := m.policy.Allocate(logger, m.state, pod, container); err != nil { + if err := m.policy.Allocate(logger, m.state, pod, container, operation); err != nil { logger.Error(err, "Allocate error", "pod", klog.KObj(pod), "containerName", container.Name) return err } @@ -301,22 +302,22 @@ func (m *manager) State() state.Reader { } // GetPodTopologyHints returns the topology hints for the topology manager -func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *manager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use context.TODO() because we currently do not have a proper context to pass in. // This should be replaced with an appropriate context when refactoring this function to accept a context parameter. ctx := context.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(klog.FromContext(ctx)) // Delegate to active policy - return m.policy.GetPodTopologyHints(klog.TODO(), m.state, pod) + return m.policy.GetPodTopologyHints(klog.TODO(), m.state, pod, operation) } // GetTopologyHints returns the topology hints for the topology manager -func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(klog.TODO()) // Delegate to active policy - return m.policy.GetTopologyHints(klog.TODO(), m.state, pod, container) + return m.policy.GetTopologyHints(klog.TODO(), m.state, pod, container, operation) } // TODO: move the method to the upper level, to re-use it under the CPU and memory managers diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index 6844710fecd98..b868f52537b53 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -38,6 +38,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -99,18 +100,18 @@ func (p *mockPolicy) Start(klog.Logger, state.State) error { return p.err } -func (p *mockPolicy) Allocate(klog.Logger, state.State, *v1.Pod, *v1.Container) error { +func (p *mockPolicy) Allocate(klog.Logger, state.State, *v1.Pod, *v1.Container, lifecycle.Operation) error { return p.err } func (p *mockPolicy) RemoveContainer(klog.Logger, state.State, string, string) { } -func (p *mockPolicy) GetTopologyHints(klog.Logger, state.State, *v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetTopologyHints(klog.Logger, state.State, *v1.Pod, *v1.Container, lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *mockPolicy) GetPodTopologyHints(klog.Logger, state.State, *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetPodTopologyHints(klog.Logger, state.State, *v1.Pod, lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } @@ -1414,7 +1415,7 @@ func TestAddContainer(t *testing.T) { } pod := testCase.podAllocate container := &pod.Spec.Containers[0] - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedAllocateError) { t.Errorf("Memory Manager Allocate() error (%v), expected error: %v, but got: %v", testCase.description, testCase.expectedAllocateError, err) @@ -2173,7 +2174,7 @@ func TestGetTopologyHints(t *testing.T) { pod := getPod("fakePod1", "fakeContainer1", requirementsGuaranteed) container := &pod.Spec.Containers[0] - hints := mgr.GetTopologyHints(pod, container) + hints := mgr.GetTopologyHints(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(hints, testCase.expectedHints) { t.Errorf("Hints were not generated correctly. Hints generated: %+v, hints expected: %+v", hints, testCase.expectedHints) @@ -2351,7 +2352,7 @@ func TestAllocateAndAddPodWithInitContainers(t *testing.T) { // Allocates memory for init containers for i := range testCase.podAllocate.Spec.InitContainers { - err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.InitContainers[i]) + err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -2359,7 +2360,7 @@ func TestAllocateAndAddPodWithInitContainers(t *testing.T) { // Allocates memory for apps containers for i := range testCase.podAllocate.Spec.Containers { - err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.Containers[i]) + err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.Containers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } diff --git a/pkg/kubelet/cm/memorymanager/policy.go b/pkg/kubelet/cm/memorymanager/policy.go index 3c17ec93c21fa..fa23ca42691e0 100644 --- a/pkg/kubelet/cm/memorymanager/policy.go +++ b/pkg/kubelet/cm/memorymanager/policy.go @@ -21,6 +21,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // Type defines the policy type @@ -31,17 +32,17 @@ type Policy interface { Name() string Start(logger klog.Logger, s state.State) error // Allocate call is idempotent - Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) error + Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer call is idempotent RemoveContainer(logger klog.Logger, s state.State, podUID string, containerName string) // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableMemory returns the amount of allocatable memory for each NUMA node GetAllocatableMemory(s state.State) []state.Block } diff --git a/pkg/kubelet/cm/memorymanager/policy_best_effort.go b/pkg/kubelet/cm/memorymanager/policy_best_effort.go index 53f5746d48d87..3e60a70df3d2b 100644 --- a/pkg/kubelet/cm/memorymanager/policy_best_effort.go +++ b/pkg/kubelet/cm/memorymanager/policy_best_effort.go @@ -24,6 +24,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // On Windows we want to use the same logic as the StaticPolicy to compute the memory topology hints @@ -60,20 +61,20 @@ func (p *bestEffortPolicy) Start(logger logr.Logger, s state.State) error { return p.static.Start(logger, s) } -func (p *bestEffortPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - return p.static.Allocate(logger, s, pod, container) +func (p *bestEffortPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { + return p.static.Allocate(logger, s, pod, container, operation) } func (p *bestEffortPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID string, containerName string) { p.static.RemoveContainer(logger, s, podUID, containerName) } -func (p *bestEffortPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { - return p.static.GetPodTopologyHints(logger, s, pod) +func (p *bestEffortPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + return p.static.GetPodTopologyHints(logger, s, pod, operation) } -func (p *bestEffortPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { - return p.static.GetTopologyHints(logger, s, pod, container) +func (p *bestEffortPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + return p.static.GetTopologyHints(logger, s, pod, container, operation) } func (p *bestEffortPolicy) GetAllocatableMemory(s state.State) []state.Block { diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go index ceb2d236d1faf..f8b06a0ef9401 100644 --- a/pkg/kubelet/cm/memorymanager/policy_none.go +++ b/pkg/kubelet/cm/memorymanager/policy_none.go @@ -21,6 +21,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) const policyTypeNone policyType = "None" @@ -46,7 +47,7 @@ func (p *none) Start(logger klog.Logger, s state.State) error { } // Allocate call is idempotent -func (p *none) Allocate(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *none) Allocate(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return nil } @@ -57,14 +58,14 @@ func (p *none) RemoveContainer(_ klog.Logger, s state.State, podUID string, cont // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *none) GetTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *none) GetTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *none) GetPodTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *none) GetPodTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index eb5c3b5f67c32..2bdcee8006662 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/metrics" ) @@ -95,7 +96,7 @@ func (p *staticPolicy) Start(logger klog.Logger, s state.State) error { } // Allocate call is idempotent -func (p *staticPolicy) Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { +func (p *staticPolicy) Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { // allocate the memory only for guaranteed pods logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "containerName", container.Name) qos := v1qos.GetPodQOS(pod) @@ -401,7 +402,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) { return reqRsrcs, nil } -func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod)) if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { @@ -436,7 +437,7 @@ func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, po // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *staticPolicy) GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod)) if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index 5f51082f87b3d..702a3ada25436 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -32,6 +32,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -2089,7 +2090,7 @@ func TestStaticPolicyAllocate(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0], lifecycle.AddOperation) if (err == nil) != (testCase.expectedError == nil) || (err != nil && testCase.expectedError != nil && err.Error() != testCase.expectedError.Error()) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -2814,14 +2815,14 @@ func TestStaticPolicyAllocateWithInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.InitContainers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } } for i := range testCase.pod.Spec.Containers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -3148,7 +3149,7 @@ func TestStaticPolicyAllocateWithRestartableInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.InitContainers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -3159,7 +3160,7 @@ func TestStaticPolicyAllocateWithRestartableInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.Containers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i], lifecycle.AddOperation) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -3827,7 +3828,7 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - topologyHints := p.GetTopologyHints(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0]) + topologyHints := p.GetTopologyHints(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0], lifecycle.AddOperation) if !reflect.DeepEqual(topologyHints, testCase.expectedTopologyHints) { t.Fatalf("The actual topology hints: '%+v' are different from the expected one: '%+v'", topologyHints, testCase.expectedTopologyHints) } @@ -3860,7 +3861,7 @@ func TestStaticPolicyGetPodTopologyHints(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - topologyHints := p.GetPodTopologyHints(logger, s, testCase.pod) + topologyHints := p.GetPodTopologyHints(logger, s, testCase.pod, lifecycle.AddOperation) if !reflect.DeepEqual(topologyHints, testCase.expectedTopologyHints) { t.Fatalf("The actual topology hints: '%+v' are different from the expected one: '%+v'", topologyHints, testCase.expectedTopologyHints) } diff --git a/pkg/kubelet/cm/topologymanager/policy_test.go b/pkg/kubelet/cm/topologymanager/policy_test.go index fb94454b94703..f7dce8c1f5ef9 100644 --- a/pkg/kubelet/cm/topologymanager/policy_test.go +++ b/pkg/kubelet/cm/topologymanager/policy_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -1279,7 +1280,7 @@ func testPolicyMerge(policy Policy, tcases []policyMergeTestCase, t *testing.T) for _, tc := range tcases { var providersHints []map[string][]TopologyHint for _, provider := range tc.hp { - hints := provider.GetTopologyHints(&v1.Pod{}, &v1.Container{}) + hints := provider.GetTopologyHints(&v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) providersHints = append(providersHints, hints) } diff --git a/pkg/kubelet/cm/topologymanager/scope.go b/pkg/kubelet/cm/topologymanager/scope.go index ff34253df953c..e4c8388294a72 100644 --- a/pkg/kubelet/cm/topologymanager/scope.go +++ b/pkg/kubelet/cm/topologymanager/scope.go @@ -42,7 +42,7 @@ type podTopologyHints map[string]map[string]TopologyHint type Scope interface { Name() string GetPolicy() Policy - Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult + Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult // AddHintProvider adds a hint provider to manager to indicate the hint provider // wants to be consoluted with when making topology hints AddHintProvider(h HintProvider) @@ -140,9 +140,9 @@ func (s *scope) RemoveContainer(containerID string) error { return nil } -func (s *scope) admitPolicyNone(pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *scope) admitPolicyNone(pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { return admission.GetPodAdmitResult(err) } @@ -152,9 +152,9 @@ func (s *scope) admitPolicyNone(pod *v1.Pod) lifecycle.PodAdmitResult { // It would be better to implement this function in topologymanager instead of scope // but topologymanager do not track providers anymore -func (s *scope) allocateAlignedResources(pod *v1.Pod, container *v1.Container) error { +func (s *scope) allocateAlignedResources(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { for _, provider := range s.hintProviders { - err := provider.Allocate(pod, container) + err := provider.Allocate(pod, container, operation) if err != nil { return err } diff --git a/pkg/kubelet/cm/topologymanager/scope_container.go b/pkg/kubelet/cm/topologymanager/scope_container.go index d8a68ec451558..db2e4ed54c056 100644 --- a/pkg/kubelet/cm/topologymanager/scope_container.go +++ b/pkg/kubelet/cm/topologymanager/scope_container.go @@ -46,12 +46,12 @@ func NewContainerScope(policy Policy) Scope { } } -func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { logger := klog.FromContext(ctx) for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - bestHint, admit := s.calculateAffinity(logger, pod, &container) - logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + bestHint, admit := s.calculateAffinity(logger, pod, &container, operation) + logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) if !admit { if IsAlignmentGuaranteed(s.policy) { @@ -60,10 +60,10 @@ func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAd metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(&TopologyAffinityError{}) } - logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) s.setTopologyHints(string(pod.UID), container.Name, bestHint) - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(err) @@ -77,21 +77,21 @@ func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAd return admission.GetPodAdmitResult(nil) } -func (s *containerScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, container *v1.Container) []map[string][]TopologyHint { +func (s *containerScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) []map[string][]TopologyHint { var providersHints []map[string][]TopologyHint for _, provider := range s.hintProviders { // Get the TopologyHints for a Container from a provider. - hints := provider.GetTopologyHints(pod, container) + hints := provider.GetTopologyHints(pod, container, operation) providersHints = append(providersHints, hints) - logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) } return providersHints } -func (s *containerScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, container *v1.Container) (TopologyHint, bool) { - providersHints := s.accumulateProvidersHints(logger, pod, container) +func (s *containerScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (TopologyHint, bool) { + providersHints := s.accumulateProvidersHints(logger, pod, container, operation) bestHint, admit := s.policy.Merge(logger, providersHints) - logger.Info("ContainerTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("ContainerTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) return bestHint, admit } diff --git a/pkg/kubelet/cm/topologymanager/scope_container_test.go b/pkg/kubelet/cm/topologymanager/scope_container_test.go index bb597169ef619..f44cf3b679938 100644 --- a/pkg/kubelet/cm/topologymanager/scope_container_test.go +++ b/pkg/kubelet/cm/topologymanager/scope_container_test.go @@ -21,6 +21,7 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -133,7 +134,7 @@ func TestContainerCalculateAffinity(t *testing.T) { }, } - ctnScope.calculateAffinity(logger, &v1.Pod{}, &v1.Container{}) + ctnScope.calculateAffinity(logger, &v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) actual := ctnScope.policy.(*mockPolicy).ph if !reflect.DeepEqual(tc.expected, actual) { t.Errorf("Test Case: %s", tc.name) @@ -265,7 +266,7 @@ func TestContainerAccumulateProvidersHints(t *testing.T) { hintProviders: tc.hp, }, } - actual := ctnScope.accumulateProvidersHints(logger, &v1.Pod{}, &v1.Container{}) + actual := ctnScope.accumulateProvidersHints(logger, &v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) if !reflect.DeepEqual(actual, tc.expected) { t.Errorf("Test Case %s: Expected NUMANodeAffinity in result to be %v, got %v", tc.name, tc.expected, actual) } diff --git a/pkg/kubelet/cm/topologymanager/scope_none.go b/pkg/kubelet/cm/topologymanager/scope_none.go index 44f6c32158f1d..730ecd32dc4ef 100644 --- a/pkg/kubelet/cm/topologymanager/scope_none.go +++ b/pkg/kubelet/cm/topologymanager/scope_none.go @@ -43,6 +43,6 @@ func NewNoneScope() Scope { } } -func (s *noneScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { - return s.admitPolicyNone(pod) +func (s *noneScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { + return s.admitPolicyNone(pod, operation) } diff --git a/pkg/kubelet/cm/topologymanager/scope_pod.go b/pkg/kubelet/cm/topologymanager/scope_pod.go index 8498ccbd89a04..2fb583c389c19 100644 --- a/pkg/kubelet/cm/topologymanager/scope_pod.go +++ b/pkg/kubelet/cm/topologymanager/scope_pod.go @@ -46,10 +46,10 @@ func NewPodScope(policy Policy) Scope { } } -func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *podScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { logger := klog.FromContext(ctx) - bestHint, admit := s.calculateAffinity(logger, pod) + bestHint, admit := s.calculateAffinity(logger, pod, operation) logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod)) if !admit { if IsAlignmentGuaranteed(s.policy) { @@ -64,7 +64,7 @@ func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitRes logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) s.setTopologyHints(string(pod.UID), container.Name, bestHint) - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(err) @@ -78,21 +78,21 @@ func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitRes return admission.GetPodAdmitResult(nil) } -func (s *podScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod) []map[string][]TopologyHint { +func (s *podScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, operation lifecycle.Operation) []map[string][]TopologyHint { var providersHints []map[string][]TopologyHint for _, provider := range s.hintProviders { // Get the TopologyHints for a Pod from a provider. - hints := provider.GetPodTopologyHints(pod) + hints := provider.GetPodTopologyHints(pod, operation) providersHints = append(providersHints, hints) - logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod)) + logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "operation", operation) } return providersHints } -func (s *podScope) calculateAffinity(logger klog.Logger, pod *v1.Pod) (TopologyHint, bool) { - providersHints := s.accumulateProvidersHints(logger, pod) +func (s *podScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, operation lifecycle.Operation) (TopologyHint, bool) { + providersHints := s.accumulateProvidersHints(logger, pod, operation) bestHint, admit := s.policy.Merge(logger, providersHints) - logger.Info("PodTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod)) + logger.Info("PodTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "operation", operation) return bestHint, admit } diff --git a/pkg/kubelet/cm/topologymanager/scope_pod_test.go b/pkg/kubelet/cm/topologymanager/scope_pod_test.go index c8d347f552f34..6408b26b731e9 100644 --- a/pkg/kubelet/cm/topologymanager/scope_pod_test.go +++ b/pkg/kubelet/cm/topologymanager/scope_pod_test.go @@ -21,6 +21,7 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -133,7 +134,7 @@ func TestPodCalculateAffinity(t *testing.T) { }, } - podScope.calculateAffinity(logger, &v1.Pod{}) + podScope.calculateAffinity(logger, &v1.Pod{}, lifecycle.AddOperation) actual := podScope.policy.(*mockPolicy).ph if !reflect.DeepEqual(tc.expected, actual) { t.Errorf("Test Case: %s", tc.name) @@ -265,7 +266,7 @@ func TestPodAccumulateProvidersHints(t *testing.T) { hintProviders: tc.hp, }, } - actual := pScope.accumulateProvidersHints(logger, &v1.Pod{}) + actual := pScope.accumulateProvidersHints(logger, &v1.Pod{}, lifecycle.AddOperation) if !reflect.DeepEqual(actual, tc.expected) { t.Errorf("Test Case %s: Expected NUMANodeAffinity in result to be %v, got %v", tc.name, tc.expected, actual) } diff --git a/pkg/kubelet/cm/topologymanager/topology_manager.go b/pkg/kubelet/cm/topologymanager/topology_manager.go index 458efe178d9ef..463bd01066d8d 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager.go @@ -85,14 +85,14 @@ type HintProvider interface { // this function for each hint provider, and merges the hints to produce // a consensus "best" hint. The hint providers may subsequently query the // topology manager to influence actual resource assignment. - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]TopologyHint // GetPodTopologyHints returns a map of resource names to a list of possible // concrete resource allocations per Pod in terms of NUMA locality hints. - GetPodTopologyHints(pod *v1.Pod) map[string][]TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]TopologyHint // Allocate triggers resource allocation to occur on the HintProvider after // all hints have been gathered and the aggregated Hint is available via a // call to Store.GetAffinity(). - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error } // Store interface is to allow Hint Providers to retrieve pod affinity @@ -235,7 +235,7 @@ func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR metrics.TopologyManagerAdmissionRequestsTotal.Inc() startTime := time.Now() - podAdmitResult := m.scope.Admit(ctx, attrs.Pod) + podAdmitResult := m.scope.Admit(ctx, attrs.Pod, attrs.Operation) metrics.TopologyManagerAdmissionDuration.Observe(float64(time.Since(startTime).Milliseconds())) logger.V(4).Info("Pod Admit Result", "Message", podAdmitResult.Message, "pod", klog.KObj(attrs.Pod)) diff --git a/pkg/kubelet/cm/topologymanager/topology_manager_test.go b/pkg/kubelet/cm/topologymanager/topology_manager_test.go index 64cea9f7d3fc7..244d3af3ce83c 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager_test.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager_test.go @@ -210,15 +210,15 @@ type mockHintProvider struct { //allocateError error } -func (m *mockHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint { +func (m *mockHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]TopologyHint { return m.th } -func (m *mockHintProvider) GetPodTopologyHints(pod *v1.Pod) map[string][]TopologyHint { +func (m *mockHintProvider) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]TopologyHint { return m.th } -func (m *mockHintProvider) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *mockHintProvider) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { //return allocateError return nil } From 7111a54788dab19c0a701967e96b59c796cbd5c3 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Fri, 6 Mar 2026 05:37:28 +0100 Subject: [PATCH 10/15] Separate flow, put back of FG, infeasible resize on TopologyAffinityError Handle first review round, not ready yet --- pkg/kubelet/allocation/allocation_manager.go | 6 + pkg/kubelet/cm/cpumanager/cpu_assignment.go | 382 +++- .../cm/cpumanager/cpu_assignment_test.go | 22 +- pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 6 +- pkg/kubelet/cm/cpumanager/policy_static.go | 850 ++++--- .../cm/cpumanager/policy_static_test.go | 5 +- .../common/node/framework/podresize/resize.go | 2 +- test/e2e_node/cpu_manager_test.go | 1946 ++++++++++++----- 8 files changed, 2268 insertions(+), 951 deletions(-) diff --git a/pkg/kubelet/allocation/allocation_manager.go b/pkg/kubelet/allocation/allocation_manager.go index 50220da2589b4..e98800e5efa6c 100644 --- a/pkg/kubelet/allocation/allocation_manager.go +++ b/pkg/kubelet/allocation/allocation_manager.go @@ -36,6 +36,7 @@ import ( v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/allocation/state" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/events" @@ -592,6 +593,11 @@ func (m *manager) handlePodResourcesResize(logger klog.Logger, pod *v1.Pod) (boo } if reason != "" { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { + if reason == topologymanager.ErrorTopologyAffinity { + reason = v1.PodReasonInfeasible + } + } if m.statusManager.SetPodResizePendingCondition(pod.UID, reason, message, pod.Generation) { eventType := events.ResizeDeferred if reason == v1.PodReasonInfeasible { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 7e23e488bf540..cb612052b7de4 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -451,7 +451,31 @@ type cpuAccumulator struct { availableCPUSorter availableCPUSorter } -func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) *cpuAccumulator { +func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) *cpuAccumulator { + acc := &cpuAccumulator{ + logger: logger, + topo: topo, + details: topo.CPUDetails.KeepOnly(availableCPUs), + numCPUsNeeded: numCPUs, + result: cpuset.New(), + } + + if topo.NumSockets >= topo.NumNUMANodes { + acc.numaOrSocketsFirst = &numaFirst{acc} + } else { + acc.numaOrSocketsFirst = &socketsFirst{acc} + } + + if cpuSortingStrategy == CPUSortingStrategyPacked { + acc.availableCPUSorter = &sortCPUsPacked{acc} + } else { + acc.availableCPUSorter = &sortCPUsSpread{acc} + } + + return acc +} + +func newCPUAccumulatorForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) *cpuAccumulator { acc := &cpuAccumulator{ logger: logger, topo: topo, @@ -1128,17 +1152,8 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. -func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { - - // If the number of CPUs requested to be retained is not a subset - // of reusableCPUs, then we fail early - if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { - if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) - } - } - - acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) +func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) { + acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy) if acc.isSatisfied() { return acc.result, nil } @@ -1151,18 +1166,10 @@ func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, av // requires at least a NUMA node or socket's-worth of CPUs. If NUMA // Nodes map to 1 or more sockets, pull from NUMA nodes first. // Otherwise pull from sockets first. - acc.numaOrSocketsFirst.takeFullFirstLevelForResize() - if acc.isSatisfied() { - return acc.result, nil - } acc.numaOrSocketsFirst.takeFullFirstLevel() if acc.isSatisfied() { return acc.result, nil } - acc.numaOrSocketsFirst.takeFullSecondLevelForResize() - if acc.isSatisfied() { - return acc.result, nil - } acc.numaOrSocketsFirst.takeFullSecondLevel() if acc.isSatisfied() { return acc.result, nil @@ -1182,10 +1189,6 @@ func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, av // a core's-worth of CPUs. // If `CPUSortingStrategySpread` is specified, skip taking the whole core. if cpuSortingStrategy != CPUSortingStrategySpread { - acc.takeRemainCpusForFullCores() - if acc.isSatisfied() { - return acc.result, nil - } acc.takeFullCores() if acc.isSatisfied() { return acc.result, nil @@ -1195,10 +1198,6 @@ func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, av // 4. Acquire single threads, preferring to fill partially-allocated cores // on the same sockets as the whole cores we have already taken in this // allocation. - acc.takeRemainingCPUsForResize() - if acc.isSatisfied() { - return acc.result, nil - } acc.takeRemainingCPUs() if acc.isSatisfied() { return acc.result, nil @@ -1270,14 +1269,246 @@ func takeByTopologyNUMAPacked(logger logr.Logger, topo *topology.CPUTopology, av // of size 'cpuGroupSize' according to the algorithm described above. This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. -func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { +func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) { // If the number of CPUs requested cannot be handed out in chunks of // 'cpuGroupSize', then we just call out the packing algorithm since we // can't distribute CPUs in this chunk size. // PreferAlignByUncoreCache feature not implemented here yet and set to false. // Support for PreferAlignByUncoreCache to be done at beta release. if (numCPUs % cpuGroupSize) != 0 { - return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) + return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false) + } + + // Otherwise build an accumulator to start allocating CPUs from. + acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy) + if acc.isSatisfied() { + return acc.result, nil + } + if acc.isFailed() { + return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) + } + + // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. + numas := acc.sortAvailableNUMANodes() + + // Calculate the minimum and maximum possible number of NUMA nodes that + // could satisfy this request. This is used to optimize how many iterations + // of the loop we need to go through below. + minNUMAs, maxNUMAs := acc.rangeNUMANodesNeededToSatisfy(cpuGroupSize) + + // Try combinations of 1,2,3,... NUMA nodes until we find a combination + // where we can evenly distribute CPUs across them. To optimize things, we + // don't always start at 1 and end at len(numas). Instead, we use the + // values of 'minNUMAs' and 'maxNUMAs' calculated above. + for k := minNUMAs; k <= maxNUMAs; k++ { + // Iterate through the various n-choose-k NUMA node combinations, + // looking for the combination of NUMA nodes that can best have CPUs + // distributed across them. + var bestBalance float64 = math.MaxFloat64 + var bestRemainder []int = nil + var bestCombo []int = nil + acc.iterateCombinations(numas, k, func(combo []int) LoopControl { + // If we've already found a combo with a balance of 0 in a + // different iteration, then don't bother checking any others. + if bestBalance == 0 { + return Break + } + + // Check that this combination of NUMA nodes has enough CPUs to + // satisfy the allocation overall. + cpus := acc.details.CPUsInNUMANodes(combo...) + if cpus.Size() < numCPUs { + return Continue + } + + // Check that CPUs can be handed out in groups of size + // 'cpuGroupSize' across the NUMA nodes in this combo. + numCPUGroups := 0 + for _, numa := range combo { + numCPUGroups += (acc.details.CPUsInNUMANodes(numa).Size() / cpuGroupSize) + } + if (numCPUGroups * cpuGroupSize) < numCPUs { + return Continue + } + + // Check that each NUMA node in this combination can allocate an + // even distribution of CPUs in groups of size 'cpuGroupSize', + // modulo some remainder. + distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize + for _, numa := range combo { + cpus := acc.details.CPUsInNUMANodes(numa) + if cpus.Size() < distribution { + return Continue + } + } + + // Calculate how many CPUs will be available on each NUMA node in + // the system after allocating an even distribution of CPU groups + // of size 'cpuGroupSize' from each NUMA node in 'combo'. This will + // be used in the "balance score" calculation to help decide if + // this combo should ultimately be chosen. + availableAfterAllocation := make(mapIntInt, len(numas)) + for _, numa := range numas { + availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() + } + for _, numa := range combo { + availableAfterAllocation[numa] -= distribution + } + + // Check if there are any remaining CPUs to distribute across the + // NUMA nodes once CPUs have been evenly distributed in groups of + // size 'cpuGroupSize'. + remainder := numCPUs - (distribution * len(combo)) + + // Get a list of NUMA nodes to consider pulling the remainder CPUs + // from. This list excludes NUMA nodes that don't have at least + // 'cpuGroupSize' CPUs available after being allocated + // 'distribution' number of CPUs. + var remainderCombo []int + for _, numa := range combo { + if availableAfterAllocation[numa] >= cpuGroupSize { + remainderCombo = append(remainderCombo, numa) + } + } + + // Declare a set of local variables to help track the "balance + // scores" calculated when using different subsets of + // 'remainderCombo' to allocate remainder CPUs from. + var bestLocalBalance float64 = math.MaxFloat64 + var bestLocalRemainder []int = nil + + // If there aren't any remainder CPUs to allocate, then calculate + // the "balance score" of this combo as the standard deviation of + // the values contained in 'availableAfterAllocation'. + if remainder == 0 { + bestLocalBalance = standardDeviation(availableAfterAllocation.Values()) + bestLocalRemainder = nil + } + + // Otherwise, find the best "balance score" when allocating the + // remainder CPUs across different subsets of NUMA nodes in 'remainderCombo'. + // These remainder CPUs are handed out in groups of size 'cpuGroupSize'. + // We start from k=len(remainderCombo) and walk down to k=1 so that + // we continue to distribute CPUs as much as possible across + // multiple NUMA nodes. + for k := len(remainderCombo); remainder > 0 && k >= 1; k-- { + acc.iterateCombinations(remainderCombo, k, func(subset []int) LoopControl { + // Make a local copy of 'remainder'. + remainder := remainder + + // Make a local copy of 'availableAfterAllocation'. + availableAfterAllocation := availableAfterAllocation.Clone() + + // If this subset is not capable of allocating all + // remainder CPUs, continue to the next one. + if sum(availableAfterAllocation.Values(subset...)) < remainder { + return Continue + } + + // For all NUMA nodes in 'subset', walk through them, + // removing 'cpuGroupSize' number of CPUs from each + // until all remainder CPUs have been accounted for. + for remainder > 0 { + for _, numa := range subset { + if remainder == 0 { + break + } + if availableAfterAllocation[numa] < cpuGroupSize { + continue + } + availableAfterAllocation[numa] -= cpuGroupSize + remainder -= cpuGroupSize + } + } + + // Calculate the "balance score" as the standard deviation + // of the number of CPUs available on all NUMA nodes in the + // system after the remainder CPUs have been allocated + // across 'subset' in groups of size 'cpuGroupSize'. + balance := standardDeviation(availableAfterAllocation.Values()) + if balance < bestLocalBalance { + bestLocalBalance = balance + bestLocalRemainder = subset + } + + return Continue + }) + } + + // If the best "balance score" for this combo is less than the + // lowest "balance score" of all previous combos, then update this + // combo (and remainder set) to be the best one found so far. + if bestLocalBalance < bestBalance { + bestBalance = bestLocalBalance + bestRemainder = bestLocalRemainder + bestCombo = combo + } + + return Continue + }) + + // If we made it through all of the iterations above without finding a + // combination of NUMA nodes that can properly balance CPU allocations, + // then move on to the next larger set of NUMA node combinations. + if bestCombo == nil { + continue + } + + // Otherwise, start allocating CPUs from the NUMA node combination + // chosen. First allocate an even distribution of CPUs in groups of + // size 'cpuGroupSize' from 'bestCombo'. + distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize + for _, numa := range bestCombo { + cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false) + acc.take(cpus) + } + + // Then allocate any remaining CPUs in groups of size 'cpuGroupSize' + // from each NUMA node in the remainder set. + remainder := numCPUs - (distribution * len(bestCombo)) + for remainder > 0 { + for _, numa := range bestRemainder { + if remainder == 0 { + break + } + if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { + continue + } + cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false) + acc.take(cpus) + remainder -= cpuGroupSize + } + } + + // If we haven't allocated all of our CPUs at this point, then something + // went wrong in our accounting and we should error out. + if acc.numCPUsNeeded > 0 { + return cpuset.New(), fmt.Errorf("accounting error, not enough CPUs allocated, remaining: %v", acc.numCPUsNeeded) + } + + // Likewise, if we have allocated too many CPUs at this point, then something + // went wrong in our accounting and we should error out. + if acc.numCPUsNeeded < 0 { + return cpuset.New(), fmt.Errorf("accounting error, too many CPUs allocated, remaining: %v", acc.numCPUsNeeded) + } + + // Otherwise, return the result + return acc.result, nil + } + + // If we never found a combination of NUMA nodes that we could properly + // distribute CPUs across, fall back to the packing algorithm. + return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false) +} + +func takeByTopologyNUMADistributedForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + // If the number of CPUs requested cannot be handed out in chunks of + // 'cpuGroupSize', then we just call out the packing algorithm since we + // can't distribute CPUs in this chunk size. + // PreferAlignByUncoreCache feature not implemented here yet and set to false. + // Support for PreferAlignByUncoreCache to be done at beta release. + if (numCPUs % cpuGroupSize) != 0 { + return takeByTopologyNUMAPackedForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) } // If the number of CPUs requested to be retained is not a subset @@ -1289,7 +1520,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog } // Otherwise build an accumulator to start allocating CPUs from. - acc := newCPUAccumulator(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForResize) + acc := newCPUAccumulatorForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForResize) if acc.isSatisfied() { return acc.result, nil } @@ -1325,7 +1556,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // Iterate through the various n-choose-k NUMA node combinations, // looking for the combination of NUMA nodes that can best have CPUs // distributed across them. - var bestBalance float64 = math.MaxFloat64 + var bestBalance = math.MaxFloat64 var bestRemainder []int = nil var bestCombo []int = nil acc.iterateCombinations(numas, k, func(combo []int) LoopControl { @@ -1404,7 +1635,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // Declare a set of local variables to help track the "balance // scores" calculated when using different subsets of // 'remainderCombo' to allocate remainder CPUs from. - var bestLocalBalance float64 = math.MaxFloat64 + var bestLocalBalance = math.MaxFloat64 var bestLocalRemainder []int = nil // If there aren't any remainder CPUs to allocate, then calculate @@ -1490,7 +1721,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) - cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForResize) + cpus, _ := takeByTopologyNUMAPackedForResize(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForResize) acc.take(cpus) } @@ -1505,7 +1736,7 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForResize) + cpus, _ := takeByTopologyNUMAPackedForResize(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForResize) acc.take(cpus) remainder -= cpuGroupSize } @@ -1529,5 +1760,84 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. - return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) + return takeByTopologyNUMAPackedForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) +} + +func takeByTopologyNUMAPackedForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + + acc := newCPUAccumulatorForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) + if acc.isSatisfied() { + return acc.result, nil + } + if acc.isFailed() { + return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) + } + + // Algorithm: topology-aware best-fit + // 1. Acquire whole NUMA nodes and sockets, if available and the container + // requires at least a NUMA node or socket's-worth of CPUs. If NUMA + // Nodes map to 1 or more sockets, pull from NUMA nodes first. + // Otherwise pull from sockets first. + acc.numaOrSocketsFirst.takeFullFirstLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullFirstLevel() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullSecondLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullSecondLevel() + if acc.isSatisfied() { + return acc.result, nil + } + + // 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches + // if available and the container requires at least a UncoreCache's-worth + // of CPUs. Otherwise, acquire CPUs from the least amount of UncoreCaches. + if preferAlignByUncoreCache { + acc.takeUncoreCache() + if acc.isSatisfied() { + return acc.result, nil + } + } + + // 3. Acquire whole cores, if available and the container requires at least + // a core's-worth of CPUs. + // If `CPUSortingStrategySpread` is specified, skip taking the whole core. + if cpuSortingStrategy != CPUSortingStrategySpread { + acc.takeRemainCpusForFullCores() + if acc.isSatisfied() { + return acc.result, nil + } + acc.takeFullCores() + if acc.isSatisfied() { + return acc.result, nil + } + } + + // 4. Acquire single threads, preferring to fill partially-allocated cores + // on the same sockets as the whole cores we have already taken in this + // allocation. + acc.takeRemainingCPUsForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.takeRemainingCPUs() + if acc.isSatisfied() { + return acc.result, nil + } + + return cpuset.New(), fmt.Errorf("failed to allocate cpus") } diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index f3c94066cf62e..fe8d629fa2e2d 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -116,7 +116,7 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) result := acc.freeSockets() sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { @@ -217,7 +217,7 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) result := acc.freeNUMANodes() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -267,7 +267,7 @@ func TestCPUAccumulatorFreeSocketsAndNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) resultNUMANodes := acc.freeNUMANodes() if !reflect.DeepEqual(resultNUMANodes, tc.expectNUMANodes) { t.Errorf("expected NUMA Nodes %v to equal %v", resultNUMANodes, tc.expectNUMANodes) @@ -340,7 +340,7 @@ func TestCPUAccumulatorFreeCores(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) result := acc.freeCores() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -397,7 +397,7 @@ func TestCPUAccumulatorFreeCPUs(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) result := acc.freeCPUs() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -484,7 +484,7 @@ func TestCPUAccumulatorTake(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked, nil, nil) + acc := newCPUAccumulator(logger, tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked) totalTaken := 0 for _, cpus := range tc.takeCPUs { acc.take(cpus) @@ -758,7 +758,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) + result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) } @@ -860,7 +860,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) { if tc.opts.DistributeCPUsAcrossCores { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) + result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) if tc.expErr != "" && err.Error() != tc.expErr { t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err) } @@ -1063,7 +1063,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, nil, nil) + result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) @@ -1327,7 +1327,7 @@ func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) + result, err := takeByTopologyNUMAPackedForResize(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) @@ -1532,7 +1532,7 @@ func TestTakeByTopologyNUMADistributedForResize(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) + result, err := takeByTopologyNUMADistributedForResize(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go index 40608edd40c64..a074db11ca293 100644 --- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go @@ -50,7 +50,7 @@ func (m *fakeManager) Policy() Policy { func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() - logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) return nil } @@ -65,13 +65,13 @@ func (m *fakeManager) RemoveContainer(logger logr.Logger, containerID string) er func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() - logger.Info("Get container topology hints") + logger.Info("Get container topology hints", "operation", operation) return map[string][]topologymanager.TopologyHint{} } func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() - logger.Info("Get pod topology hints") + logger.Info("Get pod topology hints", "operation", operation) return map[string][]topologymanager.TopologyHint{} } diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index f712a240a5a70..1cf11df76c77b 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -227,8 +227,6 @@ type staticPolicy struct { affinity topologymanager.Store // set of CPUs to reuse across allocations in a pod cpusToReuse map[string]cpuset.CPUSet - // set of CPUs to reuse during pod resize - cpusToReuseDuringResize map[string]cpuset.CPUSet // options allow to fine-tune the behaviour of the policy options StaticPolicyOptions // we compute this value multiple time, and it's not supposed to change @@ -256,12 +254,11 @@ func NewStaticPolicy(logger logr.Logger, topology *topology.CPUTopology, numRese logger.Info("created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize) policy := &staticPolicy{ - topology: topology, - affinity: affinity, - cpusToReuse: make(map[string]cpuset.CPUSet), - options: opts, - cpuGroupSize: cpuGroupSize, - cpusToReuseDuringResize: make(map[string]cpuset.CPUSet), + topology: topology, + affinity: affinity, + cpusToReuse: make(map[string]cpuset.CPUSet), + options: opts, + cpuGroupSize: cpuGroupSize, } allCPUs := topology.CPUDetails.CPUs() @@ -274,7 +271,7 @@ func NewStaticPolicy(logger logr.Logger, topology *topology.CPUTopology, numRese // // For example: Given a system with 8 CPUs available and HT enabled, // if numReservedCPUs=2, then reserved={0,4} - reserved, _ = policy.takeByTopology(logger, allCPUs, numReservedCPUs, nil, nil) + reserved, _ = policy.takeByTopology(logger, allCPUs, numReservedCPUs) } if reserved.Size() != numReservedCPUs { @@ -350,17 +347,10 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { // 2. Check if state for static policy is consistent for pod := range tmpAssignments { for container, assignment := range tmpAssignments[pod] { - var cset cpuset.CPUSet - if assignment.Resized.IsEmpty() { - cset = assignment.Original - } else { - cset = assignment.Resized - } - // None of the cpu in DEFAULT cset should be in s.assignments - if !tmpDefaultCPUset.Intersection(cset).IsEmpty() { + if !tmpDefaultCPUset.Intersection(getCPUSetFromAssignment(assignment)).IsEmpty() { return fmt.Errorf("pod: %s, container: %s cpuset: %q overlaps with default cpuset %q", - pod, container, cset.String(), tmpDefaultCPUset.String()) + pod, container, getCPUSetFromAssignment(assignment).String(), tmpDefaultCPUset.String()) } } } @@ -376,13 +366,7 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { tmpCPUSets := []cpuset.CPUSet{} for pod := range tmpAssignments { for _, assignment := range tmpAssignments[pod] { - var cset cpuset.CPUSet - if assignment.Resized.IsEmpty() { - cset = assignment.Original - } else { - cset = assignment.Resized - } - tmpCPUSets = append(tmpCPUSets, cset) + tmpCPUSets = append(tmpCPUSets, getCPUSetFromAssignment(assignment)) } } totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...) @@ -444,246 +428,240 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, logger.Info("Allocate start") // V=0 for backward compatibility defer logger.V(2).Info("Allocate end") - if operation == lifecycle.AddOperation { - - numCPUs := p.guaranteedCPUs(logger, pod, container) - - if numCPUs == 0 { - // container belongs in the shared pool (nothing to do; use default cpuset) + switch operation { + case lifecycle.AddOperation: + return p.allocateForAdd(logger, s, pod, container) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.Info("CPU Manager allocation resize operation skipped, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs not enabled") return nil } - - if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { - logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") - return nil + return p.allocateForResize(logger, s, pod, container) + default: + return UnsupportedLifecycleOperationError{ + Operation: operation, } + } +} + +func (p *staticPolicy) allocateForAdd(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { + numCPUs := p.guaranteedCPUs(logger, pod, container) + if numCPUs == 0 { + // container belongs in the shared pool (nothing to do; use default cpuset) + return nil + } - logger.Info("Static policy: Allocate") + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } - // container belongs in an exclusively allocated pool - metrics.CPUManagerPinningRequestsTotal.Inc() - defer func() { - if rerr != nil { - metrics.CPUManagerPinningErrorsTotal.Inc() - if p.options.FullPhysicalCPUsOnly { - metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() - } - return - } - // TODO: move in updateMetricsOnAllocate + // container belongs in an exclusively allocated pool + metrics.CPUManagerPinningRequestsTotal.Inc() + defer func() { + if rerr != nil { + metrics.CPUManagerPinningErrorsTotal.Inc() if p.options.FullPhysicalCPUsOnly { - // increment only if we know we allocate aligned resources - metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() } - }() - + return + } + // TODO: move in updateMetricsOnAllocate if p.options.FullPhysicalCPUsOnly { - if (numCPUs % p.cpuGroupSize) != 0 { - // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted - // if the CPU requested is a multiple of the number of virtual cpus per physical cores. - // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put - // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores - // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, - // the pod would be placed on a node where there are enough physical cores available to be allocated. - // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket - // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all - // CPUs on a physical core. Allocation of individual threads would never have to occur. - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - CausedByPhysicalCPUs: false, - } - } - - availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() - - // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores - // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider - // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. - // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. - if numCPUs > availablePhysicalCPUs { - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - AvailablePhysicalCPUs: availablePhysicalCPUs, - CausedByPhysicalCPUs: true, - } - } + // increment only if we know we allocate aligned resources + metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() } - if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - p.updateCPUsToReuse(pod, container, cset) - logger.Info("Static policy: container already present in state, skipping") - return nil + }() + + if p.options.FullPhysicalCPUsOnly { + if (numCPUs % p.cpuGroupSize) != 0 { + // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted + // if the CPU requested is a multiple of the number of virtual cpus per physical cores. + // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put + // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores + // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, + // the pod would be placed on a node where there are enough physical cores available to be allocated. + // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket + // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all + // CPUs on a physical core. Allocation of individual threads would never have to occur. + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, + } } - // Call Topology Manager to get the aligned socket affinity across all hint providers. - hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - logger.Info("Topology Affinity", "affinity", hint) - - // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) - if err != nil { - logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) - return err + availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + + // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores + // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider + // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. + // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. + if numCPUs > availablePhysicalCPUs { + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, + } } - - s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) - p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) - p.updateMetricsOnAllocate(logger, s, cpuAllocation) - - logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + } + if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + p.updateCPUsToReuse(pod, container, cset) + logger.Info("Static policy: container already present in state, skipping") return nil + } + + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "affinity", hint) + // Allocate CPUs according to the NUMA affinity contained in the hint. + cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)]) + if err != nil { + logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) + return err } - if operation == lifecycle.ResizeOperation { + s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) + p.updateMetricsOnAllocate(logger, s, cpuAllocation) - numCPUs := p.guaranteedCPUs(logger, pod, container) - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - // During a pod resize, handle corner cases - err := p.isFeasibleResize(logger, s, pod, container) - if err != nil { - logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) - return err - } - } + logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + return nil +} - if numCPUs == 0 { - // container belongs in the shared pool (nothing to do; use default cpuset) - return nil - } +func (p *staticPolicy) allocateForResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { + numCPUs := p.guaranteedCPUs(logger, pod, container) + // During a pod resize, handle corner cases + err := p.isFeasibleResize(logger, s, pod, container) + if err != nil { + logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } - if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { - logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") - return nil - } + if numCPUs == 0 { + // container belongs in the shared pool (nothing to do; use default cpuset) + return nil + } - logger.Info("Static policy: Allocate") + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } - // container belongs in an exclusively allocated pool - metrics.CPUManagerPinningRequestsTotal.Inc() - defer func() { - if rerr != nil { - metrics.CPUManagerPinningErrorsTotal.Inc() - if p.options.FullPhysicalCPUsOnly { - metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() - } - return - } - // TODO: move in updateMetricsOnAllocate + // container belongs in an exclusively allocated pool + logger.Info("Increasing metric") + metrics.CPUManagerPinningRequestsTotal.Inc() + defer func() { + if rerr != nil { + metrics.CPUManagerPinningErrorsTotal.Inc() if p.options.FullPhysicalCPUsOnly { - // increment only if we know we allocate aligned resources - metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() } - }() - + return + } + // TODO: move in updateMetricsOnAllocate if p.options.FullPhysicalCPUsOnly { - if (numCPUs % p.cpuGroupSize) != 0 { - // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted - // if the CPU requested is a multiple of the number of virtual cpus per physical cores. - // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put - // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores - // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, - // the pod would be placed on a node where there are enough physical cores available to be allocated. - // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket - // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all - // CPUs on a physical core. Allocation of individual threads would never have to occur. - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - CausedByPhysicalCPUs: false, - } + // increment only if we know we allocate aligned resources + metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + }() + + if p.options.FullPhysicalCPUsOnly { + if (numCPUs % p.cpuGroupSize) != 0 { + // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted + // if the CPU requested is a multiple of the number of virtual cpus per physical cores. + // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put + // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores + // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, + // the pod would be placed on a node where there are enough physical cores available to be allocated. + // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket + // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all + // CPUs on a physical core. Allocation of individual threads would never have to occur. + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, } + } - availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { - cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] - availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) - } - } - // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores - // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider - // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. - // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. - if numCPUs > availablePhysicalCPUs { - return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: p.cpuGroupSize, - AvailablePhysicalCPUs: availablePhysicalCPUs, - CausedByPhysicalCPUs: true, - } - } + if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { + cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] + availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) } - if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) - // Call Topology Manager to get the aligned socket affinity across all hint providers. - hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) - // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint - // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) - if !ok { - err := getOriginalCPUSetError{ - PodUID: string(pod.UID), - ContainerName: container.Name, - } - return err - } - // Allocate CPUs according to the NUMA affinity contained in the hint. - newallocatedcpuset, witherr := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) - if witherr != nil { - err := ResizeAllocateCPUsError{ - PodUID: string(pod.UID), - ContainerName: container.Name, - TopologyError: witherr.Error(), - } - return err - } - - // Allocation successful, update the current state - s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) - p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) - p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) - logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) - // Updated state to the checkpoint file will be stored during - // the reconcile loop. TODO is this a problem? I don't believe - // because if kubelet will be terminated now, anyhow it will be - // needed the state to be cleaned up, an error will appear requiring - // the node to be drained. I think we are safe. All computations are - // using state_mem and not the checkpoint. - return nil - } else { - p.updateCPUsToReuse(pod, container, cpusInUseByPodContainer) - logger.Info("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) - return nil + // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores + // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider + // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. + // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. + if numCPUs > availablePhysicalCPUs { + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, } } - + } + if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) // Call Topology Manager to get the aligned socket affinity across all hint providers. hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - logger.Info("Topology Affinity", "affinity", hint) - + logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + err := getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + return err + } // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) - if err != nil { - logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) + newallocatedcpuset, witherr := p.allocateCPUsForResize(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) + if witherr != nil { + err := ResizeAllocateCPUsError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + TopologyError: witherr.Error(), + } return err } - s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) - p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) - p.updateMetricsOnAllocate(logger, s, cpuAllocation) - - logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) + logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. return nil } - return UnsupportedLifecycleOperationError{ - Operation: operation, + + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "affinity", hint) + + // Allocate CPUs according to the NUMA affinity contained in the hint. + cpuAllocation, err := p.allocateCPUsForResize(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) + if err != nil { + logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) + return err } + s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) + p.updateMetricsOnAllocate(logger, s, cpuAllocation) + + logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + return nil } // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. @@ -694,11 +672,7 @@ func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName strin if containerName == name { continue } - if assignment.Resized.IsEmpty() { - cset = cset.Union(assignment.Original) - } else { - cset = cset.Union(assignment.Resized) - } + cset = cset.Union(getCPUSetFromAssignment(assignment)) } return cset } @@ -722,7 +696,7 @@ func (p *staticPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID return nil } -func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { +func (p *staticPolicy) allocateCPUsForResize(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) allocatableCPUs := cpuset.New() @@ -751,11 +725,11 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i alignedCPUs := p.getAlignedCPUs(numaAffinity, allocatableCPUs) numAlignedToAlloc := alignedCPUs.Size() - if numCPUs < numAlignedToAlloc { + if min(numCPUs, numAlignedToAlloc) == numCPUs { numAlignedToAlloc = numCPUs } - allocatedCPUs, err := p.takeByTopology(logger, alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) + allocatedCPUs, err := p.takeByTopologyForResize(logger, alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -765,7 +739,7 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i if numCPUs > result.CPUs.Size() { // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. - remainingCPUs, err := p.takeByTopology(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) + remainingCPUs, err := p.takeByTopologyForResize(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -796,6 +770,44 @@ func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs i return result, nil } +func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (topology.Allocation, error) { + logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) + + allocatableCPUs := p.GetAvailableCPUs(s).Union(reusableCPUs) + + // If there are aligned CPUs in numaAffinity, attempt to take those first. + result := topology.EmptyAllocation() + if numaAffinity != nil { + alignedCPUs := p.getAlignedCPUs(numaAffinity, allocatableCPUs) + + numAlignedToAlloc := alignedCPUs.Size() + if numCPUs < numAlignedToAlloc { + numAlignedToAlloc = numCPUs + } + + allocatedCPUs, err := p.takeByTopology(logger, alignedCPUs, numAlignedToAlloc) + if err != nil { + return topology.EmptyAllocation(), err + } + + result.CPUs = result.CPUs.Union(allocatedCPUs) + } + + // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. + remainingCPUs, err := p.takeByTopology(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size()) + if err != nil { + return topology.EmptyAllocation(), err + } + result.CPUs = result.CPUs.Union(remainingCPUs) + result.Aligned = p.topology.CheckAlignment(result.CPUs) + + // Remove allocated CPUs from the shared CPUSet. + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + + logger.Info("AllocateCPUs", "result", result.String()) + return result, nil +} + func (p *staticPolicy) guaranteedCPUs(logger logr.Logger, pod *v1.Pod, container *v1.Container) int { qos := v1qos.GetPodQOS(pod) if qos != v1.PodQOSGuaranteed { @@ -848,25 +860,7 @@ func (p *staticPolicy) podGuaranteedCPUs(logger logr.Logger, pod *v1.Pod) int { return requestedByLongRunningContainers } -func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { - - // Protect against CPU leaks by failing early - if mustKeepCPUsForResize != nil { - if !mustKeepCPUsForResize.IsSubsetOf(availableCPUs) { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of available CPUs %s", mustKeepCPUsForResize.String(), availableCPUs.String()) - } - } - if reusableCPUsForResize != nil { - if !reusableCPUsForResize.IsSubsetOf(availableCPUs) { - return cpuset.New(), fmt.Errorf("reusable CPUs %s are not a subset of available CPUs %s", reusableCPUsForResize.String(), availableCPUs.String()) - } - } - if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { - if !mustKeepCPUsForResize.IsSubsetOf(reusableCPUsForResize.Clone()) { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) - } - } - +func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) { cpuSortingStrategy := CPUSortingStrategyPacked if p.options.DistributeCPUsAcrossCores { cpuSortingStrategy = CPUSortingStrategySpread @@ -877,15 +871,31 @@ func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.C if p.options.FullPhysicalCPUsOnly { cpuGroupSize = p.cpuGroupSize } - return takeByTopologyNUMADistributed(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) + return takeByTopologyNUMADistributed(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy) } - return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) + return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption) } func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { - logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "operation", operation) + switch operation { + case lifecycle.AddOperation: + return p.getTopologyHintsForAdd(logger, s, pod, container) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.V(3).Info("CPU Manager hint generation skipped, resize operation not supported by the static CPU manager policy, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs are not enabled", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + return p.getTopologyHintsForResize(logger, s, pod, container) + default: + logger.V(3).Info("CPU Manager hint generation skipped, operation not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } +} +func (p *staticPolicy) getTopologyHintsForAdd(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) // Get a count of how many guaranteed CPUs have been requested. requested := p.guaranteedCPUs(logger, pod, container) @@ -902,33 +912,88 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * return nil } - reusable := cpuset.New() - // Short circuit to regenerate the same hints if there are already // guaranteed CPUs allocated to the Container. This might happen after a // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requested { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - if allocated.Size() < requested { - reusable = reusable.Union(allocated) - } else { - reusable = allocated - - // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). - // It should be an empty CPUSet for a newly created pod. - reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) - - // Generate hints. - cpuHints := p.generateCPUTopologyHints(cpuset.New(), reusable, requested) - logger.Info("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) - - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): cpuHints, - } - } - } else { - logger.Info("CPUs already allocated to container with different number than request", "requestedSize", requested, "allocatedSize", allocated.Size()) + logger.Info("CPUs already allocated to container with different number than request", "requestedSize", requested, "allocatedSize", allocated.Size()) + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } + } + logger.Info("Regenerating TopologyHints for CPUs already allocated") + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.New(), requested), + } + } + + // Get a list of available CPUs. + available := p.GetAvailableCPUs(s) + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable := p.cpusToReuse[string(pod.UID)] + + // Generate hints. + cpuHints := p.generateCPUTopologyHints(available, reusable, requested) + logger.Info("TopologyHints generated", "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } +} + +func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "operation", operation) + switch operation { + case lifecycle.AddOperation: + return p.getPodTopologyHintsForAdd(logger, s, pod) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.V(3).Info("CPU Manager hint generation skipped, resize operation not supported by the static CPU manager policy, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs are not enabled", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + return p.getPodTopologyHintsForResize(logger, s, pod) + default: + logger.V(3).Info("CPU Manager hint generation skipped, operation not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } +} + +func (p *staticPolicy) getPodTopologyHintsForAdd(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID) + + // Get a count of how many guaranteed CPUs have been requested by Pod. + requested := p.podGuaranteedCPUs(logger, pod) + + // Number of required CPUs is not an integer or a pod is not part of the Guaranteed QoS class. + // It will be treated by the TopologyManager as having no preference and cause it to ignore this + // resource when considering pod alignment. + // In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true]. + if requested == 0 { + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.V(3).Info("CPU Manager pod hint generation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } + + assignedCPUs := cpuset.New() + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + logger_ := klog.LoggerWithValues(logger, "containerName", container.Name) + + requestedByContainer := p.guaranteedCPUs(logger, pod, &container) + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requestedByContainer { + logger_.Info("CPUs already allocated to container with different number than request", "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. @@ -936,11 +1001,14 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * string(v1.ResourceCPU): {}, } } - } else { - logger.Info("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.New(), requested), - } + // A set of CPUs already assigned to containers in this pod + assignedCPUs = assignedCPUs.Union(allocated) + } + } + if assignedCPUs.Size() == requested { + logger.Info("Regenerating TopologyHints for CPUs already allocated") + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.New(), requested), } } @@ -949,7 +1017,10 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). // It should be an empty CPUSet for a newly created pod. - reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + reusable := p.cpusToReuse[string(pod.UID)] + + // Ensure any CPUs already assigned to containers in this pod are included as part of the hint generation. + reusable = reusable.Union(assignedCPUs) // Generate hints. cpuHints := p.generateCPUTopologyHints(available, reusable, requested) @@ -960,7 +1031,7 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * } } -func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) getPodTopologyHintsForResize(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID) // Get a count of how many guaranteed CPUs have been requested by Pod. @@ -990,13 +1061,11 @@ func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, po if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requestedByContainer { logger_.Info("CPUs already allocated to container with different number than request", "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) - if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - // An empty list of hints will be treated as a preference that cannot be satisfied. - // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. - // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): {}, - } + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, } } // A set of CPUs already assigned to containers in this pod @@ -1027,6 +1096,7 @@ func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, po return map[string][]topologymanager.TopologyHint{ string(v1.ResourceCPU): cpuHints, } + } // generateCPUTopologyHints generates a set of TopologyHints given the set of @@ -1093,12 +1163,6 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu if hints[i].NUMANodeAffinity.Count() == minAffinitySize { hints[i].Preferred = true } - - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - if hints[i].NUMANodeAffinity.Count() == request { - hints[i].Preferred = true - } - } } return hints @@ -1173,13 +1237,7 @@ func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet { totalAssignedCPUs := cpuset.New() for _, assignment := range s.GetCPUAssignments() { for _, assignment := range assignment { - var cset cpuset.CPUSet - if assignment.Resized.IsEmpty() { - cset = assignment.Original - } else { - cset = assignment.Resized - } - totalAssignedCPUs = totalAssignedCPUs.Union(cset) + totalAssignedCPUs = totalAssignedCPUs.Union(getCPUSetFromAssignment(assignment)) } } return totalAssignedCPUs @@ -1270,3 +1328,185 @@ func (p *staticPolicy) isFeasibleResize(logger logr.Logger, s state.State, pod * } return nil } + +// generateCPUTopologyHintsForResize generates a set of TopologyHints given the set of +// available CPUs and the number of CPUs being requested. +// +// It follows the convention of marking all hints that have the same number of +// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and +// marking all others with 'Preferred: false'. +func (p *staticPolicy) generateCPUTopologyHintsForResize(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint { + // Initialize minAffinitySize to include all NUMA Nodes. + minAffinitySize := p.topology.CPUDetails.NUMANodes().Size() + + // Iterate through all combinations of numa nodes bitmask and build hints from them. + hints := []topologymanager.TopologyHint{} + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { + // First, update minAffinitySize for the current request size. + cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() + if cpusInMask >= request && mask.Count() < minAffinitySize { + minAffinitySize = mask.Count() + } + + // Then check to see if we have enough CPUs available on the current + // numa node bitmask to satisfy the CPU request. + numMatching := 0 + for _, c := range reusableCPUs.List() { + // Disregard this mask if its NUMANode isn't part of it. + if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { + return + } + numMatching++ + } + + // Finally, check to see if enough available CPUs remain on the current + // NUMA node combination to satisfy the CPU request. + for _, c := range availableCPUs.List() { + if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { + numMatching++ + } + } + + // If they don't, then move onto the next combination. + if numMatching < request { + return + } + + // Otherwise, create a new hint from the numa node bitmask and add it to the + // list of hints. We set all hint preferences to 'false' on the first + // pass through. + hints = append(hints, topologymanager.TopologyHint{ + NUMANodeAffinity: mask, + Preferred: false, + }) + }) + + // Loop back through all hints and update the 'Preferred' field based on + // counting the number of bits sets in the affinity mask and comparing it + // to the minAffinitySize. Only those with an equal number of bits set (and + // with a minimal set of numa nodes) will be considered preferred. + for i := range hints { + if p.options.AlignBySocket && p.isHintSocketAligned(hints[i], minAffinitySize) { + hints[i].Preferred = true + continue + } + if hints[i].NUMANodeAffinity.Count() == minAffinitySize { + hints[i].Preferred = true + } + } + + return hints +} + +func (p *staticPolicy) takeByTopologyForResize(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // Protect against CPU leaks by failing early + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of available CPUs %s", mustKeepCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("reusable CPUs %s are not a subset of available CPUs %s", reusableCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(reusableCPUsForResize.Clone()) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + + cpuSortingStrategy := CPUSortingStrategyPacked + if p.options.DistributeCPUsAcrossCores { + cpuSortingStrategy = CPUSortingStrategySpread + } + + if p.options.DistributeCPUsAcrossNUMA { + cpuGroupSize := 1 + if p.options.FullPhysicalCPUsOnly { + cpuGroupSize = p.cpuGroupSize + } + return takeByTopologyNUMADistributedForResize(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) + } + + return takeByTopologyNUMAPackedForResize(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) +} + +func (p *staticPolicy) getTopologyHintsForResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + // Get a count of how many guaranteed CPUs have been requested. + requested := p.guaranteedCPUs(logger, pod, container) + + // Number of required CPUs is not an integer or a container is not part of the Guaranteed QoS class. + // It will be treated by the TopologyManager as having no preference and cause it to ignore this + // resource when considering pod alignment. + // In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true]. + if requested == 0 { + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.V(3).Info("CPU Manager hint generation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + + reusable := cpuset.New() + + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requested { + if allocated.Size() < requested { + reusable = reusable.Union(allocated) + } else { + reusable = allocated + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + + // Generate hints. + cpuHints := p.generateCPUTopologyHintsForResize(cpuset.New(), reusable, requested) + logger.Info("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + } + } else { + logger.Info("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHintsForResize(allocated, cpuset.New(), requested), + } + } + } + + // Get a list of available CPUs. + available := p.GetAvailableCPUs(s) + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + + // Generate hints. + cpuHints := p.generateCPUTopologyHintsForResize(available, reusable, requested) + logger.Info("TopologyHints generated", "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + +} + +func getCPUSetFromAssignment(assignment state.ContainerCPUAssignment) cpuset.CPUSet { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if assignment.Resized.IsEmpty() { + return assignment.Original + } else { + return assignment.Resized + } + } else { + return assignment.Original + } +} diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 33eafd56f2788..b5402ed314426 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -817,6 +817,7 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT func runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t *testing.T, testCase staticPolicyTest) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) runStaticPolicyTestCase(t, testCase) } @@ -1088,6 +1089,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { logger, _ := ktesting.NewTestContext(t) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) t.Run(testCase.description, func(t *testing.T) { policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) @@ -1426,6 +1428,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { logger, _ := ktesting.NewTestContext(t) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) t.Run(testCase.description, func(t *testing.T) { policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) @@ -1685,7 +1688,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { continue } - cpuAlloc, err := policy.allocateCPUs(logger, st, tc.numRequested, tc.socketMask, cpuset.New(), nil, nil) + cpuAlloc, err := policy.allocateCPUs(logger, st, tc.numRequested, tc.socketMask, cpuset.New()) if err != nil { t.Errorf("StaticPolicy allocateCPUs() error (%v). expected CPUSet %v not error %v", tc.description, tc.expCSet, err) diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index 802a6b3a67d7d..2f5903c3416df 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -480,7 +480,7 @@ func ExpectPodResizePending(ctx context.Context, f *framework.Framework, resizeP if resourceErrs := VerifyPodStatusResources(resizePendingPod, expectedContainers); resourceErrs != nil { errs = append(errs, fmt.Errorf("container status resources don't match expected: %w", formatErrors(resourceErrs))) } - if restartErrs := verifyPodRestarts(f, resizePendingPod, expectedContainers); restartErrs != nil { + if restartErrs := verifyPodRestarts(ctx, f, resizePendingPod, expectedContainers); restartErrs != nil { errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) } diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index be1d5c5615e92..56cdea30f40dd 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -4127,755 +4127,1513 @@ var _ = SIGDescribe("CPU Manager with InPlacePodVerticalScalingExclusiveCPUs ena ginkgo.When("topologyManagerPolicy option is set to restricted, resizing a Guaranteed multiple containers Pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { ginkgo.BeforeEach(func(ctx context.Context) { + if smtLevel < 1 { + e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption) + } reservedCPUs = cpuset.New(0) }) - ginkgo.DescribeTable("", - func(ctx context.Context, - originalContainers []podresize.ResizableContainerInfo, - originalCpuInfo []containerCPUInfo, - desiredContainersFirstPatch []podresize.ResizableContainerInfo, - expectedContainersFirstPatch []podresize.ResizableContainerInfo, - expectedCpuInfoFirstPatch []containerCPUInfo, - wantErrorFirstPatch string, - desiredContainersSecondPatch []podresize.ResizableContainerInfo, - expectedContainersSecondPatch []podresize.ResizableContainerInfo, - expectedCpuInfoSecondPatch []containerCPUInfo, - wantErrorSecondPatch string, - ) { + if smtLevel >= minSMTLevel { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) - expectedCPUCount := 0 - for ctx := range expectedCpuInfoFirstPatch { - expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount - } - skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "restricted", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) - expectedCPUCount = 0 - for ctx := range expectedCpuInfoSecondPatch { - expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount - } - skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) - updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check - enableInPlacePodVerticalScalingExclusiveCPUs: true, - topologyManagerPolicyName: "restricted", - topologyManagerScopeName: "container", - topologyManagerPolicyOptions: map[string]string{ - "max-allowable-numa-nodes": "8", - "prefer-closest-numa-nodes": "true", - }, - })) + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) - testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) - ginkgo.By("creating pod with multiple containers") - podClient := e2epod.NewPodClient(f) - newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } - ginkgo.By("verifying original pod resources, allocations are as expected") - podresize.VerifyPodResources(newPods[0], originalContainers, nil) + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) - ginkgo.By("verifying original pod cpusets are as expected") - for cdx := range originalCpuInfo { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) - } + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - ginkgo.By("patching pod for resize") - patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) - if wantErrorFirstPatch == "" { - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch pod for resize") + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) - expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) - ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") - podresize.VerifyPodResources(patchedPod, expected, nil) + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) - ginkgo.By("waiting for resize to be actuated") - resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) - podresize.ExpectPodResized(ctx, f, resizedPod, expected) + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } - ginkgo.By("verifying pod resources after resize") - podresize.VerifyPodResources(resizedPod, expected, nil) + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) - ginkgo.By("verifying pod cpusets after resize") - for cdx := range originalCpuInfo { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) - } + if wantErrorSecondPatch == "" { - ginkgo.By("patching again pod for resize") - secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") - if wantErrorSecondPatch == "" { + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch again pod for resize") + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) - expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) - ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") - podresize.VerifyPodResources(patchedPod, expected, nil) + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) - ginkgo.By("waiting for second patch resize to be actuated") - resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) - podresize.ExpectPodResized(ctx, f, resizedPod, expected) + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") - ginkgo.By("verifying pod resources after second resize") - podresize.VerifyPodResources(resizedPod, expected, nil) + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - ginkgo.By("verifying pod cpusets after second resize") - for cdx := range expectedCpuInfoSecondPatch { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } } } else { - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch again pod for resize") + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") - expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + framework.ExpectNoError(err, "failed to get resize pending pod") - ginkgo.By("waiting for testing pod resize to be actuated for second patch") - expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) - ginkgo.By("waiting for testing pod resize status to be pending for second patch") + ginkgo.By("waiting for testing pod resize status to be pending") WaitForPodResizePending(ctx, f, actuatedPod) actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get actuated pod for second patch") + framework.ExpectNoError(err, "failed to get actuated pod") - expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) - ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) - ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") - gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) - for cdx := range expectedCpuInfoSecondPatch { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) - } + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) } - } else { - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch pod for resize") + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + } + if smtLevel == 1 { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) - ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") - expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) - podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "restricted", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) - resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get resize pending pod") + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) - ginkgo.By("waiting for testing pod resize to be actuated") - expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) - actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) - ginkgo.By("waiting for testing pod resize status to be pending") - WaitForPodResizePending(ctx, f, actuatedPod) + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) - actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get actuated pod") + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } - expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) - ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") - podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) - ginkgo.By("ensuring the testing pod is failed for the expected reason") - gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - // we cannot nor we should predict which CPUs the container gets - ginkgo.By("verifying pod cpusets after resize") - gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) - } - }, - ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", - // Initial - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, - }, + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } }, - // Expected cpuCount before first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to decrease (gu-container-1) CPU request/limit, within available capacity because of TopologyAffinityError", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Desired first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, }, - }, - // Expected after first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 10, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Want error after first patch - "", - // Desired second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, }, - }, - // Expected after second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after second patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - "", - ), - ) + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "Infeasible.*", + ), + ) + } }) ginkgo.When("topology manager policy option is set to single-numa-node, resizing a Guaranteed multiple container pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { ginkgo.BeforeEach(func(ctx context.Context) { + if smtLevel < 1 { + e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption) + } reservedCPUs = cpuset.New(0) }) - ginkgo.DescribeTable("", - func(ctx context.Context, - originalContainers []podresize.ResizableContainerInfo, - originalCpuInfo []containerCPUInfo, - desiredContainersFirstPatch []podresize.ResizableContainerInfo, - expectedContainersFirstPatch []podresize.ResizableContainerInfo, - expectedCpuInfoFirstPatch []containerCPUInfo, - wantErrorFirstPatch string, - desiredContainersSecondPatch []podresize.ResizableContainerInfo, - expectedContainersSecondPatch []podresize.ResizableContainerInfo, - expectedCpuInfoSecondPatch []containerCPUInfo, - wantErrorSecondPatch string, - ) { + if smtLevel >= minSMTLevel { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) - expectedCPUCount := 0 - for ctx := range expectedCpuInfoFirstPatch { - expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount - } - skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "single-numa-node", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) - expectedCPUCount = 0 - for ctx := range expectedCpuInfoSecondPatch { - expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount - } - skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) - updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check - enableInPlacePodVerticalScalingExclusiveCPUs: true, - topologyManagerPolicyName: "single-numa-node", - topologyManagerScopeName: "container", - topologyManagerPolicyOptions: map[string]string{ - "max-allowable-numa-nodes": "8", - "prefer-closest-numa-nodes": "true", - }, - })) + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) - tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) - testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) - ginkgo.By("creating pod with multiple containers") - podClient := e2epod.NewPodClient(f) - newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } - ginkgo.By("verifying original pod resources, allocations are as expected") - podresize.VerifyPodResources(newPods[0], originalContainers, nil) + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) - ginkgo.By("verifying original pod cpusets are as expected") - for cdx := range originalCpuInfo { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) - } + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - ginkgo.By("patching pod for resize") - patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) - if wantErrorFirstPatch == "" { - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch pod for resize") + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) - expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) - ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") - podresize.VerifyPodResources(patchedPod, expected, nil) + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) - ginkgo.By("waiting for resize to be actuated") - resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) - podresize.ExpectPodResized(ctx, f, resizedPod, expected) + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } - ginkgo.By("verifying pod resources after resize") - podresize.VerifyPodResources(resizedPod, expected, nil) + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) - ginkgo.By("verifying pod cpusets after resize") - for cdx := range originalCpuInfo { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) - } + if wantErrorSecondPatch == "" { - ginkgo.By("patching again pod for resize") - secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") - if wantErrorSecondPatch == "" { + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch again pod for resize") + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) - expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) - ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") - podresize.VerifyPodResources(patchedPod, expected, nil) + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) - ginkgo.By("waiting for second patch resize to be actuated") - resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) - podresize.ExpectPodResized(ctx, f, resizedPod, expected) + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") - ginkgo.By("verifying pod resources after second resize") - podresize.VerifyPodResources(resizedPod, expected, nil) + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - ginkgo.By("verifying pod cpusets after second resize") - for cdx := range expectedCpuInfoSecondPatch { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } } } else { - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch again pod for resize") + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") - ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") - expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + framework.ExpectNoError(err, "failed to get resize pending pod") - ginkgo.By("waiting for testing pod resize to be actuated for second patch") - expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) - ginkgo.By("waiting for testing pod resize status to be pending for second patch") + ginkgo.By("waiting for testing pod resize status to be pending") WaitForPodResizePending(ctx, f, actuatedPod) actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get actuated pod for second patch") + framework.ExpectNoError(err, "failed to get actuated pod") - expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) - ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) - ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") - gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) - for cdx := range expectedCpuInfoSecondPatch { - gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) - } + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) } - } else { - patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, - newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, "failed to patch pod for resize") - - ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") - expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) - podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) - - resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get resize pending pod") - - ginkgo.By("waiting for testing pod resize to be actuated") - expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) - actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) - - ginkgo.By("waiting for testing pod resize status to be pending") - WaitForPodResizePending(ctx, f, actuatedPod) - - actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) - framework.ExpectNoError(err, "failed to get actuated pod") - - expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) - ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") - podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) - - ginkgo.By("ensuring the testing pod is failed for the expected reason") - gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) - - // we cannot nor we should predict which CPUs the container gets - ginkgo.By("verifying pod cpusets after resize") - gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) - } - }, - ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", - // Initial - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, - }, }, - // Expected cpuCount before first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Desired first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, }, - }, - // Expected after first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Want error after first patch - "", - // Desired second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, }, - }, - // Expected after second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after second patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - "", - ), - ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", - // Initial - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount before first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Desired first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - // Expected after first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 4, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Want error after first patch - "", - // Desired second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected after second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - // Expected cpuCount after second patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 4, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - "", - ), - ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", - // Initial - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount before first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 3, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Desired first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - // Expected after first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + } + if smtLevel == 1 { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "single-numa-node", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } }, - // Expected cpuCount after first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, }, - }, - // Want error after first patch - "", - // Desired second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected after second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after second patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, }, - }, - "prohibitedCPUAllocation.*", - ), - ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", - // Initial - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - // Expected cpuCount before first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 2, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Desired first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected after first patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 4, + }, }, - }, - // Expected cpuCount after first patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - // Want error after first patch - "", - // Desired second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected after second patch - []podresize.ResizableContainerInfo{ - { - Name: "gu-container-1", - Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - { - Name: "gu-container-2", - Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - }, - // Expected cpuCount after second patch - []containerCPUInfo{ - { - Name: "gu-container-1", - cpuCount: 4, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, }, - { - Name: "gu-container-2", - cpuCount: 2, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, }, - }, - "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", - ), - ) + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + } }) }, ) From a5fd8a54f5297934795aaa1a876a612d23b7f45b Mon Sep 17 00:00:00 2001 From: Lukasz Wojciechowski Date: Wed, 11 Feb 2026 06:43:47 +0100 Subject: [PATCH 11/15] kubelet: enhance TestReconcileState Enhance CPU manager's test of reconcileState function (the one that actuates allocated CPU sets in runtime). The improvement involves three elements: 1) verification if lastUpdateState contains expected values; 2) enabling verification of how the reconcile process completed for multiple containers, not just a single one; 3) extending test cases to cover the above changes and add a simple multi-container test. Signed-off-by: Lukasz Wojciechowski --- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 272 +++++++++++++----- 1 file changed, 207 insertions(+), 65 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index df46ff1697de7..c240064321cff 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -841,20 +841,22 @@ func TestReconcileState(t *testing.T) { nil) testCases := []struct { - description string - policy Policy - activePods []*v1.Pod - pspPS v1.PodStatus - pspFound bool - updateErr error - stAssignments state.ContainerCPUAssignments - stDefaultCPUSet cpuset.CPUSet - lastUpdateStAssignments state.ContainerCPUAssignments - lastUpdateStDefaultCPUSet cpuset.CPUSet - expectStAssignments state.ContainerCPUAssignments - expectStDefaultCPUSet cpuset.CPUSet - expectSucceededContainerName string - expectFailedContainerName string + description string + policy Policy + activePods []*v1.Pod + pspPS v1.PodStatus + pspFound bool + updateErr error + stAssignments state.ContainerCPUAssignments + stDefaultCPUSet cpuset.CPUSet + lastUpdateStAssignments state.ContainerCPUAssignments + lastUpdateStDefaultCPUSet cpuset.CPUSet + expectStAssignments state.ContainerCPUAssignments + expectStDefaultCPUSet cpuset.CPUSet + expectLastUpdateStAssignments state.ContainerCPUAssignments + expectLastUpdateStDefaultCPUSet cpuset.CPUSet + expectSucceededContainerName []string + expectFailedContainerName []string }{ { description: "cpu manager reconcile - no error", @@ -900,9 +902,15 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile init container - no error", @@ -948,9 +956,15 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - pod status not found", @@ -970,17 +984,19 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspPS: v1.PodStatus{}, - pspFound: false, - updateErr: nil, - stAssignments: state.ContainerCPUAssignments{}, - stDefaultCPUSet: cpuset.New(), - lastUpdateStAssignments: state.ContainerCPUAssignments{}, - lastUpdateStDefaultCPUSet: cpuset.New(), - expectStAssignments: state.ContainerCPUAssignments{}, - expectStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: "", - expectFailedContainerName: "", + pspPS: v1.PodStatus{}, + pspFound: false, + updateErr: nil, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - container state not found", @@ -1008,16 +1024,18 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, - stAssignments: state.ContainerCPUAssignments{}, - stDefaultCPUSet: cpuset.New(), - lastUpdateStAssignments: state.ContainerCPUAssignments{}, - lastUpdateStDefaultCPUSet: cpuset.New(), - expectStAssignments: state.ContainerCPUAssignments{}, - expectStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + pspFound: true, + updateErr: nil, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconclie - cpuset is empty", @@ -1063,9 +1081,11 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconclie - container update error", @@ -1111,9 +1131,11 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconcile - state has inactive container", @@ -1162,9 +1184,15 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is current", @@ -1208,15 +1236,21 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - lastUpdateStDefaultCPUSet: cpuset.New(5, 6, 7), + lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is not current", @@ -1260,15 +1294,113 @@ func TestReconcileState(t *testing.T) { "fakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, - lastUpdateStDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New(1, 2)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile default CPU sets - no error", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, }, } @@ -1294,47 +1426,57 @@ func TestReconcileState(t *testing.T) { }, } mgr.sourcesReady = &sourcesReadyStub{} + mgr.lastUpdateState.SetCPUAssignments(testCase.lastUpdateStAssignments) + mgr.lastUpdateState.SetDefaultCPUSet(testCase.lastUpdateStDefaultCPUSet) success, failure := mgr.reconcileState(context.Background()) if !reflect.DeepEqual(testCase.expectStAssignments, mgr.state.GetCPUAssignments()) { t.Errorf("%v", testCase.description) t.Errorf("Expected state container cpu assignments: %v, actual: %v", testCase.expectStAssignments, mgr.state.GetCPUAssignments()) - } if !reflect.DeepEqual(testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) { t.Errorf("%v", testCase.description) t.Errorf("Expected state default cpuset: %v, actual: %v", testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) + } + if !reflect.DeepEqual(testCase.expectLastUpdateStAssignments, mgr.lastUpdateState.GetCPUAssignments()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected lastUpdateState container cpu assignments: %v, actual: %v", testCase.expectLastUpdateStAssignments, mgr.lastUpdateState.GetCPUAssignments()) + } + + if !reflect.DeepEqual(testCase.expectLastUpdateStDefaultCPUSet, mgr.lastUpdateState.GetDefaultCPUSet()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected lastUpdateState default cpuset: %v, actual: %v", testCase.expectLastUpdateStDefaultCPUSet, mgr.lastUpdateState.GetDefaultCPUSet()) } - if testCase.expectSucceededContainerName != "" { + for _, name := range testCase.expectSucceededContainerName { // Search succeeded reconciled containers for the supplied name. foundSucceededContainer := false for _, reconciled := range success { - if reconciled.containerName == testCase.expectSucceededContainerName { + if reconciled.containerName == name { foundSucceededContainer = true break } } if !foundSucceededContainer { t.Errorf("%v", testCase.description) - t.Errorf("Expected reconciliation success for container: %s", testCase.expectSucceededContainerName) + t.Errorf("Expected reconciliation success for container: %s", name) } } - if testCase.expectFailedContainerName != "" { + for _, name := range testCase.expectFailedContainerName { // Search failed reconciled containers for the supplied name. foundFailedContainer := false for _, reconciled := range failure { - if reconciled.containerName == testCase.expectFailedContainerName { + if reconciled.containerName == name { foundFailedContainer = true break } } if !foundFailedContainer { t.Errorf("%v", testCase.description) - t.Errorf("Expected reconciliation failure for container: %s", testCase.expectFailedContainerName) + t.Errorf("Expected reconciliation failure for container: %s", name) } } } From 92072471fe9bc0110c987b870c372ba286598ea6 Mon Sep 17 00:00:00 2001 From: Lukasz Wojciechowski Date: Fri, 6 Mar 2026 18:02:52 +0100 Subject: [PATCH 12/15] kubelet: verify CPUSets set in mock runtime Extend mockRuntimeService in the CPU manager tests to store CPUSets that were applied to the runtime. Additionally, after each update of container resources (if the testCPUConflicts flag is enabled), the mock runtime verifies if exclusive CPUs are assigned to a single container only. The extended mockRuntimeService is applied to TestReconcileState, so it verifies conflicts after each update and after reconciliation is completed, it is verified if the runtime state matches expectations. Proper fields are added to each of the existing test cases. Additionally, the mockRuntimeService has been enhanced to support returning a sequence of errors from UpdateContainerResources calls. The err field was changed from a single error to a slice of errors ([]error), and the UpdateContainerResources function now returns the first error from the slice and removes it, allowing different errors to be returned for successive calls. This enhancement enables more sophisticated testing scenarios where multiple container resource updates may fail at different points in the test sequence. Proper fields are added to each of the existing test cases to accommodate the new error slice functionality and CPU set tracking capabilities. Signed-off-by: Lukasz Wojciechowski --- .../cm/cpumanager/cpu_manager_others_test.go | 36 ++++ pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 198 +++++++++++++----- .../cm/cpumanager/cpu_manager_windows_test.go | 42 ++++ 3 files changed, 228 insertions(+), 48 deletions(-) create mode 100644 pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go create mode 100644 pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go new file mode 100644 index 0000000000000..07a1406177b6f --- /dev/null +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go @@ -0,0 +1,36 @@ +//go:build !windows + +/* +Copyright 2026 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpumanager + +import ( + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + "k8s.io/utils/cpuset" +) + +func (rt mockRuntimeService) getCPUSetFromResources(resources *runtimeapi.ContainerResources) cpuset.CPUSet { + if resources != nil && resources.Linux != nil { + set, err := cpuset.Parse(resources.Linux.CpusetCpus) + if err != nil { + rt.t.Errorf("(%v) Cannot parse Linux CPUSet resources %v", rt.testCaseDescription, resources.Linux.CpusetCpus) + return cpuset.New() + } + return set + } + return cpuset.New() +} diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index c240064321cff..40c024f801ea7 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -148,11 +148,52 @@ func (p *mockPolicy) GetAllocatableCPUs(m state.State) cpuset.CPUSet { } type mockRuntimeService struct { - err error + err []error + containerIDsWithExclusiveCPUs []string + state map[string]cpuset.CPUSet + testCPUConflicts bool + testCaseDescription string + t *testing.T } -func (rt mockRuntimeService) UpdateContainerResources(_ context.Context, id string, resources *runtimeapi.ContainerResources) error { - return rt.err +func (rt *mockRuntimeService) UpdateContainerResources(_ context.Context, id string, resources *runtimeapi.ContainerResources) error { + var ret error + if len(rt.err) > 0 { + ret = rt.err[0] + rt.err = rt.err[1:] + } + + // update state + if ret == nil { + newSet := rt.getCPUSetFromResources(resources) + if !newSet.IsEmpty() { + rt.state[id] = newSet + } + } + + if rt.testCPUConflicts { + // count in how many containers each CPU is used + cpuUsage := make(map[int][]string) + for containerID, set := range rt.state { + for _, cpu := range set.List() { + cpuUsage[cpu] = append(cpuUsage[cpu], containerID) + } + } + + // check if CPUs assigned to containers with exclusive CPUs are used exactly once + for _, containerID := range rt.containerIDsWithExclusiveCPUs { + set := rt.state[containerID] + for _, cpu := range set.List() { + if len(cpuUsage[cpu]) != 1 { + rt.t.Errorf("%v", rt.testCaseDescription) + rt.t.Errorf("after updating container resources of %s", id) + rt.t.Errorf("Expected CPU %d usage 1, actual usage %d %v", cpu, len(cpuUsage[cpu]), cpuUsage[cpu]) + } + } + } + } + + return ret } type mockPodStatusProvider struct { @@ -328,7 +369,7 @@ func TestCPUManagerAdd(t *testing.T) { nil) testCases := []struct { description string - updateErr error + updateErr []error policy Policy expCPUSet cpuset.CPUSet expAllocateErr error @@ -362,7 +403,7 @@ func TestCPUManagerAdd(t *testing.T) { defaultCPUSet: cpuset.New(1, 2, 3, 4), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ + containerRuntime: &mockRuntimeService{ err: testCase.updateErr, }, containerMap: containermap.NewContainerMap(), @@ -591,7 +632,7 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { policy: policy, state: mockState, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containermap.NewContainerMap(), podStatusProvider: mockPodStatusProvider{}, sourcesReady: &sourcesReadyStub{}, @@ -781,7 +822,7 @@ func TestCPUManagerRemove(t *testing.T) { defaultCPUSet: cpuset.New(), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containerMap, activePods: func() []*v1.Pod { return nil }, podStatusProvider: mockPodStatusProvider{}, @@ -798,7 +839,7 @@ func TestCPUManagerRemove(t *testing.T) { err: fmt.Errorf("fake error"), }, state: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containerMap, activePods: func() []*v1.Pod { return nil }, podStatusProvider: mockPodStatusProvider{}, @@ -846,7 +887,9 @@ func TestReconcileState(t *testing.T) { activePods []*v1.Pod pspPS v1.PodStatus pspFound bool - updateErr error + updateErr []error + containerIDsWithExclusiveCPUs []string + containerRuntimeInitialState map[string]cpuset.CPUSet stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet lastUpdateStAssignments state.ContainerCPUAssignments @@ -855,6 +898,7 @@ func TestReconcileState(t *testing.T) { expectStDefaultCPUSet cpuset.CPUSet expectLastUpdateStAssignments state.ContainerCPUAssignments expectLastUpdateStDefaultCPUSet cpuset.CPUSet + expectContainerRuntimeState map[string]cpuset.CPUSet expectSucceededContainerName []string expectFailedContainerName []string }{ @@ -887,8 +931,10 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -909,8 +955,11 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile init container - no error", @@ -941,8 +990,10 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -963,8 +1014,11 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - pod status not found", @@ -987,6 +1041,8 @@ func TestReconcileState(t *testing.T) { pspPS: v1.PodStatus{}, pspFound: false, updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(), lastUpdateStAssignments: state.ContainerCPUAssignments{}, @@ -995,6 +1051,7 @@ func TestReconcileState(t *testing.T) { expectStDefaultCPUSet: cpuset.New(), expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, expectSucceededContainerName: []string{}, expectFailedContainerName: []string{}, }, @@ -1026,6 +1083,8 @@ func TestReconcileState(t *testing.T) { }, pspFound: true, updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(), lastUpdateStAssignments: state.ContainerCPUAssignments{}, @@ -1034,6 +1093,7 @@ func TestReconcileState(t *testing.T) { expectStDefaultCPUSet: cpuset.New(), expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, expectSucceededContainerName: []string{}, expectFailedContainerName: []string{"fakeContainerName"}, }, @@ -1066,8 +1126,10 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, @@ -1084,6 +1146,7 @@ func TestReconcileState(t *testing.T) { expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, expectSucceededContainerName: []string{}, expectFailedContainerName: []string{"fakeContainerName"}, }, @@ -1116,8 +1179,10 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: fmt.Errorf("fake container update error"), + pspFound: true, + updateErr: []error{fmt.Errorf("fake container update error")}, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -1134,6 +1199,7 @@ func TestReconcileState(t *testing.T) { expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, expectSucceededContainerName: []string{}, expectFailedContainerName: []string{"fakeContainerName"}, }, @@ -1166,8 +1232,10 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -1191,8 +1259,11 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is current", @@ -1223,8 +1294,12 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -1249,8 +1324,11 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is not current", @@ -1281,8 +1359,12 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(3, 4), + }, stAssignments: state.ContainerCPUAssignments{ "fakePodUID": map[string]state.ContainerCPUAssignment{ "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, @@ -1307,11 +1389,14 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { - description: "cpu manager reconcile default CPU sets - no error", + description: "cpu manager reconcile - default CPU sets no error", policy: testPolicy, activePods: []*v1.Pod{ { @@ -1379,14 +1464,16 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, - stAssignments: state.ContainerCPUAssignments{}, - stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), - lastUpdateStAssignments: state.ContainerCPUAssignments{}, - lastUpdateStDefaultCPUSet: cpuset.New(), - expectStAssignments: state.ContainerCPUAssignments{}, - expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), expectLastUpdateStAssignments: state.ContainerCPUAssignments{ "fakePodAUID": map[string]state.ContainerCPUAssignment{ "fakeContainerAName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, @@ -1399,8 +1486,13 @@ func TestReconcileState(t *testing.T) { }, }, expectLastUpdateStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, - expectFailedContainerName: []string{}, + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 3, 4, 5, 6, 7), + "fakeContainerBID": cpuset.New(1, 2, 3, 4, 5, 6, 7), + "fakeContainerCID": cpuset.New(1, 2, 3, 4, 5, 6, 7), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, }, } @@ -1413,8 +1505,13 @@ func TestReconcileState(t *testing.T) { defaultCPUSet: testCase.stDefaultCPUSet, }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ - err: testCase.updateErr, + containerRuntime: &mockRuntimeService{ + err: testCase.updateErr, + containerIDsWithExclusiveCPUs: testCase.containerIDsWithExclusiveCPUs, + state: testCase.containerRuntimeInitialState, + testCPUConflicts: true, + testCaseDescription: testCase.description, + t: t, }, containerMap: containermap.NewContainerMap(), activePods: func() []*v1.Pod { @@ -1450,6 +1547,11 @@ func TestReconcileState(t *testing.T) { t.Errorf("Expected lastUpdateState default cpuset: %v, actual: %v", testCase.expectLastUpdateStDefaultCPUSet, mgr.lastUpdateState.GetDefaultCPUSet()) } + if !reflect.DeepEqual(testCase.expectContainerRuntimeState, mgr.containerRuntime.(*mockRuntimeService).state) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected containerRuntimeState: %v, actual: %v", testCase.expectContainerRuntimeState, mgr.containerRuntime.(*mockRuntimeService).state) + } + for _, name := range testCase.expectSucceededContainerName { // Search succeeded reconciled containers for the supplied name. foundSucceededContainer := false @@ -1509,7 +1611,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { nil) testCases := []struct { description string - updateErr error + updateErr []error policy Policy expCPUSet cpuset.CPUSet expAllocateErr error @@ -1533,7 +1635,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { defaultCPUSet: cpuset.New(0, 1, 2, 3), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ + containerRuntime: &mockRuntimeService{ err: testCase.updateErr, }, containerMap: containermap.NewContainerMap(), diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go new file mode 100644 index 0000000000000..ccc1447f76394 --- /dev/null +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go @@ -0,0 +1,42 @@ +//go:build windows + +/* +Copyright 2026 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpumanager + +import ( + utilfeature "k8s.io/apiserver/pkg/util/feature" + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + kubefeatures "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/kubelet/winstats" + "k8s.io/utils/cpuset" +) + +func (rt mockRuntimeService) getCPUSetFromResources(resources *runtimeapi.ContainerResources) cpuset.CPUSet { + if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) { + return cpuset.New() + } + if resources != nil && resources.Windows != nil { + var cpus []int + for _, affinity := range resources.Windows.AffinityCpus { + ga := winstats.GroupAffinity{Mask: affinity.CpuMask, Group: uint16(affinity.CpuGroup)} + cpus = append(cpus, ga.Processors()...) + } + return cpuset.New(cpus...) + } + return cpuset.New() +} From 768d48dafb915dfaf5fc2591c43eb6f305fe60b4 Mon Sep 17 00:00:00 2001 From: Lukasz Wojciechowski Date: Wed, 11 Feb 2026 07:56:53 +0100 Subject: [PATCH 13/15] kubelet: add resize testcases for reconcileState Add test cases for verification of CPUSets reconcilation for containers using exclusive CPUs. These test cases verify behavior of CPUs scaling with InPlacePodVerticalScalingExclusiveCPUs enabled. Signed-off-by: Lukasz Wojciechowski --- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 468 ++++++++++++++++++ 1 file changed, 468 insertions(+) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 40c024f801ea7..742221dfe90e2 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -1494,6 +1494,474 @@ func TestReconcileState(t *testing.T) { expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, expectFailedContainerName: []string{}, }, + { + description: "cpu manager reconcile - exclusive cpu container scaled up", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6, 7), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(1, 2, 5, 6, 7), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 2, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 2, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New(1, 2, 7)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New(1, 2, 7)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 7), + "fakeContainerBID": cpuset.New(3, 4, 5, 6), + "fakeContainerCID": cpuset.New(1, 2, 7), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu container scaled down", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 7), + "fakeContainerBID": cpuset.New(3, 4, 5, 6), + "fakeContainerCID": cpuset.New(1, 2, 7), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4, 5, 6), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New(1, 2, 5, 6, 7)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4, 5, 6), Resized: cpuset.New(3, 4)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New(1, 2, 5, 6, 7)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6, 7), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(1, 2, 5, 6, 7), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu containers swap CPUs", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2), + "fakeContainerBID": cpuset.New(3, 4), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + stDefaultCPUSet: cpuset.New(5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3, 4)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 4), + "fakeContainerBID": cpuset.New(2, 3), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu containers scaled down and up", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(5, 6), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + }, + stDefaultCPUSet: cpuset.New(4), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3, 4)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(5, 6), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(5, 6), Resized: cpuset.New(4)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6), + "fakeContainerBID": cpuset.New(3), + "fakeContainerCID": cpuset.New(4), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, } for _, testCase := range testCases { From 31e893bb4379752dd3e18c176b7839481119745e Mon Sep 17 00:00:00 2001 From: Lukasz Wojciechowski Date: Fri, 6 Mar 2026 18:03:15 +0100 Subject: [PATCH 14/15] kubelet: rework CPUSet reconciliation algorithm The former implementation of reconcileState in CPU manager had two issues: 1) It didn't apply CPU sets of all containers as a consistent state. The loop for all pods and containers applied CPUSets one by one without any critical section. During iteration over loop, the allocated CPUSets and default set could have been changed by executing Allocate for needs of resize or appearance of new container. Such situation could lead to conflicts of exclusive CPUs, e.g. a) reconcileState applies default CPU Set to container A runtime b) Allocate removes some CPUs from default CPU Set and use them as additional exclusive CPUS for container B which resizes up c) reconcileState loop continues and applies new CPU Set to container B runtime The CPUs that were allocated in step b) are now assigned to both containers A and B in runtime. 2) It didn't consider temporary conflicts when moving CPUs from one container to another. For example: a) container A uses CPUs: 1, 2; container B uses CPUs: 3, 4 b) container B scales down by one cpu, and now only CPU: 3 is allocated for it c) container A scales up and receives CPU: 4 during allocation (so now it has CPUs: 1, 2, 4 allocated) d) reconcileState applies new CPU Set to container A runtime: 1, 2, 4 e) reconcileState applies new CPU Set to container B runtime: 3 Between steps d) and e) CPU: 4 is assigned to both container A and container B. If kubelet is restarted that time, the situation will hold for some time. The new algorithm: 1) Modifies the loop iterating over all containers in all pods to act in a critical section controlled by CPU manager's lock - same that is used during Allocate. During the iteration CPU Sets are not yet applied but only collected to local variables: exclusiveCPUContainers and nonExclusiveCPUContainers. Usage of the lock guarantees consistent state. 2) After collection and outside critical section CPU Sets are applied to runtime in three steps: 2.1) remove scaled down exclusive CPUs from containers * as containers using exclusive CPUs cannot be scaled down to 0, because they need to retain Original CPU Set, it is safe operation that won't try to set an empty set in runtime * after this operation all CPUs that belong now to default CPU Set are no longer used exclusively by any container, so next step can be applied 2.2) apply CPU Sets for all non-exclusive containers * these containers will use default CPUSet * if default CPUSet shrank since last reconcileState call due to allocation of the CPUs as exclusive, the CPUs removed from it are now no longer used by non-exclusive containers, so next step can be applied 2.3) set final CPU Sets for containers using exclusive CPUs 3) Improves conflict detection by tracking failed container updates and preventing further updates that would conflict with previously failed ones. Signed-off-by: Lukasz Wojciechowski --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 98 +++++++++++++++++++----- 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index f3536f1a2a393..05913c0ade16b 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -354,10 +354,16 @@ func (m *manager) GetAllCPUs() cpuset.CPUSet { type reconciledContainer struct { podName string + podUID string containerName string containerID string } +type reconciledContainerAllocation struct { + reconciledContainer + allocatedSet cpuset.CPUSet +} + func (m *manager) removeStaleState(rootLogger logr.Logger) { // Only once all sources are ready do we attempt to remove any stale state. // This ensures that the call to `m.activePods()` below will succeed with @@ -424,15 +430,19 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta failure = []reconciledContainer{} rootLogger := klog.FromContext(ctx) - m.removeStaleState(rootLogger) + + exclusiveCPUContainers := []reconciledContainerAllocation{} + nonExclusiveCPUContainers := []reconciledContainerAllocation{} + + m.Lock() for _, pod := range m.activePods() { podLogger := klog.LoggerWithValues(rootLogger, "pod", klog.KObj(pod)) pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID) if !ok { podLogger.V(5).Info("skipping pod; status not found") - failure = append(failure, reconciledContainer{pod.Name, "", ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), "", ""}) continue } @@ -444,25 +454,24 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta containerID, err := findContainerIDByName(&pstatus, container.Name) if err != nil { logger.V(5).Info("skipping container; ID not found in pod status", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } cstatus, err := findContainerStatusByName(&pstatus, container.Name) if err != nil { logger.V(5).Info("skipping container; container status not found in pod status", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } if cstatus.State.Waiting != nil || (cstatus.State.Waiting == nil && cstatus.State.Running == nil && cstatus.State.Terminated == nil) { logger.V(4).Info("skipping container; container still in the waiting state", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } - m.Lock() if cstatus.State.Terminated != nil { // The container is terminated but we can't call m.RemoveContainer() // here because it could remove the allocated cpuset for the container @@ -473,7 +482,6 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta if err == nil { logger.V(4).Info("ignoring terminated container", "containerID", containerID) } - m.Unlock() continue } @@ -481,30 +489,84 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta // Idempotently add it to the containerMap incase it is missing. // This can happen after a kubelet restart, for example. m.containerMap.Add(string(pod.UID), container.Name, containerID) - m.Unlock() - cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name) + cset, exclusive := m.state.GetCPUSet(string(pod.UID), container.Name) + if !exclusive { + cset = m.state.GetDefaultCPUSet() + } if cset.IsEmpty() { // NOTE: This should not happen outside of tests. logger.V(2).Info("ReconcileState: skipping container; empty cpuset assigned") - failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, containerID}) continue } - lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name) - if !cset.Equals(lcset) { - logger.V(5).Info("updating container", "containerID", containerID, "cpuSet", cset) - err = m.updateContainerCPUSet(ctx, containerID, cset) + rca := reconciledContainerAllocation{ + reconciledContainer{pod.Name, string(pod.UID), container.Name, containerID}, + cset, + } + if exclusive { + exclusiveCPUContainers = append(exclusiveCPUContainers, rca) + } else { + nonExclusiveCPUContainers = append(nonExclusiveCPUContainers, rca) + } + + } + } + m.Unlock() + + failedContainersCPUSet := cpuset.New() + + updateContainers := func(containers []reconciledContainerAllocation, preliminary bool) { + for _, rca := range containers { + logger := klog.LoggerWithValues(rootLogger, "podName", rca.podName, "containerName", rca.containerName) + + lcset := m.lastUpdateState.GetCPUSetOrDefault(rca.podUID, rca.containerName) + + // Determine the CPU set to use based on the pass + var targetCPUSet cpuset.CPUSet + if preliminary { + targetCPUSet = rca.allocatedSet.Intersection(lcset) + } else { + targetCPUSet = rca.allocatedSet + } + + // Check if update is needed + if !targetCPUSet.Equals(lcset) { + if !preliminary && !targetCPUSet.Intersection(failedContainersCPUSet).IsEmpty() { + logger.Error(fmt.Errorf("Conflict with previously failed container CPUSet updates"), "failed to update container", "containerID", rca.containerID, "cpuSet", rca.allocatedSet) + failure = append(failure, rca.reconciledContainer) + failedContainersCPUSet = failedContainersCPUSet.Union(lcset) + continue + } + + logger.V(5).Info("updating container", "containerID", rca.containerID, "cpuSet", targetCPUSet) + err := m.updateContainerCPUSet(ctx, rca.containerID, targetCPUSet) if err != nil { - logger.Error(err, "failed to update container", "containerID", containerID, "cpuSet", cset) - failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) + logger.Error(err, "failed to update container", "containerID", rca.containerID, "cpuSet", targetCPUSet) + failure = append(failure, rca.reconciledContainer) + failedContainersCPUSet = failedContainersCPUSet.Union(lcset) continue } - m.lastUpdateState.SetCPUSet(string(pod.UID), container.Name, cset) + m.lastUpdateState.SetCPUSet(rca.podUID, rca.containerName, targetCPUSet) + } + + // Add to success list if required + if !preliminary { + success = append(success, rca.reconciledContainer) } - success = append(success, reconciledContainer{pod.Name, container.Name, containerID}) } } + + // first pass - only remove CPUs from containers using exclusive CPUs + updateContainers(exclusiveCPUContainers, true) + + // second pass - apply CPU sets to non exclusive CPU containers + updateContainers(nonExclusiveCPUContainers, false) + + // third pass - apply final CPU set to containers using exclusive CPUs + updateContainers(exclusiveCPUContainers, false) + return success, failure } From 4b312389b39f475a285811237c195ed6b2ac06df Mon Sep 17 00:00:00 2001 From: Lukasz Wojciechowski Date: Fri, 6 Mar 2026 17:15:19 +0100 Subject: [PATCH 15/15] kubelet: test cpu-manager multi-pass reconcilation This commit adds extensive test coverage for the new multi-pass reconciliation algorithm in the CPU manager, covering various failure scenarios of UpdateContainerResources function during the reconciliation process to ensure proper handling of CPU conflicts. Signed-off-by: Lukasz Wojciechowski --- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 656 ++++++++++++++++++ 1 file changed, 656 insertions(+) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 742221dfe90e2..5ca794d3c2e8e 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -1962,6 +1962,662 @@ func TestReconcileState(t *testing.T) { expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, expectFailedContainerName: []string{}, }, + { + description: "cpu manager reconcile - fail in first reconcile pass does not cause conflict", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerBID pass 2 ok + nil, //fakeContainerCID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(4, 7)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(4, 7), + "fakeContainerCID": cpuset.New(5, 8), + }, + expectSucceededContainerName: []string{"fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{"fakeContainerAName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in second pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerCID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerCName"}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerBName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in second pass which causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerBID pass 2 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(4, 7)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(4, 7), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerBName"}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerCName"}, + }, + { + description: "cpu manager reconcile - fail in second reconcile pass causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + nil, //fakeContainerAID pass 1 ok + nil, //fakeContainerCID pass 1 ok + fmt.Errorf("fakeContainerBID pass 2 error"), + nil, //fakeContainerAID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 6), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerAName"}, + expectFailedContainerName: []string{"fakeContainerBName", "fakeContainerCName"}, + }, } for _, testCase := range testCases {