From 643f058aa35275a8fcfe20d3710ee76566174590 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 20 Jan 2025 17:58:01 +0100 Subject: [PATCH 1/9] Static CPU management policy alongside InPlacePodVerticalScaling --- pkg/kubelet/cm/cpumanager/cpu_assignment.go | 74 +- .../cm/cpumanager/cpu_assignment_test.go | 18 +- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 1 + pkg/kubelet/cm/cpumanager/policy_static.go | 213 +- .../cm/cpumanager/policy_static_test.go | 714 ++++++- pkg/kubelet/types/constants.go | 7 + .../common/node/framework/podresize/resize.go | 62 +- test/e2e_node/cpu_manager_metrics_test.go | 6 +- test/e2e_node/cpu_manager_test.go | 863 +++++++- test/e2e_node/pod_resize_test.go | 1737 +++++++++++++++++ test/e2e_node/util.go | 4 +- 11 files changed, 3634 insertions(+), 65 deletions(-) create mode 100644 test/e2e_node/pod_resize_test.go diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 4838056c0de17..7da5202a02c83 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -291,7 +291,7 @@ type cpuAccumulator struct { availableCPUSorter availableCPUSorter } -func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) *cpuAccumulator { +func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) *cpuAccumulator { acc := &cpuAccumulator{ topo: topo, details: topo.CPUDetails.KeepOnly(availableCPUs), @@ -299,6 +299,43 @@ func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, result: cpuset.New(), } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsEmpty() { + // Increase of CPU resources ( scale up ) + // Take existing from allocated + // CPUs + if numCPUs > reusableCPUsForResize.Size() { 
+ // scale up ... + acc.take(reusableCPUsForResize.Clone()) + } + + // Decrease of CPU resources ( scale down ) + // Take delta from allocated CPUs, if mustKeepCPUsForScaleDown + // is not nil, use explicitly those. If it is nil + // take delta starting from lowest CoreId of CPUs ( TODO esotsal, perhaps not needed). + if numCPUs < reusableCPUsForResize.Size() { + if mustKeepCPUsForScaleDown != nil { + // If explicitly CPUs to keep + // during scale down is given ( this requires + // addition in container[].resources ... which + // could be possible to patch ? Esotsal Note This means + // modifying API code + if !(mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + acc.take(mustKeepCPUsForScaleDown.Clone()) + } else { + return acc + } + } + } + + if numCPUs == reusableCPUsForResize.Size() { + // nothing to do return as is + acc.take(reusableCPUsForResize.Clone()) + return acc + } + } + } + if topo.NumSockets >= topo.NumNUMANodes { acc.numaOrSocketsFirst = &numaFirst{acc} } else { acc.numaOrSocketsFirst = &socketsFirst{acc} } @@ -765,15 +802,23 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. 
-func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) { - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy) +func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { + if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + } + } + + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } - // Algorithm: topology-aware best-fit // 1. Acquire whole NUMA nodes and sockets, if available and the container // requires at least a NUMA node or socket's-worth of CPUs. If NUMA @@ -882,25 +927,32 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // of size 'cpuGroupSize' according to the algorithm described above. This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. 
-func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) { +func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested cannot be handed out in chunks of // 'cpuGroupSize', then we just call out the packing algorithm since we // can't distribute CPUs in this chunk size. // PreferAlignByUncoreCache feature not implemented here yet and set to false. // Support for PreferAlignByUncoreCache to be done at beta release. if (numCPUs % cpuGroupSize) != 0 { - return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + } + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { + if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + } } // Otherwise build an accumulator to start allocating CPUs from. 
- acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } if acc.isFailed() { return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) } - // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. numas := acc.sortAvailableNUMANodes() @@ -1072,7 +1124,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // size 'cpuGroupSize' from 'bestCombo'. distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) acc.take(cpus) } @@ -1087,7 +1139,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) acc.take(cpus) remainder -= cpuGroupSize } @@ -1111,5 +1163,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. 
- return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) } diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 8d5e05aa5d4ec..080961402e55c 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -114,7 +114,7 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeSockets() sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { @@ -214,7 +214,7 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeNUMANodes() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -263,7 +263,7 @@ func TestCPUAccumulatorFreeSocketsAndNUMANodes(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) resultNUMANodes := acc.freeNUMANodes() if !reflect.DeepEqual(resultNUMANodes, tc.expectNUMANodes) { t.Errorf("expected NUMA Nodes %v to equal %v", resultNUMANodes, tc.expectNUMANodes) @@ -335,7 +335,7 @@ func TestCPUAccumulatorFreeCores(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, 
func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCores() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -391,7 +391,7 @@ func TestCPUAccumulatorFreeCPUs(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, 0, CPUSortingStrategyPacked, nil, nil) result := acc.freeCPUs() if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) @@ -477,7 +477,7 @@ func TestCPUAccumulatorTake(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - acc := newCPUAccumulator(tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked) + acc := newCPUAccumulator(tc.topo, tc.availableCPUs, tc.numCPUs, CPUSortingStrategyPacked, nil, nil) totalTaken := 0 for _, cpus := range tc.takeCPUs { acc.take(cpus) @@ -750,7 +750,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) + result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err != nil && err.Error() != tc.expErr { t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) } @@ -851,7 +851,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) { if tc.opts.DistributeCPUsAcrossCores { strategy = CPUSortingStrategySpread } - result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption) + result, err := 
takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, nil, nil) if tc.expErr != "" && err.Error() != tc.expErr { t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err) } @@ -1053,7 +1053,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked) + result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, nil, nil) if err != nil { if tc.expErr == "" { t.Errorf("unexpected error [%v]", err) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 742dfc11ae902..702e317ab6907 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -167,6 +167,7 @@ func makePod(podUID, containerName, cpuRequest, cpuLimit string) *v1.Pod { } pod.UID = types.UID(podUID) + pod.Name = podUID pod.Spec.Containers[0].Name = containerName return pod diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 1c843a26d84a7..6ea26a669649c 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -32,6 +32,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/metrics" + "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/utils/cpuset" ) @@ -65,6 +66,50 @@ func (e SMTAlignmentError) Type() string { return ErrorSMTAlignment } +// inconsistentCPUAllocationError represents an error due to an +// attempt to either move a container from exclusively allocated +// pool to shared pool or move a container from shared pool to +// 
exclusively allocated pool. +type inconsistentCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + Shared2Exclusive bool +} + +func (e inconsistentCPUAllocationError) Error() string { + if e.RequestedCPUs == e.AllocatedCPUs { + return fmt.Sprintf("inconsistentCPUAllocation Error: Skip resize, nothing to be done, (requested CPUs = %s equal to allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } + if e.Shared2Exclusive { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from shared pool to exclusively allocated pool, (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } else { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from exclusively allocated pool to shared pool, not allowed (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e inconsistentCPUAllocationError) Type() string { + return types.ErrorInconsistentCPUAllocation +} + +// getCPUSetError represents an error due to a +// failed attempt to GetCPUSet from state +type getCPUSetError struct { + PodUID string + ContainerName string +} + +func (e getCPUSetError) Error() string { + return fmt.Sprintf("getCPUSet Error: Skip resize, unable to get CPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e getCPUSetError) Type() string { + return types.ErrorGetCPUSet +} + // staticPolicy is a CPU manager policy that does not change CPU // assignments for exclusively pinned guaranteed containers after the main // container process starts. 
@@ -118,6 +163,8 @@ type staticPolicy struct { affinity topologymanager.Store // set of CPUs to reuse across allocations in a pod cpusToReuse map[string]cpuset.CPUSet + // set of CPUs to reuse during pod resize + cpusToReuseDuringResize map[string]cpuset.CPUSet // options allow to fine-tune the behaviour of the policy options StaticPolicyOptions // we compute this value multiple time, and it's not supposed to change @@ -145,11 +192,12 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize) policy := &staticPolicy{ - topology: topology, - affinity: affinity, - cpusToReuse: make(map[string]cpuset.CPUSet), - options: opts, - cpuGroupSize: cpuGroupSize, + topology: topology, + affinity: affinity, + cpusToReuse: make(map[string]cpuset.CPUSet), + options: opts, + cpuGroupSize: cpuGroupSize, + cpusToReuseDuringResize: make(map[string]cpuset.CPUSet), } allCPUs := topology.CPUDetails.CPUs() @@ -162,7 +210,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv // // For example: Given a system with 8 CPUs available and HT enabled, // if numReservedCPUs=2, then reserved={0,4} - reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs) + reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs, nil, nil) } if reserved.Size() != numReservedCPUs { @@ -316,6 +364,15 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { numCPUs := p.guaranteedCPUs(pod, container) + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // During a pod resize, handle corner cases + err := p.validateInPlacePodVerticalScaling(pod, container) + if err != nil { + klog.ErrorS(err, "Static policy: Unable to resize allocated CPUs", "pod", klog.KObj(pod), "containerName", 
container.Name, "numCPUs", numCPUs) + return err + } + } + if numCPUs == 0 { // container belongs in the shared pool (nothing to do; use default cpuset) return nil @@ -364,6 +421,12 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { + cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] + availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) + } + } // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. @@ -377,10 +440,47 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } } } - if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - p.updateCPUsToReuse(pod, container, cset) - klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) - return nil + if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + klog.InfoS("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + if cpusInUseByPodContainerToResize, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + // Call Topology Manager to get the aligned socket affinity across all hint providers. 
+ hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, nil) + if err != nil { + klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. + return nil + } else { + return getCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + } + } else { + p.updateCPUsToReuse(pod, container, cpuset) + klog.InfoS("Static policy: InPlacePodVerticalScaling alognside CPU Static policy requires InPlacePodVerticalScaling to be enabled, skipping pod resize") + return nil + } + } else { + p.updateCPUsToReuse(pod, container, cpuset) + klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) + return nil + } } // Call Topology Manager to get the aligned socket affinity across all hint providers. 
@@ -388,7 +488,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) // Allocate CPUs according to the NUMA affinity contained in the hint. - cpuAllocation, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)]) + cpuAllocation, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) if err != nil { klog.ErrorS(err, "Unable to allocate CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) return err @@ -429,10 +529,18 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa return nil } -func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (topology.Allocation, error) { +func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) - - allocatableCPUs := p.GetAvailableCPUs(s).Union(reusableCPUs) + allocatableCPUs := cpuset.New() + if reusableCPUsForResize != nil { + if numCPUs >= reusableCPUsForResize.Size() { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUsForResize.Clone())) + } else if numCPUs < reusableCPUsForResize.Size() { + allocatableCPUs = reusableCPUsForResize.Clone() + } + } else { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUs)) + } // If there are aligned CPUs in numaAffinity, attempt to take those first. 
result := topology.EmptyAllocation() @@ -444,7 +552,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit numAlignedToAlloc = numCPUs } - allocatedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc) + allocatedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -453,7 +561,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit } // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. - remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size()) + remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) if err != nil { return topology.EmptyAllocation(), err } @@ -462,6 +570,17 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit // Remove allocated CPUs from the shared CPUSet. 
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + if reusableCPUsForResize != nil { + if reusableCPUsForResize.Size() < result.CPUs.Size() { + // Scale up or creation has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } else if reusableCPUsForResize.Size() > result.CPUs.Size() { + // Scale down has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(reusableCPUsForResize.Difference(result.CPUs))) + } + } else { + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } klog.InfoS("AllocateCPUs", "result", result.String()) return result, nil @@ -519,7 +638,7 @@ func (p *staticPolicy) podGuaranteedCPUs(pod *v1.Pod) int { return requestedByLongRunningContainers } -func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) { +func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { cpuSortingStrategy := CPUSortingStrategyPacked if p.options.DistributeCPUsAcrossCores { cpuSortingStrategy = CPUSortingStrategySpread @@ -530,10 +649,9 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) if p.options.FullPhysicalCPUsOnly { cpuGroupSize = p.cpuGroupSize } - return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy) + return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) } - - return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption) + return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForScaleDown) } func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, 
container *v1.Container) map[string][]topologymanager.TopologyHint { @@ -558,7 +676,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requested { - klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) + klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. @@ -613,7 +731,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin // kubelet restart, for example. if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requestedByContainer { - klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) + klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. 
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error. @@ -663,7 +781,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Iterate through all combinations of numa nodes bitmask and build hints from them. hints := []topologymanager.TopologyHint{} - bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().UnsortedList(), func(mask bitmask.BitMask) { // First, update minAffinitySize for the current request size. cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() if cpusInMask >= request && mask.Count() < minAffinitySize { @@ -673,7 +791,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Then check to see if we have enough CPUs available on the current // numa node bitmask to satisfy the CPU request. numMatching := 0 - for _, c := range reusableCPUs.List() { + for _, c := range reusableCPUs.UnsortedList() { // Disregard this mask if its NUMANode isn't part of it. if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { return @@ -683,7 +801,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Finally, check to see if enough available CPUs remain on the current // NUMA node combination to satisfy the CPU request. 
- for _, c := range availableCPUs.List() { + for _, c := range availableCPUs.UnsortedList() { if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { numMatching++ } @@ -815,3 +933,48 @@ func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpu metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count)) } } + +func (p *staticPolicy) validateInPlacePodVerticalScaling(pod *v1.Pod, container *v1.Container) error { + + if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + return nil + } + cpuQuantity := container.Resources.Requests[v1.ResourceCPU] + if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { + allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] + if allocatedCPUQuantity.Value() > 0 { + if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { + // container belongs in exclusive pool + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + // container move to shared pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: false, + } + } + } else { + // container belongs in shared pool + if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } + } else { + // container belongs in shared pool + if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } + } + return nil +} diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 
a91c54f84078c..0e140eebb8e1e 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/api/resource" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" pkgfeatures "k8s.io/kubernetes/pkg/features" @@ -44,6 +45,10 @@ type staticPolicyTest struct { stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod + qosClass v1.PodQOSClass + podAllocated string + resizeLimit string + resizeRequest string topologyHint *topologymanager.TopologyHint expErr error expCPUAlloc bool @@ -437,14 +442,14 @@ func TestStaticPolicyAdd(t *testing.T) { numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(2, 3, 6, 7), + "fakeContainer3": cpuset.New(1, 2, 5, 6), }, }, - stDefaultCPUSet: cpuset.New(0, 1, 4, 5), + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), expErr: nil, expCPUAlloc: true, - expCSet: cpuset.New(2, 3, 6, 7), + expCSet: cpuset.New(1, 2, 5, 6), }, { description: "GuPodMultipleCores, DualSocketHT, NoAllocExpectError", @@ -571,6 +576,115 @@ func TestStaticPolicyAdd(t *testing.T) { expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11), }, } + + // testcases for podResize + podResizeTestCases := []staticPolicyTest{ + { + description: "podResize GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation", + topo: topoSingleSocketHT, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 2, 5, 6), + }, + }, + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 2, 5, 6), + }, + { + description: "podResize 
GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "false", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "1000m", "1000m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: 
cpuset.New(4), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + } + newNUMAAffinity := func(bits ...int) bitmask.BitMask { affinity, _ := bitmask.NewBitMask(bits...) return affinity @@ -631,6 +745,9 @@ func TestStaticPolicyAdd(t *testing.T) { for _, testCase := range alignBySocketOptionTestCases { runStaticPolicyTestCaseWithFeatureGate(t, testCase) } + for _, testCase := range podResizeTestCases { + runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t, testCase) + } } func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { @@ -691,6 +808,12 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT runStaticPolicyTestCase(t, testCase) } +func runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t *testing.T, testCase staticPolicyTest) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + runStaticPolicyTestCase(t, testCase) +} + func TestStaticPolicyReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -750,6 +873,297 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet 
+ expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Increase allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Decrease allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", 
limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResizeSize: 4, + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), 
+ }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with less than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: 
makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1200m", limit: "1200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1200m", + resizeLimit: "1100m", + resizeRequest: "1100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + 
assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(st, pod, &container) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(st, podResized, &container) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because the allocated CPUs are not deterministic, + // while the size of defaultCPUSet is deterministic, and its intersection with the expected allocation + // should not be nil. 
TODO: revisit this check. + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. 
expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} + func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -798,6 +1212,298 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + containerName2 string + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet + expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Increase appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + 
qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Decrease appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // appContainer-0 CPUs 0, 4, 1, 5 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // appContainer-1 CPUS 2, 6, 3, 7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(), + expCSetAfterResize: cpuset.New(), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: 
v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 2-3, 6-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool with more than one core, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: 
v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, appContainer-1 in exclusive pool, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-1, 4-5 + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: 
state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(st, pod, &container) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. 
expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(st, podResized, &container) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because the allocated CPUs are not deterministic, + // while the size of defaultCPUSet is deterministic, and its intersection with the expected allocation + // should not be nil. TODO: revisit this check. + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. 
expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + err = policy.RemoveContainer(st, string(pod.UID), testCase.containerName2) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} func TestStaticPolicyRemove(t *testing.T) { testCases := []staticPolicyTest{ { @@ -965,7 +1671,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { continue } - cpuAlloc, err := policy.allocateCPUs(st, tc.numRequested, tc.socketMask, cpuset.New()) + cpuAlloc, err := policy.allocateCPUs(st, tc.numRequested, tc.socketMask, cpuset.New(), nil, nil) if err != nil { t.Errorf("StaticPolicy allocateCPUs() error (%v). 
expected CPUSet %v not error %v", tc.description, tc.expCSet, err) diff --git a/pkg/kubelet/types/constants.go b/pkg/kubelet/types/constants.go index 791052dbbcece..6c032139b74a1 100644 --- a/pkg/kubelet/types/constants.go +++ b/pkg/kubelet/types/constants.go @@ -38,3 +38,10 @@ const ( LimitedSwap SwapBehavior = "LimitedSwap" NoSwap SwapBehavior = "NoSwap" ) + +// InPlacePodVerticalScaling types +const ( + // ErrorInconsistentCPUAllocation represent the type of an inconsistentCPUAllocationError + ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" + ErrorGetCPUSet = "getCPUSetError" +) diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index 56c4a0e061f3b..56d44c89fc3d6 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -24,6 +24,9 @@ import ( "strconv" "strings" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" @@ -34,23 +37,30 @@ import ( "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" + "k8s.io/utils/cpuset" ) const ( - MinContainerRuntimeVersion string = "1.6.9" + CgroupCPUPeriod string = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" + CgroupCPUShares string = "/sys/fs/cgroup/cpu/cpu.shares" + CgroupCPUQuota string = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" + CgroupMemLimit string = "/sys/fs/cgroup/memory/memory.limit_in_bytes" + Cgroupv2MemLimit string = "/sys/fs/cgroup/memory.max" + Cgroupv2MemRequest string = "/sys/fs/cgroup/memory.min" + Cgroupv2CPULimit string = "/sys/fs/cgroup/cpu.max" + Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" + CPUPeriod string = "100000" ) type ResizableContainerInfo struct { - Name string - 
Resources *cgroups.ContainerResources - CPUPolicy *v1.ResourceResizeRestartPolicy - MemPolicy *v1.ResourceResizeRestartPolicy - RestartCount int32 - RestartPolicy v1.ContainerRestartPolicy - InitCtr bool + Name string + Resources *cgroups.ContainerResources + CPUPolicy *v1.ResourceResizeRestartPolicy + MemPolicy *v1.ResourceResizeRestartPolicy + RestartCount int32 + RestartPolicy v1.ContainerRestartPolicy + InitCtr bool + CPUsAllowedListValue string } func getTestResizePolicy(tcInfo ResizableContainerInfo) (resizePol []v1.ContainerResizePolicy) { @@ -393,3 +403,33 @@ func formatErrors(err error) error { } return fmt.Errorf("[\n%s\n]", strings.Join(errStrings, ",\n")) } + +func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod, wantCtrs []ResizableContainerInfo) error { + ginkgo.GinkgoHelper() + verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string) error { + mycmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" + calValue, _, err := e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", mycmd) + framework.Logf("Namespace %s Pod %s Container %s - looking for Cpus allowed list value %s in /proc/self/status", + pod.Namespace, pod.Name, cName, expectedCPUsAllowedListValue) + if err != nil { + return fmt.Errorf("failed to find expected value '%s' in container '%s' Cpus allowed list '/proc/self/status'", cName, expectedCPUsAllowedListValue) + } + c, err := cpuset.Parse(calValue) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for container %s in pod %s", cName, pod.Name) + cpuTotalValue := strconv.Itoa(c.Size()) + if cpuTotalValue != expectedCPUsAllowedListValue { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%s' not equal to expected '%s'", cName, calValue, cpuTotalValue, expectedCPUsAllowedListValue) + } + return nil + } + for _, ci := range wantCtrs { + if ci.CPUsAllowedListValue == "" { + continue + } + err := 
verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue) + if err != nil { + return err + } + } + return nil +} diff --git a/test/e2e_node/cpu_manager_metrics_test.go b/test/e2e_node/cpu_manager_metrics_test.go index 94ccb20954dd1..484dffb9a1684 100644 --- a/test/e2e_node/cpu_manager_metrics_test.go +++ b/test/e2e_node/cpu_manager_metrics_test.go @@ -104,6 +104,8 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa enableCPUManagerOptions: true, options: cpuPolicyOptions, }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) }) @@ -402,7 +404,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, + }, false, false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -442,7 +444,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, + }, false, false, ) updateKubeletConfig(ctx, f, newCfg, true) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index fa593d124c4a7..30ff89ce729eb 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -2805,7 +2805,7 @@ type cpuManagerKubeletArguments struct { options map[string]string } -func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { +func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) *kubeletconfig.KubeletConfiguration { newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { newCfg.FeatureGates = make(map[string]bool) @@ -2815,6 +2815,8 @@ func 
configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources + newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = isInPlacePodVerticalScalingExclusiveCPUsEnabled + newCfg.FeatureGates["InPlacePodVerticalScalingAllocatedStatus"] = isInPlacePodVerticalScalingAllocatedStatusEnabled newCfg.CPUManagerPolicy = kubeletArguments.policyName newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} @@ -2842,3 +2844,862 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku return newCfg } + +func runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx context.Context, f *framework.Framework) { + var cpu1 int + var ctnAttrs []ctnAttribute + var pod *v1.Pod + var cpuList []int + var expAllowedCPUsListRegex string + var err error + // First running a Gu Pod, + // second disable cpu manager in kubelet, + // then delete the Gu Pod, + // then enable cpu manager in kubelet, + // at last wait for the reconcile process cleaned up the state file, if the assignments map is empty, + // it proves that the automatic cleanup in the reconcile process is in effect. 
+ ginkgo.By("running a Gu pod for test remove") + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-testremove", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + } + pod = makeCPUManagerPod("gu-pod-testremove", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + ginkgo.By("checking if the expected cpuset was assigned") + cpu1 = 1 + if isHTEnabled() { + cpuList = mustParseCPUSet(getCPUSiblingList(0)).List() + cpu1 = cpuList[1] + } else if isMultiNUMA() { + cpuList = mustParseCPUSet(getCoreSiblingList(0)).List() + if len(cpuList) > 1 { + cpu1 = cpuList[1] + } + } + expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod.Spec.Containers[0].Name, pod.Name) + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + +} + +func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool, cpuAlloc int64) { + var err error + var ctnAttrs []ctnAttribute + var pod1, pod2, pod3 *v1.Pod + podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod + + framework.Logf("runCfsQuotaGuPods: disableQuota=%v, CPU Allocatable=%v", disabledCPUQuotaWithExclusiveCPUs, cpuAlloc) + + deleteTestPod := func(pod *v1.Pod) { + // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a + // 'deadline expired' message and the cleanup aborts, which we don't want. 
+ // So let's use a separate and more generous timeout (determined by trial and error) + ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + deletePodSyncAndWait(ctx2, f, pod.Namespace, pod.Name) + delete(podsToClean, string(pod.UID)) + } + + // cleanup leftovers on test failure. The happy path is covered by `deleteTestPod` calls + ginkgo.DeferCleanup(func() { + ginkgo.By("by deleting the pods and waiting for container removal") + // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a + // 'deadline expired' message and the cleanup aborts, which we don't want. + // So let's use a separate and more generous timeout (determined by trial and error) + ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + deletePodsAsync(ctx2, f, podsToClean) + }) + + podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep -E "($(cat /podinfo/uid)|$(cat /podinfo/uid | sed 's/-/_/g'))(/|\.slice/)cpu.max$") && sleep 1d`} + cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"} + defaultPeriod := "100000" + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-cfsquota-disabled", + cpuRequest: "1", + cpuLimit: "1", + }, + } + pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs) + pod1.Spec.Containers[0].Command = cfsCheckCommand + pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1) + podsToClean[string(pod1.UID)] = pod1 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, exclusive CPUs, unlimited)") + + expectedQuota := "100000" + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + expCFSQuotaRegex := fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod1.Spec.Containers[0].Name, pod1.Name) + 
deleteTestPod(pod1) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-cfsquota-enabled", + cpuRequest: "500m", + cpuLimit: "500m", + }, + } + pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs) + pod2.Spec.Containers[0].Command = cfsCheckCommand + pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2) + podsToClean[string(pod2.UID)] = pod2 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod2.Spec.Containers[0].Name, pod2.Name) + deleteTestPod(pod2) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "non-gu-container", + cpuRequest: "100m", + cpuLimit: "500m", + }, + } + pod3 = makeCPUManagerPod("non-gu-pod3", ctnAttrs) + pod3.Spec.Containers[0].Command = cfsCheckCommand + pod3 = e2epod.NewPodClient(f).CreateSync(ctx, pod3) + podsToClean[string(pod3.UID)] = pod3 + + ginkgo.By("checking if the expected cfs quota was assigned (BU pod, limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod3.Name, pod3.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod3.Spec.Containers[0].Name, pod3.Name) + deleteTestPod(pod3) + + if cpuAlloc >= 2 { + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-non-int-values", + cpuRequest: "500m", + cpuLimit: "500m", + }, + { + ctnName: "gu-container-int-values", + cpuRequest: "1", + cpuLimit: "1", + }, + } + pod4 := makeCPUManagerPod("gu-pod4", ctnAttrs) + pod4.Spec.Containers[0].Command = cfsCheckCommand + pod4.Spec.Containers[1].Command = cfsCheckCommand + pod4 = e2epod.NewPodClient(f).CreateSync(ctx, 
pod4) + podsToClean[string(pod4.UID)] = pod4 + + ginkgo.By("checking if the expected cfs quota was assigned (GU pod, container 0 exclusive CPUs unlimited, container 1 limited)") + + expectedQuota = "50000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod4.Spec.Containers[0].Name, pod4.Name) + expectedQuota = "100000" + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[1].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod4.Spec.Containers[1].Name, pod4.Name) + deleteTestPod(pod4) + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container-non-int-values", + cpuRequest: "500m", + cpuLimit: "500m", + }, + { + ctnName: "gu-container-int-values", + cpuRequest: "1", + cpuLimit: "1", + }, + } + + pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs) + pod5.Spec.Containers[0].Command = podCFSCheckCommand + pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5) + podsToClean[string(pod5.UID)] = pod5 + + ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, unlimited)") + + expectedQuota = "150000" + + if disabledCPUQuotaWithExclusiveCPUs { + expectedQuota = "max" + } + + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name) + deleteTestPod(pod5) + } else { + ginkgo.By(fmt.Sprintf("some cases SKIPPED - requests at least %d 
allocatable cores, got %d", 2, cpuAlloc)) + } + + ctnAttrs = []ctnAttribute{ + { + ctnName: "gu-container", + cpuRequest: "100m", + cpuLimit: "100m", + }, + } + + pod6 := makeCPUManagerPod("gu-pod6", ctnAttrs) + pod6.Spec.Containers[0].Command = podCFSCheckCommand + pod6 = e2epod.NewPodClient(f).CreateSync(ctx, pod6) + podsToClean[string(pod6.UID)] = pod6 + + ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, limited)") + + expectedQuota = "10000" + expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) + err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod6.Name, pod6.Spec.Containers[0].Name, expCFSQuotaRegex) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod6.Spec.Containers[0].Name, pod6.Name) + deleteTestPod(pod6) +} + +func runCPUManagerTests(f *framework.Framework) { + var cpuCap, cpuAlloc int64 + var oldCfg *kubeletconfig.KubeletConfiguration + + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < minCPUCapacity. + if cpuCap < minCPUCapacity { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < %d", minCPUCapacity) + } + + // Enable CPU Manager in the kubelet. 
+ newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a non-Gu pod") + runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) + + ginkgo.By("running a Gu pod") + runGuPodTest(ctx, f, 1, cpuset.New()) + + ginkgo.By("running multiple Gu and non-Gu pods") + runMultipleGuNonGuPods(ctx, f, cpuCap, cpuAlloc) + + // Skip rest of the tests if CPU capacity < 3. + if cpuCap < 3 { + e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3") + } + + ginkgo.By("running a Gu pod requesting multiple CPUs") + runMultipleCPUGuPod(ctx, f) + + ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs") + runMultipleCPUContainersGuPod(ctx, f) + + ginkgo.By("running multiple Gu pods") + runMultipleGuPods(ctx, f) + + ginkgo.By("test for automatically remove inactive pods from cpumanager state file.") + runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx, f) + }) + + ginkgo.It("reservedSystemCPUs are excluded only for Gu pods (strict-cpu-reservation option not enabled by default)", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < 2. 
+ if cpuCap < 2 { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") + } + + reservedSystemCPUs := cpuset.New(0) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") + runGuPodTest(ctx, f, 1, reservedSystemCPUs) + + ginkgo.By("running a non-Gu pod - it can use reserved system CPUs") + runNonGuPodTest(ctx, f, cpuCap, cpuset.New()) + + }) + + ginkgo.It("reservedSystemCPUs are excluded for both Gu and non-Gu pods (strict-cpu-reservation option enabled)", func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip CPU Manager tests altogether if the CPU capacity < 2. + if cpuCap < 2 { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2") + } + + reservedSystemCPUs := cpuset.New(0) + cpuPolicyOptions := map[string]string{ + cpumanager.StrictCPUReservationOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") + runGuPodTest(ctx, f, 1, reservedSystemCPUs) + + ginkgo.By("running a non-Gu pod - it shouldn't use reserved system CPUs with strict-cpu-reservation option enabled") + runNonGuPodTest(ctx, f, cpuCap, reservedSystemCPUs) + }) + + ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // 
strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + framework.Logf("SMT level %d", smtLevel) + + // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably + // check what we do have in the node. + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + // the order between negative and positive doesn't really matter + runSMTAlignmentNegativeTests(ctx, f) + runSMTAlignmentPositiveTests(ctx, f, smtLevel, cpuset.New()) + }) + + ginkgo.It("should assign CPUs as expected based on strict SMT alignment, reservedSystemCPUs should be excluded (both strict-cpu-reservation and full-pcpus-only options enabled)", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < 2 { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at last smtLevel*2 virtual cpus + if cpuAlloc < int64(smtLevel*2) { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, 
smtLevel*2) + } + + framework.Logf("SMT level %d", smtLevel) + + reservedSystemCPUs := cpuset.New(0) + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.StrictCPUReservationOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + // the order between negative and positive doesn't really matter + runSMTAlignmentNegativeTests(ctx, f) + runSMTAlignmentPositiveTests(ctx, f, smtLevel, reservedSystemCPUs) + }) + + ginkgo.It("should not enforce CFS quota for containers with static CPUs assigned", func(ctx context.Context) { + if !IsCgroup2UnifiedMode() { + e2eskipper.Skipf("Skipping since CgroupV2 not used") + } + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + if cpuAlloc < 1 { // save expensive kubelet restart + e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + disableCPUQuotaWithExclusiveCPUs: true, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. 
Some tests require > 1 exclusive CPU + runCfsQuotaGuPods(ctx, f, true, cpuAlloc) + }) + + ginkgo.It("should keep enforcing the CFS quota for containers with static CPUs assigned and feature gate disabled", func(ctx context.Context) { + if !IsCgroup2UnifiedMode() { + e2eskipper.Skipf("Skipping since CgroupV2 not used") + } + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + if cpuAlloc < 1 { // save expensive kubelet restart + e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + disableCPUQuotaWithExclusiveCPUs: false, + }, false, false, + ) + + updateKubeletConfig(ctx, f, newCfg, true) + + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU + runCfsQuotaGuPods(ctx, f, false, cpuAlloc) + }) + + f.It("should not reuse CPUs of restartable init containers", feature.SidecarContainers, func(ctx context.Context) { + cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + + // Skip rest of the tests if CPU capacity < 3. + if cpuCap < 3 { + e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3, got %d", cpuCap) + } + + // Enable CPU Manager in the kubelet. 
+ newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, false, false) + updateKubeletConfig(ctx, f, newCfg, true) + + ginkgo.By("running a Gu pod with a regular init container and a restartable init container") + ctrAttrs := []ctnAttribute{ + { + ctnName: "gu-init-container1", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + { + ctnName: "gu-restartable-init-container2", + cpuRequest: "1000m", + cpuLimit: "1000m", + restartPolicy: &containerRestartPolicyAlways, + }, + } + pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + ginkgo.By("checking if the expected cpuset was assigned") + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[0].Name) + framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name) + + reusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[0].Name, logs) + + gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String()) + + logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[1].Name) + framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name) + + nonReusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[1].Name, logs) + + gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String()) + + logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, 
pod.Spec.Containers[0].Name, logs) + + gomega.Expect(cpus.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", cpus.String()) + + gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrueBecause("expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String())) + gomega.Expect(nonReusableCPUs.Intersection(cpus).IsEmpty()).To(gomega.BeTrueBecause("expected non-reusable cpuset [%s] to be disjoint from cpuset [%s]", nonReusableCPUs.String(), cpus.String())) + + ginkgo.By("by deleting the pods and waiting for container removal") + deletePods(ctx, f, []string{pod.Name}) + waitForContainerRemoval(ctx, pod.Spec.InitContainers[0].Name, pod.Name, pod.Namespace) + waitForContainerRemoval(ctx, pod.Spec.InitContainers[1].Name, pod.Name, pod.Namespace) + waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + }) + + ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) { + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + framework.Logf("SMT level %d", smtLevel) + + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: 
string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + + ctnAttrs := []ctnAttribute{ + { + ctnName: "test-gu-container-distribute-cpus-across-numa-disabled", + cpuRequest: "2000m", + cpuLimit: "2000m", + }, + } + pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + gomega.Expect(cpus).To(BePackedCPUs()) + } + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + }) + + ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) { + var cpusNumPerNUMA, numaNodeNum int + + fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + framework.Logf("SMT level %d", smtLevel) + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + minCPUCount := int64(smtLevel * minCPUCapacity) + if cpuAlloc < minCPUCount { + e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) + } + + // this test is intended to be run on a multi-node NUMA system and + // a system with at least 4 cores per socket, hostcheck skips test + // if above requirements are not satisfied + numaNodeNum, _, _, cpusNumPerNUMA = hostCheck() + + cpuPolicyOptions := map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + } + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, false, false, + ) + updateKubeletConfig(ctx, f, newCfg, true) + // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed + // across NUMA nodes in cases where more than one NUMA node is 
required to satisfy the allocation. + // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single + // NUMA node. We have to pick cpuRequest such that: + // 1. CPURequest > cpusNumPerNUMA + // 2. Not occupy all the CPUs on the node and leave room for reserved CPU + // 3. CPURequest is a multiple of the number of NUMA nodes to allow equal CPU distribution across NUMA nodes + // + // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNuma * numaNodeNum) - reservedCPUscount) + // Considering all these constraints we select: CPURequest= (cpusNumPerNUMA-smtLevel)*numaNodeNum + + cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum + ctnAttrs := []ctnAttribute{ + { + ctnName: "test-gu-container-distribute-cpus-across-numa", + cpuRequest: fmt.Sprintf("%d", cpuReq), + cpuLimit: fmt.Sprintf("%d", cpuReq), + }, + } + pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + // We expect a perfectly even split i.e. equal distribution across NUMA Node as the CPU Request is 4*smtLevel*numaNodeNum. + expectedSpread := cpus.Size() / numaNodeNum + gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread)) + } + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) + }) + + ginkgo.AfterEach(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) +} + +func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) { + // negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection + ctnAttrs := []ctnAttribute{ + { + ctnName: "gu-container-neg", + cpuRequest: "1000m", + cpuLimit: "1000m", + }, + } + pod := makeCPUManagerPod("gu-pod", ctnAttrs) + // CreateSync would wait for pod to become Ready - which will never happen if production code works as intended! + pod = e2epod.NewPodClient(f).Create(ctx, pod) + + err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) { + if pod.Status.Phase != v1.PodPending { + return true, nil + } + return false, nil + }) + framework.ExpectNoError(err) + pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + if pod.Status.Phase != v1.PodFailed { + framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) + } + if !isSMTAlignmentError(pod) { + framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason) + } + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) +} + +func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int, strictReservedCPUs cpuset.CPUSet) { + // positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores + // 1. are core siblings + // 2. take a full core + // WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels. + // this means on more-than-2-way SMT systems this test will prove nothing + ctnAttrs := []ctnAttribute{ + { + ctnName: "gu-container-pos", + cpuRequest: "2000m", + cpuLimit: "2000m", + }, + } + pod := makeCPUManagerPod("gu-pod", ctnAttrs) + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) + + logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) + + cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) + + gomega.Expect(cpus.Intersection(strictReservedCPUs).IsEmpty()).To(gomega.BeTrueBecause("cpuset %q should not contain strict reserved cpus %q", cpus.String(), strictReservedCPUs.String())) + validateSMTAlignment(cpus, smtLevel, pod, &cnt) + } + + deletePodSyncByName(ctx, f, pod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with th + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) +} + +func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) { + framework.Logf("validating cpus: %v", cpus) + + if cpus.Size()%smtLevel != 0 { + framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel) + } + + // now check all the given cpus are thread siblings. + // to do so the easiest way is to rebuild the expected set of siblings from all the cpus we got. + // if the expected set matches the given set, the given set was good. + siblingsCPUs := cpuset.New() + for _, cpuID := range cpus.UnsortedList() { + threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID)))) + framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name) + siblingsCPUs = siblingsCPUs.Union(threadSiblings) + } + + framework.Logf("siblings cpus: %v", siblingsCPUs) + if !siblingsCPUs.Equals(cpus) { + framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs) + } +} + +func isSMTAlignmentError(pod *v1.Pod) bool { + re := regexp.MustCompile(`SMT.*Alignment.*Error`) + return re.MatchString(pod.Status.Reason) +} + +// getNumaNodeCPUs retrieves CPUs for each NUMA node. 
+func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { + numaNodes := make(map[int]cpuset.CPUSet) + nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist") + if err != nil { + return nil, err + } + + for _, nodePath := range nodePaths { + data, err := os.ReadFile(nodePath) + framework.ExpectNoError(err, "Error obtaning CPU information from the node") + cpuSet := strings.TrimSpace(string(data)) + cpus, err := cpuset.Parse(cpuSet) + framework.ExpectNoError(err, "Error parsing CPUset") + + // Extract node ID from path (e.g., "node0" -> 0) + base := filepath.Base(filepath.Dir(nodePath)) + nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node")) + if err != nil { + continue + } + numaNodes[nodeID] = cpus + } + + return numaNodes, nil +} + +func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { + framework.Logf("got pod logs: <%v>", logs) + cpus, err := cpuset.Parse(strings.TrimSpace(logs)) + framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) + return cpus +} + +// computeNUMADistribution calculates CPU distribution per NUMA node. +func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { + numaCPUs, err := getNumaNodeCPUs() + framework.ExpectNoError(err, "Error retrieving NUMA nodes") + framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) + + distribution := make(map[int]int) + for node, cpus := range numaCPUs { + distribution[node] = cpus.Intersection(allocatedCPUs).Size() + } + + framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) + return distribution +} + +// Custom matcher for checking packed CPUs. +func BePackedCPUs() gomegatypes.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + // This assumption holds true if there are enough CPUs on a single NUMA node. 
+ // We are intentionally limiting the CPU request to 2 to minimize the number + // of CPUs required to fulfill this case and therefore maximize the chances + // of correctly validating this case. + if count == allocatedCPUs.Size() { + return true, nil + } + } + return false, nil + }).WithMessage("expected CPUs to be packed") +} + +// Custom matcher for checking distributed CPUs. +func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher { + return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { + distribution := computeNUMADistribution(allocatedCPUs) + for _, count := range distribution { + if count != expectedSpread { + return false, nil + } + } + return true, nil + }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) +} + +// Serial because the test updates kubelet configuration. +var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() { + f := framework.NewDefaultFramework("cpu-manager-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + ginkgo.Context("With kubeconfig updated with static CPU Manager policy run the CPU Manager tests", func() { + runCPUManagerTests(f) + }) +}) diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go new file mode 100644 index 0000000000000..4b2ad1144bd05 --- /dev/null +++ b/test/e2e_node/pod_resize_test.go @@ -0,0 +1,1737 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2enode + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/strategicpatch" + clientset "k8s.io/client-go/kubernetes" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + testutils "k8s.io/kubernetes/test/utils" + admissionapi "k8s.io/pod-security-admission/api" + "k8s.io/utils/cpuset" +) + +const ( + fakeExtendedResource = "dummy.com/dummy" +) + +func patchNode(ctx context.Context, client clientset.Interface, old *v1.Node, new *v1.Node) error { + oldData, err := json.Marshal(old) + if err != nil { + return err + } + + newData, err := json.Marshal(new) + if err != nil { + return err + } + patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{}) + if err != nil { + return fmt.Errorf("failed to create merge patch for node %q: %w", old.Name, err) + } + _, err = client.CoreV1().Nodes().Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status") + return err +} + +func addExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string, extendedResourceQuantity resource.Quantity) { + extendedResource := v1.ResourceName(extendedResourceName) + + ginkgo.By("Adding a custom resource") + OriginalNode, err := 
clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + node := OriginalNode.DeepCopy() + node.Status.Capacity[extendedResource] = extendedResourceQuantity + node.Status.Allocatable[extendedResource] = extendedResourceQuantity + err = patchNode(context.Background(), clientSet, OriginalNode.DeepCopy(), node) + framework.ExpectNoError(err) + + gomega.Eventually(func() error { + node, err = clientSet.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + fakeResourceCapacity, exists := node.Status.Capacity[extendedResource] + if !exists { + return fmt.Errorf("node %s has no %s resource capacity", node.Name, extendedResourceName) + } + if expectedResource := resource.MustParse("123"); fakeResourceCapacity.Cmp(expectedResource) != 0 { + return fmt.Errorf("node %s has resource capacity %s, expected: %s", node.Name, fakeResourceCapacity.String(), expectedResource.String()) + } + + return nil + }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) +} + +func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string) { + extendedResource := v1.ResourceName(extendedResourceName) + + ginkgo.By("Removing a custom resource") + originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + node := originalNode.DeepCopy() + delete(node.Status.Capacity, extendedResource) + delete(node.Status.Allocatable, extendedResource) + err = patchNode(context.Background(), clientSet, originalNode.DeepCopy(), node) + framework.ExpectNoError(err) + + gomega.Eventually(func() error { + node, err = clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + if _, exists := node.Status.Capacity[extendedResource]; exists { + return fmt.Errorf("node %s has resource capacity %s 
which is expected to be removed", node.Name, extendedResourceName) + } + + return nil + }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) +} + +func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, cpuManagerPolicyName string, cpuManagerPolicyOptions map[string]string, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + if cpuManagerPolicyName != "" { + if cpuManagerPolicyOptions != nil { + func() { + var cpuAlloc int64 + for policyOption, policyOptionValue := range cpuManagerPolicyOptions { + if policyOption == cpumanager.FullPCPUsOnlyOption && policyOptionValue == "true" { + _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) + smtLevel := getSMTLevel() + + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < 2 { + e2eskipper.Skipf("Skipping Pod Resize along side CPU Manager %s tests since SMT disabled", policyOption) + } + + // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus + if cpuAlloc < int64(smtLevel*2) { + e2eskipper.Skipf("Skipping Pod resize along side CPU Manager %s tests since the CPU capacity < 4", policyOption) + } + + framework.Logf("SMT level %d", smtLevel) + return + } + } + }() + + // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably + // check what we do have in the node. 
+ newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + options: cpuManagerPolicyOptions, + }, + isInPlacePodVerticalScalingAllocatedStatusEnabled, + isInPlacePodVerticalScalingExclusiveCPUsEnabled, + ) + updateKubeletConfig(ctx, f, newCfg, true) + } else { + var cpuCap int64 + cpuCap, _, _ = getLocalNodeCPUDetails(ctx, f) + // Skip CPU Manager tests altogether if the CPU capacity < 2. + if cpuCap < 2 { + e2eskipper.Skipf("Skipping Pod Resize alongside CPU Manager tests since the CPU capacity < 2") + } + // Enable CPU Manager in the kubelet. + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.CPUSet{}, + }, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + updateKubeletConfig(ctx, f, newCfg, true) + } + } +} + +type cpuManagerPolicyConfig struct { + name string + title string + options map[string]string +} + +func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []podresize.ResizableContainerInfo + patchString string + expected 
[]podresize.ResizableContainerInfo + addExtendedResource bool + } + + noRestart := v1.NotRequired + doRestart := v1.RestartContainer + tests := []testCase{ + { + name: "Guaranteed QoS pod, one container - increase CPU & memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m","memory":"250Mi"},"limits":{"cpu":"100m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", 
"resources":{"requests":{"cpu":"200m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "100Mi", MemLim: "100Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase: CPU (c1,c3), memory (c2) ; decrease: CPU (c2), memory (c1,c3)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, + {"name":"c3", 
"resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase 
memory requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"memory":"600Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + 
patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"150m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU limits only", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"limits":{"cpu":"500m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"200m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: 
&cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"400m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"cpu":"500m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, 
+ }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + 
limits - increase memory requests and decrease memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests and increase memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"100m"},"limits":{"memory":"500Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests and decrease memory limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m"},"limits":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests and increase CPU limits", + 
containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"100Mi"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests and decrease CPU limits", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"300Mi"},"limits":{"cpu":"300m"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + }, + { + name: "Burstable QoS pod, one container with cpu & memory requests - decrease memory request", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "500Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, 
+ }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, one container - decrease CPU (RestartContainer) & memory (NotRequired)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"100Mi"},"limits":{"cpu":"100m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "100m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - increase c1 resources, no change for c2, decrease c3 resources (no net change for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", 
"resources":{"requests":{"cpu":"150m","memory":"150Mi"},"limits":{"cpu":"250m","memory":"250Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "250m", MemReq: "150Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - decrease c1 resources, increase c2 resources, no change for c3 (net increase for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"50Mi"},"limits":{"cpu":"150m","memory":"150Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"350m","memory":"350Mi"},"limits":{"cpu":"450m","memory":"450Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "150m", MemReq: "50Mi", MemLim: 
"150Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "350m", CPULim: "450m", MemReq: "350Mi", MemLim: "450Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net decrease for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"250m","memory":"250Mi"},"limits":{"cpu":"350m","memory":"350Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "250m", CPULim: "350m", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", 
MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory with an extended resource", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi", + ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi", + ExtendedResourceReq: "1", ExtendedResourceLim: "1"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + addExtendedResource: true, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, decrease c2 resources, decrease c3 (net decrease for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: 
"100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"1","memory":"150Mi"},"limits":{"cpu":"1","memory":"250Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "150Mi", MemLim: "250Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Burstable QoS pod, three containers - no change for c1, increase c2 resources, decrease c3 (net increase for pod)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "300Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: 
"400m", MemReq: "300Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"350Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"100m","memory":"100Mi"},"limits":{"cpu":"200m","memory":"200Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &doRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "350Mi"}, + CPUPolicy: &doRestart, + MemPolicy: &noRestart, + RestartCount: 1, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & increase memory", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"50m","memory":"300Mi"},"limits":{"cpu":"50m","memory":"300Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: 
`{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - decrease CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "500Mi", MemLim: "500Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"250Mi"},"limits":{"cpu":"2","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & decrease memory, with integer CPU requests", + containers: 
[]podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"100Mi"},"limits":{"cpu":"4","memory":"100Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "100Mi", MemLim: "100Mi"}, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU & memory, with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: 
&cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "4", + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"400Mi"},"limits":{"cpu":"4","memory":"400Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + CPUsAllowedListValue: "4", + RestartCount: 1, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3)", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"140m","memory":"50Mi"},"limits":{"cpu":"140m","memory":"50Mi"}}}, + {"name":"c2", 
"resources":{"requests":{"cpu":"150m","memory":"240Mi"},"limits":{"cpu":"150m","memory":"240Mi"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"340m","memory":"250Mi"},"limits":{"cpu":"340m","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "140m", CPULim: "140m", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "150m", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "340m", CPULim: "340m", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + }, + { + name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase CPU (c1,c3) and memory (c2) ; decrease CPU (c2) and memory (c1,c3), with integer CPU requests", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "100Mi", MemLim: "100Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "300Mi", MemLim: "300Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"4","memory":"50Mi"},"limits":{"cpu":"4","memory":"50Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"240Mi"},"limits":{"cpu":"2","memory":"240Mi"}}}, + {"name":"c3", 
"resources":{"requests":{"cpu":"4","memory":"250Mi"},"limits":{"cpu":"4","memory":"250Mi"}}} + ]}}`, + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "50Mi", MemLim: "50Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + { + Name: "c2", + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "240Mi", MemLim: "240Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c3", + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "250Mi", MemLim: "250Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + if tc.addExtendedResource { + nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) + framework.ExpectNoError(err) + + for _, node := range nodes.Items { + addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) + } + defer func() { + for _, node := range nodes.Items { + 
removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) + } + }() + } + + ginkgo.By("creating pod") + newPod := podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + podresize.VerifyPodResources(newPod, tc.containers) + ginkgo.By("verifying initial pod resize policy is as expected") + podresize.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, tc.containers). 
+ Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + + patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + podresize.VerifyPodResources(patchedPod, expected) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expected)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + podresize.VerifyPodResources(resizedPod, expected) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying pod Cpus allowed list value after resize") + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.expected). 
+ Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.containers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + + rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. + // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-errors") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support 
InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []podresize.ResizableContainerInfo + patchString string + patchError string + expected []podresize.ResizableContainerInfo + } + + tests := []testCase{ + { + name: "BestEffort QoS pod, one container - try requesting memory, expect error", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + patchError: "Pod QoS is immutable", + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + }, + }, + { + name: "BestEffort QoS pod, three containers - try requesting memory for c1, expect error", + containers: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + { + Name: "c2", + }, + { + Name: "c3", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"memory":"400Mi"}}} + ]}}`, + patchError: "Pod QoS is immutable", + expected: []podresize.ResizableContainerInfo{ + { + Name: "c1", + }, + { + Name: "c2", + }, + { + Name: "c3", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + ginkgo.By("creating pod") + newPod := 
podClient.CreateSync(ctx, testPod) + + perr := e2epod.WaitForPodCondition(ctx, f.ClientSet, newPod.Namespace, newPod.Name, "Ready", timeouts.PodStartSlow, testutils.PodRunningReady) + framework.ExpectNoError(perr, "pod %s/%s did not go running", newPod.Namespace, newPod.Name) + framework.Logf("pod %s/%s running", newPod.Namespace, newPod.Name) + + ginkgo.By("verifying initial pod resources, allocations, and policy are as expected") + podresize.VerifyPodResources(newPod, tc.containers) + podresize.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources and cgroup config are as expected") + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) + + ginkgo.By("patching pod for resize") + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(tc.patchString), metav1.PatchOptions{}) + if tc.patchError == "" { + framework.ExpectNoError(pErr, "failed to patch pod for resize") + } else { + gomega.Expect(pErr).To(gomega.HaveOccurred(), tc.patchError) + patchedPod = newPod + } + + ginkgo.By("verifying pod resources after patch") + podresize.VerifyPodResources(patchedPod, tc.expected) + + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +// NOTE: Pod resize scheduler resource quota tests are out of scope in e2e_node tests, +// because in e2e_node tests +// a) scheduler and controller manager is not running by the Node e2e +// b) api-server in services doesn't start with --enable-admission-plugins=ResourceQuota +// and is not possible to start it from TEST_ARGS +// Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() +// in test/e2e/node/pod_resize.go + +var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func() { + + policiesGeneralAvailability := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyNone), + title: "", + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with no options", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + policiesBeta := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + /*policiesAlpha := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with 
DistributeCPUsAcrossNUMAOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption, DistributeCPUsAcrossNUMAOption, AlignBySocketOption", + options: map[string]string{ + 
cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "true", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "true", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossCoresOption, AlignBySocketOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "true", + cpumanager.DistributeCPUsAcrossCoresOption: "true", + }, + }, + }*/ + + for idp := range policiesGeneralAvailability { + doPodResizeTests(policiesGeneralAvailability[idp], false, false) + doPodResizeTests(policiesGeneralAvailability[idp], true, false) + doPodResizeTests(policiesGeneralAvailability[idp], false, true) + doPodResizeTests(policiesGeneralAvailability[idp], true, true) + doPodResizeErrorTests(policiesGeneralAvailability[idp], false, false) + doPodResizeErrorTests(policiesGeneralAvailability[idp], true, false) + doPodResizeErrorTests(policiesGeneralAvailability[idp], false, true) + doPodResizeErrorTests(policiesGeneralAvailability[idp], true, true) + } + + for idp := range policiesBeta { + doPodResizeTests(policiesBeta[idp], false, false) + doPodResizeTests(policiesBeta[idp], true, false) + doPodResizeTests(policiesBeta[idp], false, true) + doPodResizeTests(policiesBeta[idp], true, true) + doPodResizeErrorTests(policiesBeta[idp], false, false) + doPodResizeErrorTests(policiesBeta[idp], true, false) + doPodResizeErrorTests(policiesBeta[idp], false, true) + 
doPodResizeErrorTests(policiesBeta[idp], true, true) + } + + /*for idp := range policiesAlpha { + doPodResizeTests(policiesAlpha[idp], true, false) + doPodResizeTests(policiesAlpha[idp], true, true) + doPodResizeErrorTests(policiesAlpha[idp], true, false) + doPodResizeErrorTests(policiesAlpha[idp], true, true) + }*/ + +}) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index f2ce02d6dc0aa..b8bbd8ce4f05c 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -183,7 +183,7 @@ func waitForKubeletToStart(ctx context.Context, f *framework.Framework) { // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) - }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + }, 5*time.Minute, 2*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) // Wait for the Kubelet to be ready. gomega.Eventually(ctx, func(ctx context.Context) error { @@ -504,7 +504,7 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) { return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. 
Containers: %+v", len(containers), containers) } return nil - }, 2*time.Minute, 1*time.Second).Should(gomega.Succeed()) + }, 5*time.Minute, 2*time.Second).Should(gomega.Succeed()) } func getPidsForProcess(name, pidFile string) ([]int, error) { From 734960689a603755dd8082f17c423d036bbe92ab Mon Sep 17 00:00:00 2001 From: Chunxia Guo/Modem Solution Lab /SRC-Beijing/Staff Engineer/Samsung Electronics Date: Fri, 21 Feb 2025 14:27:20 +0800 Subject: [PATCH 2/9] Support InPlacePodVerticalScaling for Static CPU management policy --- pkg/api/pod/testing/make.go | 6 + pkg/apis/core/validation/validation.go | 48 ++ pkg/apis/core/validation/validation_test.go | 82 ++ pkg/kubelet/cm/cpumanager/cpu_assignment.go | 372 ++++++++- .../cm/cpumanager/cpu_assignment_test.go | 467 +++++++++++ pkg/kubelet/cm/cpumanager/policy_static.go | 31 +- pkg/registry/core/pod/strategy.go | 1 + .../common/node/framework/podresize/resize.go | 14 +- test/e2e_node/pod_resize_test.go | 734 ++++++++++++++++++ 9 files changed, 1743 insertions(+), 12 deletions(-) diff --git a/pkg/api/pod/testing/make.go b/pkg/api/pod/testing/make.go index 552d795ff7861..f971c64b1d74d 100644 --- a/pkg/api/pod/testing/make.go +++ b/pkg/api/pod/testing/make.go @@ -293,6 +293,12 @@ func SetContainerResources(rr api.ResourceRequirements) TweakContainer { } } +func SetContainerEnv(env []api.EnvVar) TweakContainer { + return func(cnr *api.Container) { + cnr.Env = env + } +} + func SetContainerPorts(ports ...api.ContainerPort) TweakContainer { return func(cnr *api.Container) { cnr.Ports = ports diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index 04d3037984459..d2d4cfcd83c11 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -63,6 +63,7 @@ import ( "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/fieldpath" + "k8s.io/utils/cpuset" ) const isNegativeErrorMsg string = 
apimachineryvalidation.IsNegativeErrorMsg @@ -6113,6 +6114,7 @@ func ValidatePodResize(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel var newContainers []core.Container for ix, container := range originalCPUMemPodSpec.Containers { dropCPUMemoryResourcesFromContainer(&container, &oldPod.Spec.Containers[ix]) + allErrs = append(allErrs, dropMustKeepCPUsEnvFromContainer(&container, &oldPod.Spec.Containers[ix], specPath)...) if !apiequality.Semantic.DeepEqual(container, oldPod.Spec.Containers[ix]) { // This likely means that the user has made changes to resources other than CPU and memory for regular container. errs := field.Forbidden(specPath, "only cpu and memory resources are mutable") @@ -6208,6 +6210,52 @@ func dropCPUMemoryResourcesFromContainer(container *core.Container, oldPodSpecCo container.Resources = core.ResourceRequirements{Limits: lim, Requests: req} } +func removeEnvVar(envs []core.EnvVar, nameToRemove string) []core.EnvVar { + var newEnvs []core.EnvVar + for _, env := range envs { + if env.Name != nameToRemove { + newEnvs = append(newEnvs, env) + } + } + return newEnvs +} + +// dropMustKeepCPUsEnvFromContainer deletes the "mustKeepCPUs" in env from the container, and copies them from the old pod container resources if present. 
+func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecContainer *core.Container, fldPath *field.Path) field.ErrorList { + allErrs := field.ErrorList{} + // the element named "mustKeepCPUs" in env can be update or add + existNewMustKeepCPUs := false + existOldMustKeepCPUs := false + for jx, newEnv := range container.Env { + if newEnv.Name == "mustKeepCPUs" { + existNewMustKeepCPUs = true + _, err := cpuset.Parse(newEnv.Value) + if err != nil { + allErrs = append(allErrs, field.Invalid(fldPath, newEnv, "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed")) + } + // Change mustKeepCPUs + for _, oldEnv := range oldPodSpecContainer.Env { + if oldEnv.Name == "mustKeepCPUs" { + existOldMustKeepCPUs = true + container.Env[jx] = oldEnv + break + } + } + // Add mustKeepCPUs + if !existOldMustKeepCPUs && (len(container.Env)-len(oldPodSpecContainer.Env)) == 1 { + // Delete "mustKeepCPUs" in newPod to make newPod equal to oldPod + container.Env = removeEnvVar(container.Env, "mustKeepCPUs") + } + break + } + } + // Delete mustKeepCPUs + if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { + oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") + } + return allErrs +} + // isPodResizeRequestSupported checks whether the pod is running on a node with InPlacePodVerticalScaling enabled. 
func isPodResizeRequestSupported(pod core.Pod) bool { // TODO: Remove this after GA+3 releases of InPlacePodVerticalScaling diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 5c32dcd3c7dbc..5bb7f5c7542f7 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -27852,6 +27852,46 @@ func TestValidatePodResize(t *testing.T) { })) } + mkPodWith1Env := func(envName1, envValue1 string, tweaks ...podtest.Tweak) *core.Pod { + return podtest.MakePod("pod", append(tweaks, + podtest.SetContainers( + podtest.MakeContainer( + "container", + podtest.SetContainerEnv( + []core.EnvVar{ + { + Name: envName1, + Value: envValue1, + }, + }, + ), + ), + ), + )...) + } + + mkPodWith2Env := func(envName1, envValue1, envName2, envValue2 string, tweaks ...podtest.Tweak) *core.Pod { + return podtest.MakePod("pod", append(tweaks, + podtest.SetContainers( + podtest.MakeContainer( + "container", + podtest.SetContainerEnv( + []core.EnvVar{ + { + Name: envName1, + Value: envValue1, + }, + { + Name: envName2, + Value: envValue2, + }, + }, + ), + ), + ), + )...) 
+ } + tests := []struct { test string old *core.Pod @@ -28350,6 +28390,48 @@ func TestValidatePodResize(t *testing.T) { new: mkPodWithInitContainers(getResources("100m", "0", "2Gi", ""), core.ResourceList{}, core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.NotRequired)), err: "spec: Forbidden: only cpu and memory resources are mutable", }, + { + test: "Pod env:mustKeepCPUs change value", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "", + }, + { + test: "Pod env:mustKeepCPUs add value", + old: mkPodWith1Env("env1", "a"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "", + }, + { + test: "Pod env:mustKeepCPUs delete", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith1Env("env1", "a"), + err: "", + }, + { + test: "Pod env:env1 change is forbidden", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "b", "mustKeepCPUs", "0"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:env1 add is forbidden", + old: mkPodWith1Env("mustKeepCPUs", "0"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:env1 delete is forbidden", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith1Env("mustKeepCPUs", "0"), + err: "spec: Forbidden: only cpu and memory resources are mutable", + }, + { + test: "Pod env:mustKeepCPUs delete", + old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), + new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1s2"), + err: "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed", + }, } for _, test := range tests { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 7da5202a02c83..e6a8ba1984383 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ 
b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -95,6 +95,11 @@ type numaOrSocketsFirstFuncs interface { sortAvailableNUMANodes() []int sortAvailableSockets() []int sortAvailableCores() []int + takeFullFirstLevelForResize() + takeFullSecondLevelForResize() + sortAvailableNUMANodesForResize() []int + sortAvailableSocketsForResize() []int + sortAvailableCoresForResize() []int } type numaFirst struct{ acc *cpuAccumulator } @@ -204,8 +209,145 @@ func (s *socketsFirst) sortAvailableCores() []int { return result } +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of NUMA Nodes as the first level for resize. +func (n *numaFirst) takeFullFirstLevelForResize() { + n.acc.takeRemainCpusForFullNUMANodes() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of sockets as the second level for resize. +func (n *numaFirst) takeFullSecondLevelForResize() { + n.acc.takeRemainCpusForFullSockets() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then return the available NUMA nodes +// which have allocated CPUs to Container. 
+func (n *numaFirst) sortAvailableNUMANodesForResize() []int { + allocatedNumaNodesSet := n.acc.resultDetails.NUMANodes() + availableNumaNodesSet := n.acc.details.NUMANodes() + numas := allocatedNumaNodesSet.Intersection(availableNumaNodesSet).UnsortedList() + n.acc.sort(numas, n.acc.details.CPUsInNUMANodes) + return numas +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the socket which are allocated CPUs to the Container +// Secondly, pull the other sockets which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableSocketsForResize() []int { + var result []int + + // Sort allocated sockets + allocatedSocketsSet := n.acc.resultDetails.Sockets() + availableSocketsSet := n.acc.details.Sockets() + allocatedSockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + n.acc.sort(allocatedSockets, n.acc.details.CPUsInSockets) + result = append(result, allocatedSockets...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, numa := range n.sortAvailableNUMANodesForResize() { + socketSet := n.acc.details.SocketsInNUMANodes(numa) + sockets := socketSet.Difference(allocatedSocketsSet).UnsortedList() + n.acc.sort(sockets, n.acc.details.CPUsInSockets) + result = append(result, sockets...) 
+ } + return result +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := n.acc.resultDetails.Cores() + availableCoresSet := n.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + n.acc.sort(allocatedCores, n.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, socket := range n.acc.sortAvailableSocketsForResize() { + coresSet := n.acc.details.CoresInSockets(socket) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + n.acc.sort(cores, n.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of NUMA Nodes as the first level for resize. +func (s *socketsFirst) takeFullFirstLevelForResize() { + s.acc.takeRemainCpusForFullSockets() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of sockets as the second level for resize. 
+func (s *socketsFirst) takeFullSecondLevelForResize() { + s.acc.takeRemainCpusForFullNUMANodes() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the NUMA nodes which are allocated CPUs to the Container +// Secondly, pull the other NUMA nodes which are not allocated CPUs to the Container, but contains in the sockets which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableNUMANodesForResize() []int { + var result []int + + // Sort allocated sockets + allocatedNUMANodesSet := s.acc.resultDetails.NUMANodes() + availableNUMANodesSet := s.acc.details.NUMANodes() + allocatedNUMANodes := allocatedNUMANodesSet.Intersection(availableNUMANodesSet).UnsortedList() + s.acc.sort(allocatedNUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, allocatedNUMANodes...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, socket := range s.sortAvailableSocketsForResize() { + NUMANodesSet := s.acc.details.NUMANodesInSockets(socket) + NUMANodes := NUMANodesSet.Difference(allocatedNUMANodesSet).UnsortedList() + s.acc.sort(NUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, NUMANodes...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then return the available sockets +// which have allocated CPUs to Container. 
+func (s *socketsFirst) sortAvailableSocketsForResize() []int { + allocatedSocketsSet := s.acc.resultDetails.Sockets() + availableSocketsSet := s.acc.details.Sockets() + sockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + s.acc.sort(sockets, s.acc.details.CPUsInSockets) + return sockets +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the socket which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := s.acc.resultDetails.Cores() + availableCoresSet := s.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + s.acc.sort(allocatedCores, s.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, NUMANode := range s.acc.sortAvailableNUMANodesForResize() { + coresSet := s.acc.details.CoresInNUMANodes(NUMANode) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + s.acc.sort(cores, s.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + type availableCPUSorter interface { sort() []int + sortForResize() []int } type sortCPUsPacked struct{ acc *cpuAccumulator } @@ -222,6 +364,14 @@ func (s sortCPUsSpread) sort() []int { return s.acc.sortAvailableCPUsSpread() } +func (s sortCPUsPacked) sortForResize() []int { + return s.acc.sortAvailableCPUsPackedForResize() +} + +func (s sortCPUsSpread) sortForResize() []int { + return s.acc.sortAvailableCPUsSpreadForResize() +} + // CPUSortingStrategy describes the CPU sorting solution within the socket scope. 
// Using topoDualSocketHT (12 CPUs, 2 sockets, 6 cores) as an example: // @@ -282,6 +432,9 @@ type cpuAccumulator struct { // cardinality equal to the total number of CPUs to accumulate. result cpuset.CPUSet + // `resultDetails` is the set of allocated CPUs in `result` + resultDetails topology.CPUDetails + numaOrSocketsFirst numaOrSocketsFirstFuncs // availableCPUSorter is used to control the cpu sorting result. @@ -297,6 +450,7 @@ func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, details: topo.CPUDetails.KeepOnly(availableCPUs), numCPUsNeeded: numCPUs, result: cpuset.New(), + resultDetails: topo.CPUDetails.KeepOnly(cpuset.New()), } if reusableCPUsForResize != nil { @@ -424,6 +578,21 @@ func (a *cpuAccumulator) freeCPUs() []int { return a.availableCPUSorter.sort() } +// Return true if this numa only allocated CPUs for this Container +func (a *cpuAccumulator) isFullNUMANodeForResize(numaID int) bool { + return a.resultDetails.CPUsInNUMANodes(numaID).Size()+a.details.CPUsInNUMANodes(numaID).Size() == a.topo.CPUDetails.CPUsInNUMANodes(numaID).Size() +} + +// Return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullSocketForResize(socketID int) bool { + return a.resultDetails.CPUsInSockets(socketID).Size()+a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() +} + +// return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullCoreForResize(coreID int) bool { + return a.resultDetails.CPUsInCores(coreID).Size()+a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() +} + // Sorts the provided list of NUMA nodes/sockets/cores/cpus referenced in 'ids' // by the number of available CPUs contained within them (smallest to largest). 
// The 'getCPU()' parameter defines the function that should be called to @@ -553,8 +722,108 @@ func (a *cpuAccumulator) sortAvailableCPUsSpread() []int { return result } +// Sort all NUMA nodes with at least one free CPU. +// +// If NUMA nodes are higher than sockets in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than sockets in the memory hierarchy" means that NUMA nodes +// contain a bigger number of CPUs (free and busy) than sockets, or equivalently that each NUMA node +// contains more than one socket. +// +// If instead NUMA nodes are lower in the memory hierarchy than sockets, they are sorted as follows. +// First part, sort the NUMA nodes which contains the CPUs allocated to Container. and these NUMA nodes +// are sorted by number of free CPUs that they contain. +// Second part, sort the NUMA nodes contained in the sockets which contains the CPUs allocated to Container, +// but exclude the NUMA nodes in first part. these NUMA nodes sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the sockets that contain them. Then, for each +// socket they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableNUMANodesForResize() []int { + return a.numaOrSocketsFirst.sortAvailableNUMANodesForResize() +} + +// Sort all sockets with at least one free CPU. +// +// If sockets are higher than NUMA nodes in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than NUMA nodes in the memory hierarchy" means that +// sockets contain a bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each +// socket contains more than one NUMA node. +// +// If instead sockets are lower in the memory hierarchy than NUMA nodes, they are sorted as follows. +// First part, sort the sockets which contains the CPUs allocated to Container. 
and these sockets +// are sorted by number of free CPUs that they contain. +// Second part, sort the sockets contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the sockets in first part. these sockets sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the NUMA nodes that contain them. Then, for each +// NUMA node they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableSocketsForResize() []int { + return a.numaOrSocketsFirst.sortAvailableSocketsForResize() +} + +// Sort all cores with at least one free CPU. +// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the cores are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the cores in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the cores in it are sorted +// by number of free CPUs that they contain. The order is always ascending. + +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCoresForResize() []int { + return a.numaOrSocketsFirst.sortAvailableCoresForResize() +} + +// Sort all free CPUs. 
+// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the CPUs are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. for each core, the CPUs in it are +// sorted by numerical ID. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the CPUs in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the CPUs in it are sorted +// by number of free CPUs that their cores contain. Finally, for each core, the CPUs in it are +// sorted by numerical ID. The order is always ascending. +// +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCPUsPackedForResize() []int { + var result []int + for _, core := range a.sortAvailableCoresForResize() { + cpus := a.details.CPUsInCores(core).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) + } + return result +} + +// Sort all available CPUs: +// - First by core using sortAvailableSocketsForResize(). +// - Then within each socket, sort cpus directly using the sort() algorithm defined above. +func (a *cpuAccumulator) sortAvailableCPUsSpreadForResize() []int { + var result []int + for _, socket := range a.sortAvailableSocketsForResize() { + cpus := a.details.CPUsInSockets(socket).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) 
+ } + return result +} + func (a *cpuAccumulator) take(cpus cpuset.CPUSet) { a.result = a.result.Union(cpus) + a.resultDetails = a.topo.CPUDetails.KeepOnly(a.result) a.details = a.details.KeepOnly(a.details.CPUs().Difference(a.result)) a.numCPUsNeeded -= cpus.Size() } @@ -676,6 +945,55 @@ func (a *cpuAccumulator) takeRemainingCPUs() { } } +func (a *cpuAccumulator) takeRemainCpusForFullNUMANodes() { + for _, numa := range a.sortAvailableNUMANodesForResize() { + if a.isFullNUMANodeForResize(numa) { + cpusInNUMANode := a.details.CPUsInNUMANodes(numa) + if !a.needsAtLeast(cpusInNUMANode.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullNUMANodes: claiming NUMA node", "numa", numa, "cpusInNUMANode", cpusInNUMANode) + a.take(cpusInNUMANode) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullSockets() { + for _, socket := range a.sortAvailableSocketsForResize() { + if a.isFullSocketForResize(socket) { + cpusInSocket := a.details.CPUsInSockets(socket) + if !a.needsAtLeast(cpusInSocket.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullSockets: claiming Socket", "socket", socket, "cpusInSocket", cpusInSocket) + a.take(cpusInSocket) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullCores() { + for _, core := range a.sortAvailableCoresForResize() { + if a.isFullCoreForResize(core) { + cpusInCore := a.details.CPUsInCores(core) + if !a.needsAtLeast(cpusInCore.Size()) { + continue + } + klog.V(4).InfoS("takeRemainCpusForFullCores: claiming Core", "core", core, "cpusInCore", cpusInCore) + a.take(cpusInCore) + } + } +} + +func (a *cpuAccumulator) takeRemainingCPUsForResize() { + for _, cpu := range a.availableCPUSorter.sortForResize() { + klog.V(4).InfoS("takeRemainingCPUsForResize: claiming CPU", "cpu", cpu) + a.take(cpuset.New(cpu)) + if a.isSatisfied() { + return + } + } +} + // rangeNUMANodesNeededToSatisfy returns minimum and maximum (in this order) number of NUMA nodes // needed to satisfy the cpuAccumulator's goal of 
accumulating `a.numCPUsNeeded` CPUs, assuming that // CPU groups have size given by the `cpuGroupSize` argument. @@ -824,10 +1142,18 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // requires at least a NUMA node or socket's-worth of CPUs. If NUMA // Nodes map to 1 or more sockets, pull from NUMA nodes first. // Otherwise pull from sockets first. + acc.numaOrSocketsFirst.takeFullFirstLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.numaOrSocketsFirst.takeFullFirstLevel() if acc.isSatisfied() { return acc.result, nil } + acc.numaOrSocketsFirst.takeFullSecondLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.numaOrSocketsFirst.takeFullSecondLevel() if acc.isSatisfied() { return acc.result, nil @@ -847,6 +1173,10 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // a core's-worth of CPUs. // If `CPUSortingStrategySpread` is specified, skip taking the whole core. if cpuSortingStrategy != CPUSortingStrategySpread { + acc.takeRemainCpusForFullCores() + if acc.isSatisfied() { + return acc.result, nil + } acc.takeFullCores() if acc.isSatisfied() { return acc.result, nil @@ -856,6 +1186,10 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // 4. Acquire single threads, preferring to fill partially-allocated cores // on the same sockets as the whole cores we have already taken in this // allocation. + acc.takeRemainingCPUsForResize() + if acc.isSatisfied() { + return acc.result, nil + } acc.takeRemainingCPUs() if acc.isSatisfied() { return acc.result, nil @@ -946,7 +1280,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu } // Otherwise build an accumulator to start allocating CPUs from. 
- acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForScaleDown) if acc.isSatisfied() { return acc.result, nil } @@ -955,11 +1289,23 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu } // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. numas := acc.sortAvailableNUMANodes() + reusableCPUsForResizeDetail := acc.topo.CPUDetails.KeepOnly(cpuset.New()) + allocatedCPUsNumber := 0 + if reusableCPUsForResize != nil { + reusableCPUsForResizeDetail = acc.topo.CPUDetails.KeepOnly(*reusableCPUsForResize) + allocatedCPUsNumber = reusableCPUsForResize.Size() + } + allocatedNumas := reusableCPUsForResizeDetail.NUMANodes() + allocatedCPUPerNuma := make(mapIntInt, len(numas)) + for _, numa := range numas { + allocatedCPUPerNuma[numa] = reusableCPUsForResizeDetail.CPUsInNUMANodes(numa).Size() + } // Calculate the minimum and maximum possible number of NUMA nodes that // could satisfy this request. This is used to optimize how many iterations // of the loop we need to go through below. minNUMAs, maxNUMAs := acc.rangeNUMANodesNeededToSatisfy(cpuGroupSize) + minNUMAs = max(minNUMAs, allocatedNumas.Size()) // Try combinations of 1,2,3,... NUMA nodes until we find a combination // where we can evenly distribute CPUs across them. To optimize things, we @@ -979,10 +1325,16 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu return Break } + // Check if the 'allocatedNumas' CPU set is a subset of the 'comboSet' + comboSet := cpuset.New(combo...) + if !allocatedNumas.IsSubsetOf(comboSet) { + return Continue + } + // Check that this combination of NUMA nodes has enough CPUs to // satisfy the allocation overall. cpus := acc.details.CPUsInNUMANodes(combo...) 
- if cpus.Size() < numCPUs { + if (cpus.Size() + allocatedCPUsNumber) < numCPUs { return Continue } @@ -990,7 +1342,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // 'cpuGroupSize' across the NUMA nodes in this combo. numCPUGroups := 0 for _, numa := range combo { - numCPUGroups += (acc.details.CPUsInNUMANodes(numa).Size() / cpuGroupSize) + numCPUGroups += ((acc.details.CPUsInNUMANodes(numa).Size() + allocatedCPUPerNuma[numa]) / cpuGroupSize) } if (numCPUGroups * cpuGroupSize) < numCPUs { return Continue @@ -1002,7 +1354,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize for _, numa := range combo { cpus := acc.details.CPUsInNUMANodes(numa) - if cpus.Size() < distribution { + if (cpus.Size() + allocatedCPUPerNuma[numa]) < distribution { + return Continue + } + if allocatedCPUPerNuma[numa] > distribution { return Continue } } @@ -1017,7 +1372,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() } for _, numa := range combo { - availableAfterAllocation[numa] -= distribution + availableAfterAllocation[numa] -= (distribution - allocatedCPUPerNuma[numa]) } // Check if there are any remaining CPUs to distribute across the @@ -1124,7 +1479,8 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // size 'cpuGroupSize' from 'bestCombo'. 
distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForScaleDown) acc.take(cpus) } @@ -1139,7 +1495,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForScaleDown) acc.take(cpus) remainder -= cpuGroupSize } @@ -1164,4 +1520,4 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. 
return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) -} +} \ No newline at end of file diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 080961402e55c..559e6b1892c22 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -1070,6 +1070,473 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { } } +type takeByTopologyTestCaseForResize struct { + description string + topo *topology.CPUTopology + opts StaticPolicyOptions + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyTestCasesForResize(t *testing.T) []takeByTopologyTestCaseForResize { + return []takeByTopologyTestCaseForResize{ + { + "Allocated 1 CPUs, and take 1 cpus from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 1, + "", + cpuset.New(0), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 4), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 6), + }, + { + "Allocated 1 CPU, and take 3 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 3, + "", + cpuset.New(0, 1, 5), + }, + { + "Allocated 1 CPU, and take all cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 8, + "", + 
mustParseCPUSet(t, "0-7"), + }, + { + "Allocated 1 CPU, take a core from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 2, + "", + cpuset.New(5, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + cpuset.New(1, 3, 5, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus and 1 core of CPU from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + cpuset.New(0, 1, 3, 5, 6, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "20-39,60-79"), + }, + { + "Allocated 1 CPU, take a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 20, + "", + mustParseCPUSet(t, "30-39,70-79"), + }, + { + "Allocated 2 CPUs, take a socket and a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-58,60-79"), + cpuset.New(39, 59), + 60, + "", + mustParseCPUSet(t, "0-19,30-59,70-79"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-29,39-48,60-69"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated 
CPUs already taken more CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "9,30-38,49"), + cpuset.New(), + 1, + "", + mustParseCPUSet(t, "9"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus and 1 CPU from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 41, + "", + mustParseCPUSet(t, "0-19,39-59"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from single socket with HT, 3 cpus", + topoSingleSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 3, + "", + mustParseCPUSet(t, "0,1,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 3 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 3, + "", + mustParseCPUSet(t, "1,3,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 6 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + mustParseCPUSet(t, "1,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 8 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + mustParseCPUSet(t, "0,1,2,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket without HT, 2 cpus", + topoDualSocketNoHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 2, + "", + mustParseCPUSet(t, "4,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with multi numa per socket and HT, 8 cpus", + topoDualSocketMultiNumaPerSocketHT, + 
StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 8, + "", + mustParseCPUSet(t, "20-26,39"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-39,60-69"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from quad socket four way with HT, 12 cpus", + topoQuadSocketFourWayHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-59,61-287"), + cpuset.New(60), + 8, + "", + mustParseCPUSet(t, "3,4,11,12,15,16,23,60"), + }, + } +} + +func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { + testCases := commonTakeByTopologyTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + strategy := CPUSortingStrategyPacked + if tc.opts.DistributeCPUsAcrossCores { + strategy = CPUSortingStrategySpread + } + + result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) + + if tc.expErr != "" && err != nil && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", result, tc.expResult) + } + }) + } +} + +type takeByTopologyExtendedTestCaseForResize struct { + description string + topo *topology.CPUTopology + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + cpuGroupSize int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyExtendedTestCasesForResize(t *testing.T) []takeByTopologyExtendedTestCaseForResize { + return []takeByTopologyExtendedTestCaseForResize{ + { + "Allocated 1 
CPUs, allocate 4 full cores with 2 distributed across each NUMA node", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + 1, + "", + mustParseCPUSet(t, "0,6,2,8,1,7,5,11"), + }, + { + "Allocated 8 CPUs, allocate 32 full cores with 8 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-35,40-75"), + mustParseCPUSet(t, "36-39,76-79"), + 64, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-33,36-39,40-47,50-57,60-67,70-73,76-79"), + }, + { + "Allocated 2 CPUs, allocate 8 full cores with 2 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"), + mustParseCPUSet(t, "0,1"), + 16, + 1, + "", + mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"), + }, + { + "Allocated 1 CPUs, take 1 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 1, + 1, + "", + mustParseCPUSet(t, "11"), + }, + { + "Allocated 1 CPUs, take 2 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 2, + 1, + "", + mustParseCPUSet(t, "5,11"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 0, 1", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-66,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 1, 2 (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-36,40-47,50-57,60-67,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs 
distributed across each NUMA node and 1 CPU spilling over to each of NUMA 2, 3 (to keep balance)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-46,50-56,60-67,70-77"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 2, 3 (to keep balance with cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-16,20-27,30-37,40-47,50-56,60-67,70-77"), + }, + { + "Allocated 4 CPUs, ensure bestRemainder chosen with NUMA nodes that have enough CPUs to satisfy the request", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-13,20-23,30-36,40-43,50-53,60-63,70-76"), + mustParseCPUSet(t, "0-3"), + 34, + 1, + "", + mustParseCPUSet(t, "0-3,10-13,20-23,30-34,40-43,50-53,60-63,70-74"), + }, + { + "Allocated 4 CPUs, ensure previous failure encountered on live machine has been fixed (1/1)", + topoDualSocketMultiNumaPerSocketHTLarge, + mustParseCPUSet(t, "0,128,30,31,158,159,47,171-175,62,63,190,191,75-79,203-207,94,96,222,223,101-111,229-239,126,127,254,255"), + mustParseCPUSet(t, "43-46"), + 28, + 1, + "", + mustParseCPUSet(t, "43-47,75-79,96,101-105,171-174,203-206,229-232"), + }, + { + "Allocated 14 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-39,48-79"), + mustParseCPUSet(t, "0-7,40-47"), + 48, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,40-47,50-57,60-67"), + }, + { + "Allocated 20 CPUs, allocated CPUs in numa0 is bigger than distribute CPUs, allocated CPUs by takeByTopologyNUMAPacked", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-39,50-79"), + mustParseCPUSet(t, "0-9,40-49"), + 48, + 1, 
+ "", + mustParseCPUSet(t, "0-23,40-63"), + }, + { + "Allocated 12 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (taking all but 2 from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-29,32-39,48-69,72-79"), + mustParseCPUSet(t, "1-7,41-47"), + 48, + 1, + "", + mustParseCPUSet(t, "1-8,10-17,20-27,41-48,50-57,60-67"), + }, + { + "Allocated 10 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (even though all 8 could be allocated from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-29,31-39,42-69,71-79"), + mustParseCPUSet(t, "2-7,42-47"), + 48, + 1, + "", + mustParseCPUSet(t, "2-9,10-17,20-27,42-49,50-57,60-67"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 1, + "", + mustParseCPUSet(t, "20-26,30-36,60-65,70-75"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 2, + "", + mustParseCPUSet(t, "20-25,30-36,60-65,70-76"), + }, + } +} + +func TestTakeByTopologyNUMADistributedForResize(t *testing.T) { + testCases := commonTakeByTopologyExtendedTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) + if err != nil { + if tc.expErr == "" { + t.Errorf("unexpected error [%v]", err) + } + if tc.expErr != "" && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + return + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", 
result, tc.expResult) + } + }) + } +} + func mustParseCPUSet(t *testing.T, s string) cpuset.CPUSet { cpus, err := cpuset.Parse(s) if err != nil { diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 6ea26a669649c..0b9f9fb578af9 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -450,7 +450,8 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, nil) + mustKeepCPUsForResize := p.GetMustKeepCPUs(container, cpuset) + newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, mustKeepCPUsForResize) if err != nil { klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) return err @@ -502,6 +503,34 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai return nil } +func (p *staticPolicy) GetMustKeepCPUs(container *v1.Container, oldCpuset cpuset.CPUSet) *cpuset.CPUSet { + mustKeepCPUs := cpuset.New() + for _, envVar := range container.Env { + if envVar.Name == "mustKeepCPUs" { + mustKeepCPUsInEnv, err := cpuset.Parse(envVar.Value) + if err == nil && mustKeepCPUsInEnv.Size() != 0 { + mustKeepCPUs = oldCpuset.Intersection(mustKeepCPUsInEnv) + } + klog.InfoS("mustKeepCPUs ", "is", mustKeepCPUs) + if p.options.FullPhysicalCPUsOnly { + // mustKeepCPUs must be aligned to the physical core + if 
(mustKeepCPUs.Size() % 2) != 0 { + return nil + } + mustKeepCPUsDetail := p.topology.CPUDetails.KeepOnly(mustKeepCPUs) + mustKeepCPUsDetailCores := mustKeepCPUsDetail.Cores() + if (mustKeepCPUs.Size() / mustKeepCPUsDetailCores.Size()) != p.cpuGroupSize { + klog.InfoS("mustKeepCPUs is nil") + return nil + } + } + return &mustKeepCPUs + } + } + klog.InfoS("mustKeepCPUs is nil") + return nil +} + // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet { assignments := s.GetCPUAssignments() diff --git a/pkg/registry/core/pod/strategy.go b/pkg/registry/core/pod/strategy.go index ccdf35e76f47b..ac5fe4b79f433 100644 --- a/pkg/registry/core/pod/strategy.go +++ b/pkg/registry/core/pod/strategy.go @@ -395,6 +395,7 @@ func dropNonResizeUpdatesForContainers(new, old []api.Container) []api.Container } oldCopyWithMergedResources[i].Resources = ctr.Resources oldCopyWithMergedResources[i].ResizePolicy = ctr.ResizePolicy + oldCopyWithMergedResources[i].Env = ctr.Env } return oldCopyWithMergedResources diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index 56d44c89fc3d6..7312381ab7971 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -61,6 +61,7 @@ type ResizableContainerInfo struct { RestartPolicy v1.ContainerRestartPolicy InitCtr bool CPUsAllowedListValue string + CPUsAllowedList string } func getTestResizePolicy(tcInfo ResizableContainerInfo) (resizePol []v1.ContainerResizePolicy) { @@ -406,11 +407,11 @@ func formatErrors(err error) error { func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod, wantCtrs []ResizableContainerInfo) error { ginkgo.GinkgoHelper() - verifyCPUsAllowedListValue := func(cName, 
expectedCPUsAllowedListValue string) error { + verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string, expectedCPUsAllowedList string) error { mycmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" calValue, _, err := e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", mycmd) framework.Logf("Namespace %s Pod %s Container %s - looking for Cpus allowed list value %s in /proc/self/status", - pod.Namespace, pod.Name, cName, expectedCPUsAllowedListValue) + pod.Namespace, pod.Name, cName, calValue) if err != nil { return fmt.Errorf("failed to find expected value '%s' in container '%s' Cpus allowed list '/proc/self/status'", cName, expectedCPUsAllowedListValue) } @@ -420,13 +421,20 @@ func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod if cpuTotalValue != expectedCPUsAllowedListValue { return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%s' not equal to expected '%s'", cName, calValue, cpuTotalValue, expectedCPUsAllowedListValue) } + if expectedCPUsAllowedList != "" { + cExpected, err := cpuset.Parse(expectedCPUsAllowedList) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for cexpectedCPUset") + if !c.Equals(cExpected) { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%v' not equal to expected '%v'", cName, calValue, c, cExpected) + } + } return nil } for _, ci := range wantCtrs { if ci.CPUsAllowedListValue == "" { continue } - err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue) + err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue, ci.CPUsAllowedList) if err != nil { return err } diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 4b2ad1144bd05..423c76c2d7928 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "strconv" "time" @@ -1735,3 +1736,736 @@ 
var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func }*/ }) + +func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + if isMultiNUMA() { + e2eskipper.Skipf("For simple test, only test one NUMA, multi NUMA -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testCase struct { + name string + containers []e2epod.ResizableContainerInfo + patchString string + expected []e2epod.ResizableContainerInfo + addExtendedResource bool + skipFlag bool + } + + setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { + cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) + firstContainerCpuset := cpuset.New() + firstAdditionCpuset := cpuset.New() + firstExpectedCpuset := cpuset.New() + secondContainerCpuset := cpuset.New() + secondAdditionCpuset := cpuset.New() + secondExpectedCpuset := cpuset.New() + + if tests.name == "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 2 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + 
cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[0]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + } else if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + secondContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + secondContainerCpuset = cpuset.New(cpuList[0]) + } + tests.containers[1].CPUsAllowedList = secondContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[1]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + + secondAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() + secondAdditionCpuset = cpuset.New(cpuList[0]) + } + secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) + tests.expected[1].CPUsAllowedList = secondExpectedCpuset.String() + } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false") { + if cpuCap < 2 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(2, 3) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + if cpuList[1] != 1 { + firstContainerCpuset = 
mustParseCPUSet(getCPUSiblingList(1)) + } + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstExpectedCpuset = cpuset.New(firstContainerCpuset.List()[0]) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false" { + startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) + endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex + tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] + ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) + } + } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true") { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = cpuset.New(2, 3, 4, 5) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + if cpuList[1] != 1 { + firstContainerCpuset = mustParseCPUSet(getCPUSiblingList(1)) + firstContainerCpuset = firstContainerCpuset.Union(mustParseCPUSet(getCPUSiblingList(2))) + } + } + tests.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + firstExpectedCpuset = mustParseCPUSet(getCPUSiblingList(1)) + tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true" { + startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) + endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex + 
tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] + ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) + } + } + + ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) + ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) + } + + noRestart := v1.NotRequired + testsWithFalseFullCPUs := []testCase{ + { + name: "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", 
"resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"1","memory":"200Mi"},"limits":{"cpu":"1","memory":"200Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "11"}], 
"resources":{"requests":{"cpu":"1","memory":"400Mi"},"limits":{"cpu":"1","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + }, + } + + testsWithTrueFullCPUs := []testCase{ + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"200Mi"},"limits":{"cpu":"2","memory":"200Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + { + name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "2,12"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + // 
Abnormal case, CPUs in mustKeepCPUs not full PCPUs, the mustKeepCPUs will be ignored + { + name: "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "4", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "1,2"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + var tests []testCase + if policy.options[cpumanager.FullPCPUsOnlyOption] == "false" { + tests = testsWithFalseFullCPUs + } else if policy.options[cpumanager.FullPCPUsOnlyOption] == "true" { + tests = testsWithTrueFullCPUs + } + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) + if tc.skipFlag { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") + } + + var testPod, patchedPod *v1.Pod + var pErr error + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = 
e2epod.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + if tc.addExtendedResource { + nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet) + framework.ExpectNoError(err) + + for _, node := range nodes.Items { + addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) + } + defer func() { + for _, node := range nodes.Items { + removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) + } + }() + } + + ginkgo.By("creating pod") + newPod := podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + e2epod.VerifyPodResources(newPod, tc.containers) + ginkgo.By("verifying initial pod resize policy is as expected") + e2epod.VerifyPodResizePolicy(newPod, tc.containers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, tc.containers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, tc.containers). 
+ Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + e2epod.VerifyPodResources(patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + e2epod.VerifyPodResources(resizedPod, expectedContainers) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, expectedContainers). 
+ Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, tc.containers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + ginkgo.By("First patch") + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + + rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + ginkgo.By("Second patch for rollback") + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) + +} + +func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { + f := framework.NewDefaultFramework("pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + var podClient *e2epod.PodClient + var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func(ctx context.Context) { + var err error + node := getLocalNode(ctx, f) + if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") + } + podClient = e2epod.NewPodClient(f) + if oldCfg == nil { + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + } + }) + + type testPod struct { + containers []e2epod.ResizableContainerInfo + patchString string + expected []e2epod.ResizableContainerInfo + } + + type testCase struct { + name string + testPod1 testPod + testPod2 testPod + skipFlag bool + } + + setCPUsForTestCase := func(ctx context.Context, tests *testCase, fullPCPUsOnly string) { + cpuCap, _, _ := getLocalNodeCPUDetails(ctx, f) + firstContainerCpuset := cpuset.New() + firstAdditionCpuset := cpuset.New() + firstExpectedCpuset := cpuset.New() + secondContainerCpuset := cpuset.New() + secondAdditionCpuset := cpuset.New() + secondExpectedCpuset := cpuset.New() + + if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { + if cpuCap < 4 { + tests.skipFlag = true + } + firstContainerCpuset = 
cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() + firstContainerCpuset = cpuset.New(cpuList[1]) + } + tests.testPod1.containers[0].CPUsAllowedList = firstContainerCpuset.String() + + secondContainerCpuset = cpuset.New(1) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + secondContainerCpuset = cpuset.New(cpuList[0]) + } + tests.testPod2.containers[1].CPUsAllowedList = secondContainerCpuset.String() + + firstAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(1)).List() + firstAdditionCpuset = cpuset.New(cpuList[1]) + } + firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) + tests.testPod1.expected[0].CPUsAllowedList = firstExpectedCpuset.String() + + secondAdditionCpuset = cpuset.New(2) + if isHTEnabled() { + cpuList := mustParseCPUSet(getCPUSiblingList(2)).List() + secondAdditionCpuset = cpuset.New(cpuList[0]) + } + secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) + tests.testPod2.expected[1].CPUsAllowedList = secondExpectedCpuset.String() + } + ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) + ginkgo.By(fmt.Sprintf("secondContainerCpuset:%v, secondAdditionCpuset:%v, secondExpectedCpuset:%v", secondContainerCpuset, secondAdditionCpuset, secondExpectedCpuset)) + } + + noRestart := v1.NotRequired + tests := []testCase{ + { + name: "2 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", + testPod1: testPod{ + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c1", 
"resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + testPod2: testPod{ + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "1", + }, + }, + patchString: `{"spec":{"containers":[ + {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} + ]}}`, + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c2", + Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + CPUsAllowedListValue: "2", + }, + }, + }, + }, + } + + timeouts := framework.NewTimeoutContext() + + for idx := range tests { + tc := tests[idx] + ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + + setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) + if tc.skipFlag { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU not enough") + } + + var patchedPod *v1.Pod + var pErr error + + createAndVerify := func(podName string, podClient *e2epod.PodClient, testContainers []e2epod.ResizableContainerInfo) (newPod *v1.Pod) { + var testPod 
*v1.Pod + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, fmt.Sprintf("resizepod-%s", podName), tStamp, testContainers) + testPod.GenerateName = "resize-test-" + testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) + + ginkgo.By("creating pod") + newPod = podClient.CreateSync(ctx, testPod) + + ginkgo.By("verifying initial pod resources, allocations are as expected") + e2epod.VerifyPodResources(newPod, testContainers) + ginkgo.By("verifying initial pod resize policy is as expected") + e2epod.VerifyPodResizePolicy(newPod, testContainers) + + ginkgo.By("verifying initial pod status resources are as expected") + framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, testContainers)) + ginkgo.By("verifying initial cgroup config are as expected") + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, testContainers)) + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By("verifying initial pod Cpus allowed list value") + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, newPod, testContainers). 
+ Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + } + return newPod + } + + newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) + newPod2 := createAndVerify("testpod2", podClient, tc.testPod2.containers) + + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool, newPod *v1.Pod) { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + e2epod.VerifyPodResources(patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + e2epod.VerifyPodResources(resizedPod, expectedContainers) + + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment skip below if CPU Manager Policy is set to none + if policy.name == string(cpumanager.PolicyStatic) { + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). 
+ WithArguments(f, resizedPod, expectedContainers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + } else { + gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + WithArguments(f, resizedPod, initialContainers). + Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + } + } + } + + patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", false, newPod1) + patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", false, newPod2) + + rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) + framework.ExpectNoError(err1) + rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) + framework.ExpectNoError(err2) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", true, newPod1) + patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", true, newPod2) + + ginkgo.By("deleting pod") + deletePodSyncByName(ctx, f, newPod1.Name) + deletePodSyncByName(ctx, f, newPod2.Name) + // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
+ // this is in turn needed because we will have an unavoidable (in the current framework) race with the + // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire + waitForAllContainerRemoval(ctx, newPod1.Name, newPod1.Namespace) + waitForAllContainerRemoval(ctx, newPod2.Name, newPod2.Namespace) + }) + } + + ginkgo.AfterEach(func(ctx context.Context) { + if oldCfg != nil { + updateKubeletConfig(ctx, f, oldCfg, true) + } + }) +} + +var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.WithSerial(), func() { + + policiesGeneralAvailability := []cpuManagerPolicyConfig{ + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with no options", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + { + name: string(cpumanager.PolicyStatic), + title: ", alongside CPU Manager Static Policy with FullPCPUsOnlyOption", + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", + cpumanager.DistributeCPUsAcrossCoresOption: "false", + }, + }, + } + + doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) + doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) + doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) +}) \ No newline at end of file From 2d2c8a35341c602a9114a8ee1991609f39990465 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Fri, 21 Feb 2025 12:38:01 +0100 Subject: [PATCH 3/9] Fix go fmt isssues --- pkg/kubelet/cm/cpumanager/cpu_assignment.go | 2 +- test/e2e_node/pod_resize_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go 
b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index e6a8ba1984383..652c3960a9abf 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -1520,4 +1520,4 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) -} \ No newline at end of file +} diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 423c76c2d7928..7b79142536238 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -20,8 +20,8 @@ import ( "context" "encoding/json" "fmt" - "strings" "strconv" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -2468,4 +2468,4 @@ var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.Wit doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) -}) \ No newline at end of file +}) From b5daaf9de6a54a3b0aa5eecab2cd923ff1fa5e5e Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Sat, 22 Feb 2025 14:45:10 +0100 Subject: [PATCH 4/9] Fix mutation heuristic check of mustKeepCPUs, reason is clone --- pkg/apis/core/validation/validation.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index d2d4cfcd83c11..c92187dbb75fa 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -6237,7 +6237,7 @@ func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecConta for _, oldEnv := range oldPodSpecContainer.Env { if oldEnv.Name == "mustKeepCPUs" { 
existOldMustKeepCPUs = true - container.Env[jx] = oldEnv + container.Env[jx] = oldEnv // +k8s:verify-mutation:reason=clone break } } @@ -6251,7 +6251,7 @@ func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecConta } // Delete mustKeepCPUs if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { - oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") + oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") // +k8s:verify-mutation:reason=clone } return allErrs } From 968d584f865dccb3b8e7ae14cd0630d77041a008 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 3 Mar 2025 15:02:24 +0100 Subject: [PATCH 5/9] Fix glangci-lint-pr failed test --- .../cm/cpumanager/policy_static_test.go | 2 +- test/e2e_node/pod_resize_test.go | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 0e140eebb8e1e..f1a2d1f65c267 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -22,8 +22,8 @@ import ( "testing" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" pkgfeatures "k8s.io/kubernetes/pkg/features" diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index 7b79142536238..a906c83451c43 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -1387,7 +1387,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). 
WithArguments(f, newPod, tc.containers). - Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { @@ -1419,11 +1419,11 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.expected). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.containers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2127,7 +2127,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, tc.containers). 
- Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { @@ -2158,11 +2158,11 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.containers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2366,7 +2366,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc ginkgo.By("verifying initial pod Cpus allowed list value") gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, testContainers). 
- Should(gomega.BeNil(), "failed to verify initial Pod CPUsAllowedListValue") + Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } return newPod } @@ -2402,11 +2402,11 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc if isInPlacePodVerticalScalingExclusiveCPUsEnabled { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") } else { gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, initialContainers). - Should(gomega.BeNil(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") + Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } From e94a026770c85a37d22d8d0db321648a0841b88b Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Wed, 12 Mar 2025 14:29:55 +0100 Subject: [PATCH 6/9] Fix compile issue, due to update in e2e/framework removing rollback bool --- pkg/kubelet/cm/cpumanager/policy_static.go | 1 - test/e2e_node/pod_resize_test.go | 26 +++++++++++----------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 0b9f9fb578af9..00f50597b3e3c 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -598,7 +598,6 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit result.Aligned = 
p.topology.CheckAlignment(result.CPUs) // Remove allocated CPUs from the shared CPUSet. - s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) if reusableCPUsForResize != nil { if reusableCPUsForResize.Size() < result.CPUs.Size() { // Scale up or creation has been performed diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index a906c83451c43..b0d5ccfc53a75 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -1390,7 +1390,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, isRollback bool) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -1428,12 +1428,12 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling } } - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) framework.ExpectNoError(err) // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -2130,7 +2130,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS Should(gomega.Succeed(), "failed to verify 
initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -2140,7 +2140,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS e2epod.VerifyPodResources(patchedPod, expectedContainers) ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) // Check cgroup values only for containerd versions before 1.6.9 @@ -2168,13 +2168,13 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS } ginkgo.By("First patch") - patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false) + patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) framework.ExpectNoError(err) // Resize has been actuated, test rollback ginkgo.By("Second patch for rollback") - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true) + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -2374,7 +2374,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) newPod2 := 
createAndVerify("testpod2", podClient, tc.testPod2.containers) - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, isRollback bool, newPod *v1.Pod) { + patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, newPod *v1.Pod) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -2384,7 +2384,7 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc e2epod.VerifyPodResources(patchedPod, expectedContainers) ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) // Check cgroup values only for containerd versions before 1.6.9 @@ -2411,16 +2411,16 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc } } - patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", false, newPod1) - patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", false, newPod2) + patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", newPod1) + patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", newPod2) rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) framework.ExpectNoError(err1) rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) framework.ExpectNoError(err2) // Resize has been actuated, test rollback - 
patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", true, newPod1) - patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", true, newPod2) + patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", newPod1) + patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", newPod2) ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod1.Name) From 616ad16f396be03d52037a7f3905be54522ac397 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Mon, 17 Mar 2025 15:25:35 +0100 Subject: [PATCH 7/9] Fix compile issue and address review comment. Use new topology.Allocation struct (a CPU set plus alignment metadata) instead of CPU set, due to rebase. Remove duplicate unecessary SetDefaultCPUSet call as per review comment. --- test/e2e_node/cpu_manager_test.go | 60 +++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 30ff89ce729eb..7f203df9266b6 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -3148,10 +3148,14 @@ func runCPUManagerTests(f *framework.Framework) { } reservedSystemCPUs := cpuset.New(0) - newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3174,12 +3178,16 @@ func runCPUManagerTests(f *framework.Framework) { cpuPolicyOptions := map[string]string{ cpumanager.StrictCPUReservationOption: "true", } - newCfg := configureCPUManagerInKubelet(oldCfg, 
&cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3255,7 +3263,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: reservedSystemCPUs, enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3277,7 +3287,9 @@ func runCPUManagerTests(f *framework.Framework) { policyName: string(cpumanager.PolicyStatic), reservedSystemCPUs: cpuset.New(0), disableCPUQuotaWithExclusiveCPUs: true, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3298,7 +3310,9 @@ func runCPUManagerTests(f *framework.Framework) { policyName: string(cpumanager.PolicyStatic), reservedSystemCPUs: cpuset.New(0), disableCPUQuotaWithExclusiveCPUs: false, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3316,10 +3330,14 @@ func runCPUManagerTests(f *framework.Framework) { } // Enable CPU Manager in the kubelet. 
- newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.CPUSet{}, - }, false, false) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }, + false, + false, + ) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod with a regular init container and a restartable init container") @@ -3399,7 +3417,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -3465,7 +3485,9 @@ func runCPUManagerTests(f *framework.Framework) { reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, + false, + false, ) updateKubeletConfig(ctx, f, newCfg, true) // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed From adaf88a32a58f37e9b3028f7fa8bdf176e9a2d88 Mon Sep 17 00:00:00 2001 From: Sotiris Salloumis Date: Fri, 28 Mar 2025 13:25:42 +0100 Subject: [PATCH 8/9] Address sig-node meeting comments for mustKeepCPUs. 
- Revert introduction of API env mustKeepCPUs - Replace mustKeepCPUs with local checkpoint "promised" - Introduce "promised" in CPUManagerCheckpointV3 format - Add logic, refactor with Beta candidate - Fix lint issues - Fail if mustKeepCPUs are not subset of resulted CPUs - Fail if reusableCPUsForResize, mustKeepCPUs are not a subset of aligned CPUs - Fail if mustKeepCPUs are not a subset of reusable CPUs - TODO improve align resize tests, go through testing, corner cases refactor using cpumanager_test.go - TODO improve CPUManagerCheckpointV3 tests - TODO address code review/feedback to try different approach to allocate stepwise instead of once off when resizing - TODO check init-containers - TODO check migration from v2 to v3 CPU Manager checkpoint - TODO check kubectl failure when prohibited can this be done earlier? - WIP update CPU Manager tests to use refactored cpu_manager_test - TODO update topologymanager,cpumanager,memorymanager documentation --- hack/local-up-cluster.sh | 15 + pkg/api/pod/testing/make.go | 6 - pkg/apis/core/validation/validation.go | 48 -- pkg/apis/core/validation/validation_test.go | 82 -- pkg/kubelet/cm/cpumanager/cpu_assignment.go | 38 +- pkg/kubelet/cm/cpumanager/cpu_manager.go | 3 + pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 32 + pkg/kubelet/cm/cpumanager/policy_static.go | 289 +++---- .../cm/cpumanager/policy_static_test.go | 232 ++++++ pkg/kubelet/cm/cpumanager/state/checkpoint.go | 69 +- pkg/kubelet/cm/cpumanager/state/state.go | 4 + .../cm/cpumanager/state/state_checkpoint.go | 140 +++- .../cpumanager/state/state_checkpoint_test.go | 1 + pkg/kubelet/cm/cpumanager/state/state_mem.go | 44 ++ pkg/kubelet/types/constants.go | 4 +- pkg/registry/core/pod/strategy.go | 1 - test/e2e_node/cpu_manager_metrics_test.go | 6 +- test/e2e_node/cpu_manager_test.go | 702 +----------------- test/e2e_node/pod_resize_test.go | 557 +++++++------- test/e2e_node/util.go | 4 +- test/e2e_node/util_machineinfo_linux.go | 11 + 
test/e2e_node/util_machineinfo_unsupported.go | 4 + 22 files changed, 1028 insertions(+), 1264 deletions(-) diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh index 267dbafb39e24..c023cabd71fc2 100755 --- a/hack/local-up-cluster.sh +++ b/hack/local-up-cluster.sh @@ -95,6 +95,7 @@ KUBELET_PROVIDER_ID=${KUBELET_PROVIDER_ID:-"$(hostname)"} FEATURE_GATES=${FEATURE_GATES:-"AllAlpha=false"} EMULATED_VERSION=${EMULATED_VERSION:+kube=$EMULATED_VERSION} TOPOLOGY_MANAGER_POLICY=${TOPOLOGY_MANAGER_POLICY:-""} +MEMORY_MANAGER_POLICY=${MEMORY_MANAGER_POLICY:-""} CPUMANAGER_POLICY=${CPUMANAGER_POLICY:-""} CPUMANAGER_RECONCILE_PERIOD=${CPUMANAGER_RECONCILE_PERIOD:-""} CPUMANAGER_POLICY_OPTIONS=${CPUMANAGER_POLICY_OPTIONS:-""} @@ -181,6 +182,15 @@ function usage { echo " CPUMANAGER_RECONCILE_PERIOD=\"5s\" \\" echo " KUBELET_FLAGS=\"--kube-reserved=cpu=1,memory=2Gi,ephemeral-storage=1Gi --system-reserved=cpu=1,memory=2Gi,ephemeral-storage=1Gi\" \\" echo " hack/local-up-cluster.sh (build a local copy of the source with full-pcpus-only CPU Management policy)" + echo "Example 5: PATH=\"\${PATH}:/usr/local/go/src/k8s.io/kubernetes/third_party/etcd\" \\" + echo " FEATURE_GATES=CPUManagerPolicyOptions=true,MemoryManager=true,InPlacePodVerticalScalingExclusiveCPUs=true \\" + echo " TOPOLOGY_MANAGER_POLICY=\"single-numa-node\" \\" + echo " MEMORY_MANAGER_POLICY=\"Static\" \\" + echo " CPUMANAGER_POLICY=\"static\" \\" + echo " CPUMANAGER_POLICY_OPTIONS=full-pcpus-only=\"true\" \\" + echo " CPUMANAGER_RECONCILE_PERIOD=\"5s\" \\" + echo " KUBELET_FLAGS=\"--kube-reserved=cpu=1,memory=4Gi --system-reserved=cpu=1,memory=1Gi --reserved-memory 0:memory=3Gi;1:memory=2148Mi --resolv-conf=/run/systemd/resolve/resolv.conf\" \\" + echo " hack/local-up-cluster.sh ( run with Topology, CPU, Memory Management policies alongside InPlacePodVerticalScaling, extra flags for etcd and coredns)" echo "" echo "-d dry-run: prepare for running commands, then show their command lines instead of running 
them" } @@ -1000,6 +1010,11 @@ EOF echo "topologyManagerPolicy: \"${TOPOLOGY_MANAGER_POLICY}\"" fi + # memorymanager policy + if [[ -n ${MEMORY_MANAGER_POLICY} ]]; then + echo "memoryManagerPolicy: \"${MEMORY_MANAGER_POLICY}\"" + fi + # cpumanager policy if [[ -n ${CPUMANAGER_POLICY} ]]; then echo "cpuManagerPolicy: \"${CPUMANAGER_POLICY}\"" diff --git a/pkg/api/pod/testing/make.go b/pkg/api/pod/testing/make.go index f971c64b1d74d..552d795ff7861 100644 --- a/pkg/api/pod/testing/make.go +++ b/pkg/api/pod/testing/make.go @@ -293,12 +293,6 @@ func SetContainerResources(rr api.ResourceRequirements) TweakContainer { } } -func SetContainerEnv(env []api.EnvVar) TweakContainer { - return func(cnr *api.Container) { - cnr.Env = env - } -} - func SetContainerPorts(ports ...api.ContainerPort) TweakContainer { return func(cnr *api.Container) { cnr.Ports = ports diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index c92187dbb75fa..04d3037984459 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -63,7 +63,6 @@ import ( "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/fieldpath" - "k8s.io/utils/cpuset" ) const isNegativeErrorMsg string = apimachineryvalidation.IsNegativeErrorMsg @@ -6114,7 +6113,6 @@ func ValidatePodResize(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel var newContainers []core.Container for ix, container := range originalCPUMemPodSpec.Containers { dropCPUMemoryResourcesFromContainer(&container, &oldPod.Spec.Containers[ix]) - allErrs = append(allErrs, dropMustKeepCPUsEnvFromContainer(&container, &oldPod.Spec.Containers[ix], specPath)...) if !apiequality.Semantic.DeepEqual(container, oldPod.Spec.Containers[ix]) { // This likely means that the user has made changes to resources other than CPU and memory for regular container. 
errs := field.Forbidden(specPath, "only cpu and memory resources are mutable") @@ -6210,52 +6208,6 @@ func dropCPUMemoryResourcesFromContainer(container *core.Container, oldPodSpecCo container.Resources = core.ResourceRequirements{Limits: lim, Requests: req} } -func removeEnvVar(envs []core.EnvVar, nameToRemove string) []core.EnvVar { - var newEnvs []core.EnvVar - for _, env := range envs { - if env.Name != nameToRemove { - newEnvs = append(newEnvs, env) - } - } - return newEnvs -} - -// dropMustKeepCPUsEnvFromContainer deletes the "mustKeepCPUs" in env from the container, and copies them from the old pod container resources if present. -func dropMustKeepCPUsEnvFromContainer(container *core.Container, oldPodSpecContainer *core.Container, fldPath *field.Path) field.ErrorList { - allErrs := field.ErrorList{} - // the element named "mustKeepCPUs" in env can be update or add - existNewMustKeepCPUs := false - existOldMustKeepCPUs := false - for jx, newEnv := range container.Env { - if newEnv.Name == "mustKeepCPUs" { - existNewMustKeepCPUs = true - _, err := cpuset.Parse(newEnv.Value) - if err != nil { - allErrs = append(allErrs, field.Invalid(fldPath, newEnv, "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed")) - } - // Change mustKeepCPUs - for _, oldEnv := range oldPodSpecContainer.Env { - if oldEnv.Name == "mustKeepCPUs" { - existOldMustKeepCPUs = true - container.Env[jx] = oldEnv // +k8s:verify-mutation:reason=clone - break - } - } - // Add mustKeepCPUs - if !existOldMustKeepCPUs && (len(container.Env)-len(oldPodSpecContainer.Env)) == 1 { - // Delete "mustKeepCPUs" in newPod to make newPod equal to oldPod - container.Env = removeEnvVar(container.Env, "mustKeepCPUs") - } - break - } - } - // Delete mustKeepCPUs - if !existNewMustKeepCPUs && (len(oldPodSpecContainer.Env)-len(container.Env)) == 1 { - oldPodSpecContainer.Env = removeEnvVar(oldPodSpecContainer.Env, "mustKeepCPUs") // +k8s:verify-mutation:reason=clone - } - return allErrs -} - // 
isPodResizeRequestSupported checks whether the pod is running on a node with InPlacePodVerticalScaling enabled. func isPodResizeRequestSupported(pod core.Pod) bool { // TODO: Remove this after GA+3 releases of InPlacePodVerticalScaling diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 5bb7f5c7542f7..5c32dcd3c7dbc 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -27852,46 +27852,6 @@ func TestValidatePodResize(t *testing.T) { })) } - mkPodWith1Env := func(envName1, envValue1 string, tweaks ...podtest.Tweak) *core.Pod { - return podtest.MakePod("pod", append(tweaks, - podtest.SetContainers( - podtest.MakeContainer( - "container", - podtest.SetContainerEnv( - []core.EnvVar{ - { - Name: envName1, - Value: envValue1, - }, - }, - ), - ), - ), - )...) - } - - mkPodWith2Env := func(envName1, envValue1, envName2, envValue2 string, tweaks ...podtest.Tweak) *core.Pod { - return podtest.MakePod("pod", append(tweaks, - podtest.SetContainers( - podtest.MakeContainer( - "container", - podtest.SetContainerEnv( - []core.EnvVar{ - { - Name: envName1, - Value: envValue1, - }, - { - Name: envName2, - Value: envValue2, - }, - }, - ), - ), - ), - )...) 
- } - tests := []struct { test string old *core.Pod @@ -28390,48 +28350,6 @@ func TestValidatePodResize(t *testing.T) { new: mkPodWithInitContainers(getResources("100m", "0", "2Gi", ""), core.ResourceList{}, core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.NotRequired)), err: "spec: Forbidden: only cpu and memory resources are mutable", }, - { - test: "Pod env:mustKeepCPUs change value", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "", - }, - { - test: "Pod env:mustKeepCPUs add value", - old: mkPodWith1Env("env1", "a"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "", - }, - { - test: "Pod env:mustKeepCPUs delete", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith1Env("env1", "a"), - err: "", - }, - { - test: "Pod env:env1 change is forbidden", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "b", "mustKeepCPUs", "0"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:env1 add is forbidden", - old: mkPodWith1Env("mustKeepCPUs", "0"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:env1 delete is forbidden", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith1Env("mustKeepCPUs", "0"), - err: "spec: Forbidden: only cpu and memory resources are mutable", - }, - { - test: "Pod env:mustKeepCPUs delete", - old: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1"), - new: mkPodWith2Env("env1", "a", "mustKeepCPUs", "1s2"), - err: "Check mustKeepCPUs format, only number \",\" and \"-\" are allowed", - }, } for _, test := range tests { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 652c3960a9abf..201bb0b7422a1 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ 
b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -444,7 +444,7 @@ type cpuAccumulator struct { availableCPUSorter availableCPUSorter } -func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) *cpuAccumulator { +func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) *cpuAccumulator { acc := &cpuAccumulator{ topo: topo, details: topo.CPUDetails.KeepOnly(availableCPUs), @@ -464,18 +464,18 @@ func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, } // Decrease of CPU resources ( scale down ) - // Take delta from allocated CPUs, if mustKeepCPUsForScaleDown + // Take delta from allocated CPUs, if mustKeepCPUsForResize // is not nil, use explicetely those. If it is nil // take delta starting from lowest CoreId of CPUs ( TODO esotsal, perhaps not needed). if numCPUs < reusableCPUsForResize.Size() { - if mustKeepCPUsForScaleDown != nil { + if mustKeepCPUsForResize != nil { // If explicetely CPUs to keep // during scale down is given ( this requires // addition in container[].resources ... which // could be possible to patch ? Esotsal Note This means // modifying API code - if !(mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - acc.take(mustKeepCPUsForScaleDown.Clone()) + if !(mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + acc.take(mustKeepCPUsForResize.Clone()) } else { return acc } @@ -1120,17 +1120,17 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). 
For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. -func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested to be retained is not a subset // of reusableCPUs, then we fail early - if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { - if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) } } - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) if acc.isSatisfied() { return acc.result, nil } @@ -1261,26 +1261,26 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C // of size 'cpuGroupSize' according to the algorithm described above. 
This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. -func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { // If the number of CPUs requested cannot be handed out in chunks of // 'cpuGroupSize', then we just call out the packing algorithm since we // can't distribute CPUs in this chunk size. // PreferAlignByUncoreCache feature not implemented here yet and set to false. // Support for PreferAlignByUncoreCache to be done at beta release. if (numCPUs % cpuGroupSize) != 0 { - return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) } // If the number of CPUs requested to be retained is not a subset // of reusableCPUs, then we fail early - if reusableCPUsForResize != nil && mustKeepCPUsForScaleDown != nil { - if (mustKeepCPUsForScaleDown.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { - return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForScaleDown.String(), reusableCPUsForResize.String()) + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs 
%s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) } } // Otherwise build an accumulator to start allocating CPUs from. - acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForScaleDown) + acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForResize) if acc.isSatisfied() { return acc.result, nil } @@ -1480,7 +1480,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize for _, numa := range bestCombo { reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForResize) acc.take(cpus) } @@ -1495,7 +1495,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { continue } - cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForScaleDown) + cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForResize) acc.take(cpus) remainder -= cpuGroupSize } @@ -1519,5 +1519,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu // If we never found a combination of NUMA nodes that we could properly // distribute CPUs across, fall back to the packing algorithm. 
- return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) } diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index 8b59ba6712190..28ae277ca43bd 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -275,6 +275,9 @@ func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID if cset, exists := m.state.GetCPUSet(string(pod.UID), container.Name); exists { m.lastUpdateState.SetCPUSet(string(pod.UID), container.Name, cset) } + if cset, exists := m.state.GetPromisedCPUSet(string(pod.UID), container.Name); exists { + m.lastUpdateState.SetPromisedCPUSet(string(pod.UID), container.Name, cset) + } m.containerMap.Add(string(pod.UID), container.Name, containerID) } diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index 702e317ab6907..6b0a0c12e42c3 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -46,6 +46,7 @@ import ( ) type mockState struct { + promised state.ContainerCPUAssignments assignments state.ContainerCPUAssignments defaultCPUSet cpuset.CPUSet } @@ -55,6 +56,11 @@ func (s *mockState) GetCPUSet(podUID string, containerName string) (cpuset.CPUSe return res.Clone(), ok } +func (s *mockState) GetPromisedCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + res, ok := s.promised[podUID][containerName] + return res.Clone(), ok +} + func (s *mockState) GetDefaultCPUSet() cpuset.CPUSet { return s.defaultCPUSet.Clone() } @@ -66,6 +72,13 @@ func (s *mockState) GetCPUSetOrDefault(podUID string, containerName string) cpus return s.GetDefaultCPUSet() } +func (s *mockState) SetPromisedCPUSet(podUID string, 
containerName string, cset cpuset.CPUSet) { + if _, exists := s.promised[podUID]; !exists { + s.promised[podUID] = make(map[string]cpuset.CPUSet) + } + s.promised[podUID][containerName] = cset +} + func (s *mockState) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { if _, exists := s.assignments[podUID]; !exists { s.assignments[podUID] = make(map[string]cpuset.CPUSet) @@ -82,11 +95,16 @@ func (s *mockState) Delete(podUID string, containerName string) { if len(s.assignments[podUID]) == 0 { delete(s.assignments, podUID) } + delete(s.promised[podUID], containerName) + if len(s.promised[podUID]) == 0 { + delete(s.promised, podUID) + } } func (s *mockState) ClearState() { s.defaultCPUSet = cpuset.CPUSet{} s.assignments = make(state.ContainerCPUAssignments) + s.promised = make(state.ContainerCPUAssignments) } func (s *mockState) SetCPUAssignments(a state.ContainerCPUAssignments) { @@ -97,6 +115,14 @@ func (s *mockState) GetCPUAssignments() state.ContainerCPUAssignments { return s.assignments.Clone() } +func (s *mockState) SetCPUPromised(a state.ContainerCPUAssignments) { + s.promised = a.Clone() +} + +func (s *mockState) GetCPUPromised() state.ContainerCPUAssignments { + return s.promised.Clone() +} + type mockPolicy struct { err error } @@ -337,6 +363,7 @@ func TestCPUManagerAdd(t *testing.T) { mgr := &manager{ policy: testCase.policy, state: &mockState{ + promised: state.ContainerCPUAssignments{}, assignments: state.ContainerCPUAssignments{}, defaultCPUSet: cpuset.New(1, 2, 3, 4), }, @@ -561,6 +588,7 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) mockState := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -753,6 +781,7 @@ func TestCPUManagerRemove(t *testing.T) { err: nil, }, state: &mockState{ + promised: 
state.ContainerCPUAssignments{}, assignments: state.ContainerCPUAssignments{}, defaultCPUSet: cpuset.New(), }, @@ -1249,6 +1278,7 @@ func TestReconcileState(t *testing.T) { mgr := &manager{ policy: testCase.policy, state: &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, }, @@ -1357,6 +1387,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { mgr := &manager{ policy: testCase.policy, state: &mockState{ + promised: state.ContainerCPUAssignments{}, assignments: state.ContainerCPUAssignments{}, defaultCPUSet: cpuset.New(0, 1, 2, 3), }, @@ -1506,6 +1537,7 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) { policy: testCase.policy, activePods: func() []*v1.Pod { return nil }, state: &mockState{ + promised: state.ContainerCPUAssignments{}, assignments: state.ContainerCPUAssignments{}, defaultCPUSet: cpuset.New(0, 1, 2, 3), }, diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 00f50597b3e3c..bae0f7dd7fb9d 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -66,6 +66,27 @@ func (e SMTAlignmentError) Type() string { return ErrorSMTAlignment } +// prohibitedCPUAllocationError represents an error due to an +// attempt to reduce container exclusively allocated +// pool below container exclusively promised pool +// allocated when container was created. +type prohibitedCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + PromisedCPUs int + GuaranteedCPUs int +} + +func (e prohibitedCPUAllocationError) Error() string { + return fmt.Sprintf("prohibitedCPUAllocation Error: Skip resize, Not allowed to reduce container exclusively allocated pool below promised, (requested CPUs = %s, allocated CPUs = %s, promised CPUs = %d, guaranteed CPUs = %d)", e.RequestedCPUs, e.AllocatedCPUs, e.PromisedCPUs, e.GuaranteedCPUs) +} + +// Type returns human-readable type of this error. 
+// Used in the HandlePodResourcesResize to populate Failure reason +func (e prohibitedCPUAllocationError) Type() string { + return types.ErrorProhibitedCPUAllocation +} + // inconsistentCPUAllocationError represents an error due to an // attempt to either move a container from exclusively allocated // pool to shared pool or move a container from shared pool to @@ -93,21 +114,21 @@ func (e inconsistentCPUAllocationError) Type() string { return types.ErrorInconsistentCPUAllocation } -// getCPUSetError represents an error due to a -// failed attempt to GetCPUSet from state -type getCPUSetError struct { +// getPromisedCPUSetError represents an error due to a +// failed attempt to GetPromisedCPUSet from state +type getPromisedCPUSetError struct { PodUID string ContainerName string } -func (e getCPUSetError) Error() string { - return fmt.Sprintf("getCPUSet Error: Skip resize, unable to get CPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +func (e getPromisedCPUSetError) Error() string { + return fmt.Sprintf("getPromisedCPUSet Error: Skip resize, unable to get PromisedCPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) } // Type returns human-readable type of this error. 
// Used in the HandlePodResourcesResize to populate Failure reason -func (e getCPUSetError) Type() string { - return types.ErrorGetCPUSet +func (e getPromisedCPUSetError) Type() string { + return types.ErrorGetPromisedCPUSet } // staticPolicy is a CPU manager policy that does not change CPU @@ -366,7 +387,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai numCPUs := p.guaranteedCPUs(pod, container) if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { // During a pod resize, handle corner cases - err := p.validateInPlacePodVerticalScaling(pod, container) + err := p.validateInPlacePodVerticalScaling(s, pod, container) if err != nil { klog.ErrorS(err, "Static policy: Unable to resize allocated CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) return err @@ -440,45 +461,41 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } } } - if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - klog.InfoS("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) - if cpusInUseByPodContainerToResize, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { - // Call Topology Manager to get the aligned socket affinity across all hint providers. 
- hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) - // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint - // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria - mustKeepCPUsForResize := p.GetMustKeepCPUs(container, cpuset) - newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainerToResize, mustKeepCPUsForResize) - if err != nil { - klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) - return err - } - // Allocation successful, update the current state - s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) - p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) - // Updated state to the checkpoint file will be stored during - // the reconcile loop. TODO is this a problem? I don't believe - // because if kubelet will be terminated now, anyhow it will be - // needed the state to be cleaned up, an error will appear requiring - // the node to be drained. I think we are safe. All computations are - // using state_mem and not the checkpoint. - return nil - } else { - return getCPUSetError{ - PodUID: string(pod.UID), - ContainerName: container.Name, - } + if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + klog.InfoS("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + // Call Topology Manager to get the aligned socket affinity across all hint providers. 
+ hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is immutable already allocated CPUs pass the criteria + mustKeepCPUsForResize, ok := s.GetPromisedCPUSet(string(pod.UID), container.Name) + if !ok { + return getPromisedCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, } - } else { - p.updateCPUsToReuse(pod, container, cpuset) - klog.InfoS("Static policy: InPlacePodVerticalScaling alognside CPU Static policy requires InPlacePodVerticalScaling to be enabled, skipping pod resize") - return nil } + newallocatedcpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) + if err != nil { + klog.ErrorS(err, "Static policy: Unable to allocate new CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } + + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + p.updateMetricsOnAllocate(s, newallocatedcpuset) + klog.V(4).InfoS("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint.
+ return nil } else { - p.updateCPUsToReuse(pod, container, cpuset) + p.updateCPUsToReuse(pod, container, cpusInUseByPodContainer) klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) return nil } @@ -496,6 +513,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + s.SetPromisedCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) p.updateMetricsOnAllocate(s, cpuAllocation) @@ -503,34 +521,6 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai return nil } -func (p *staticPolicy) GetMustKeepCPUs(container *v1.Container, oldCpuset cpuset.CPUSet) *cpuset.CPUSet { - mustKeepCPUs := cpuset.New() - for _, envVar := range container.Env { - if envVar.Name == "mustKeepCPUs" { - mustKeepCPUsInEnv, err := cpuset.Parse(envVar.Value) - if err == nil && mustKeepCPUsInEnv.Size() != 0 { - mustKeepCPUs = oldCpuset.Intersection(mustKeepCPUsInEnv) - } - klog.InfoS("mustKeepCPUs ", "is", mustKeepCPUs) - if p.options.FullPhysicalCPUsOnly { - // mustKeepCPUs must be aligned to the physical core - if (mustKeepCPUs.Size() % 2) != 0 { - return nil - } - mustKeepCPUsDetail := p.topology.CPUDetails.KeepOnly(mustKeepCPUs) - mustKeepCPUsDetailCores := mustKeepCPUsDetail.Cores() - if (mustKeepCPUs.Size() / mustKeepCPUsDetailCores.Size()) != p.cpuGroupSize { - klog.InfoS("mustKeepCPUs is nil") - return nil - } - } - return &mustKeepCPUs - } - } - klog.InfoS("mustKeepCPUs is nil") - return nil -} - // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. 
func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet { assignments := s.GetCPUAssignments() @@ -595,6 +585,11 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit return topology.EmptyAllocation(), err } result.CPUs = result.CPUs.Union(remainingCPUs) + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(result.CPUs) { + return topology.EmptyAllocation(), fmt.Errorf("requested CPUs to be retained %s are not a subset of resulted CPUs %s", mustKeepCPUsForResize.String(), result.CPUs.String()) + } + } result.Aligned = p.topology.CheckAlignment(result.CPUs) // Remove allocated CPUs from the shared CPUSet. @@ -666,7 +661,25 @@ func (p *staticPolicy) podGuaranteedCPUs(pod *v1.Pod) int { return requestedByLongRunningContainers } -func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForScaleDown *cpuset.CPUSet) (cpuset.CPUSet, error) { +func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // Protect against CPU leaks by failing early + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of available CPUs %s", mustKeepCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("reusable CPUs %s are not a subset of available CPUs %s", reusableCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(reusableCPUsForResize.Clone()) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs 
%s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + cpuSortingStrategy := CPUSortingStrategyPacked if p.options.DistributeCPUsAcrossCores { cpuSortingStrategy = CPUSortingStrategySpread @@ -677,9 +690,9 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, if p.options.FullPhysicalCPUsOnly { cpuGroupSize = p.cpuGroupSize } - return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) } - return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForScaleDown) + return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) } func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { @@ -699,23 +712,25 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v return nil } - // Short circuit to regenerate the same hints if there are already - // guaranteed CPUs allocated to the Container. This might happen after a - // kubelet restart, for example. - if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { - if allocated.Size() != requested { - klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) - // An empty list of hints will be treated as a preference that cannot be satisfied. 
- // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. - // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requested { + klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } + } + klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): {}, + string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested), } } - klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested), - } } // Get a list of available CPUs. @@ -759,12 +774,14 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin // kubelet restart, for example. 
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated.Size() != requestedByContainer { - klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) - // An empty list of hints will be treated as a preference that cannot be satisfied. - // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. - // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): {}, + klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } } } // A set of CPUs already assigned to containers in this pod @@ -809,7 +826,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Iterate through all combinations of numa nodes bitmask and build hints from them. 
hints := []topologymanager.TopologyHint{} - bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().UnsortedList(), func(mask bitmask.BitMask) { + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { // First, update minAffinitySize for the current request size. cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() if cpusInMask >= request && mask.Count() < minAffinitySize { @@ -819,7 +836,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Then check to see if we have enough CPUs available on the current // numa node bitmask to satisfy the CPU request. numMatching := 0 - for _, c := range reusableCPUs.UnsortedList() { + for _, c := range reusableCPUs.List() { // Disregard this mask if its NUMANode isn't part of it. if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { return @@ -829,7 +846,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu // Finally, check to see if enough available CPUs remain on the current // NUMA node combination to satisfy the CPU request. 
- for _, c := range availableCPUs.UnsortedList() { + for _, c := range availableCPUs.List() { if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { numMatching++ } @@ -962,46 +979,62 @@ func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpu } } -func (p *staticPolicy) validateInPlacePodVerticalScaling(pod *v1.Pod, container *v1.Container) error { +func (p *staticPolicy) validateInPlacePodVerticalScaling(s state.State, pod *v1.Pod, container *v1.Container) error { if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { return nil } cpuQuantity := container.Resources.Requests[v1.ResourceCPU] - if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { - allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] - if allocatedCPUQuantity.Value() > 0 { - if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { - // container belongs in exclusive pool - if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { - // container move to shared pool not allowed - return inconsistentCPUAllocationError{ - RequestedCPUs: cpuQuantity.String(), - AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: false, - } - } - } else { - // container belongs in shared pool - if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { - // container move to exclusive pool not allowed - return inconsistentCPUAllocationError{ - RequestedCPUs: cpuQuantity.String(), - AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: true, - } - } - } - } else { - // container belongs in shared pool - if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { - // container move to exclusive pool not allowed + cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name) + if !ok { + return nil + } + allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] + if allocatedCPUQuantity.Value() > 0 { + if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { + // container 
belongs in exclusive pool + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + // container move to shared pool not allowed return inconsistentCPUAllocationError{ RequestedCPUs: cpuQuantity.String(), AllocatedCPUs: allocatedCPUQuantity.String(), - Shared2Exclusive: true, + Shared2Exclusive: false, } } + mustKeepCPUsPromised, ok := s.GetPromisedCPUSet(string(pod.UID), container.Name) + if !ok { + return getPromisedCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + } + numCPUs := p.guaranteedCPUs(pod, container) + promisedCPUsQuantity := mustKeepCPUsPromised.Size() + if promisedCPUsQuantity <= numCPUs { + return nil + } + return prohibitedCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + PromisedCPUs: promisedCPUsQuantity, + GuaranteedCPUs: numCPUs, + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, } } return nil diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index f1a2d1f65c267..96f835040a1ae 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -42,6 +42,7 @@ type staticPolicyTest struct { podUID string options map[string]string containerName string + stPromised state.ContainerCPUAssignments stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod 
@@ -65,6 +66,7 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest { podUID: spt.podUID, options: spt.options, // accessed in read-only containerName: spt.containerName, + stPromised: spt.stAssignments.Clone(), stAssignments: spt.stAssignments.Clone(), stDefaultCPUSet: spt.stDefaultCPUSet.Clone(), pod: spt.pod, // accessed in read-only @@ -92,6 +94,11 @@ func TestStaticPolicyStart(t *testing.T) { { description: "non-corrupted state", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "0": cpuset.New(0), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "0": cpuset.New(0), @@ -104,6 +111,7 @@ func TestStaticPolicyStart(t *testing.T) { description: "empty cpuset", topo: topoDualSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(), expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -112,6 +120,7 @@ func TestStaticPolicyStart(t *testing.T) { description: "reserved cores 0 & 6 are not present in available cpuset", topo: topoDualSocketHT, numReservedCPUs: 2, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1), expErr: fmt.Errorf("not all reserved cpus: \"0,6\" are present in defaultCpuSet: \"0-1\""), @@ -121,6 +130,7 @@ func TestStaticPolicyStart(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 2, options: map[string]string{StrictCPUReservationOption: "true"}, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1), expErr: fmt.Errorf("some of strictly reserved cpus: \"0\" are present in defaultCpuSet: \"0-1\""), @@ -128,6 +138,11 @@ func TestStaticPolicyStart(t *testing.T) { { description: "assigned core 2 is still present in available cpuset", topo: topoDualSocketHT, + stPromised: 
state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "0": cpuset.New(0, 1, 2), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "0": cpuset.New(0, 1, 2), @@ -140,6 +155,11 @@ func TestStaticPolicyStart(t *testing.T) { description: "assigned core 2 is still present in available cpuset (StrictCPUReservationOption)", topo: topoDualSocketHT, options: map[string]string{StrictCPUReservationOption: "true"}, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "0": cpuset.New(0, 1, 2), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "0": cpuset.New(0, 1, 2), @@ -151,6 +171,12 @@ func TestStaticPolicyStart(t *testing.T) { { description: "core 12 is not present in topology but is in state cpuset", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "0": cpuset.New(0, 1, 2), + "1": cpuset.New(3, 4), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "0": cpuset.New(0, 1, 2), @@ -163,6 +189,12 @@ func TestStaticPolicyStart(t *testing.T) { { description: "core 11 is present in topology but is not in state cpuset", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "0": cpuset.New(0, 1, 2), + "1": cpuset.New(3, 4), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "0": cpuset.New(0, 1, 2), @@ -181,6 +213,7 @@ func TestStaticPolicyStart(t *testing.T) { } policy := p.(*staticPolicy) st := &mockState{ + promised: testCase.stPromised, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -234,6 +267,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, SingleSocketHT, ExpectAllocOneCore", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": 
map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(2, 3, 6, 7), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2, 3, 6, 7), @@ -249,6 +287,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, DualSocketHT, ExpectAllocOneSocket", topo: topoDualSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(2), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2), @@ -264,6 +307,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, DualSocketHT, ExpectAllocThreeCores", topo: topoDualSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(1, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(1, 5), @@ -279,6 +327,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, DualSocketNoHT, ExpectAllocOneSocket", topo: topoDualSocketNoHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(), @@ -294,6 +347,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, DualSocketNoHT, ExpectAllocFourCores", topo: topoDualSocketNoHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(4, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(4, 5), @@ -309,6 +367,11 @@ func TestStaticPolicyAdd(t *testing.T) { 
description: "GuPodMultipleCores, DualSocketHT, ExpectAllocOneSocketOneCore", topo: topoDualSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(2), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2), @@ -324,6 +387,7 @@ func TestStaticPolicyAdd(t *testing.T) { description: "NonGuPod, SingleSocketHT, NoAlloc", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer1", "1000m", "2000m"), @@ -335,6 +399,7 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodNonIntegerCore, SingleSocketHT, NoAlloc", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer4", "977m", "977m"), @@ -348,6 +413,11 @@ func TestStaticPolicyAdd(t *testing.T) { // Expect all CPUs from Socket 0. description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocSock0", topo: topoQuadSocketFourWayHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(3, 11, 4, 5, 6, 7), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(3, 11, 4, 5, 6, 7), @@ -364,6 +434,12 @@ func TestStaticPolicyAdd(t *testing.T) { // Expect CPUs from the 2 full cores available from the three Sockets. 
description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllFullCoresFromThreeSockets", topo: topoQuadSocketFourWayHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, + 53, 173, 113, 233, 54, 61)), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, @@ -381,6 +457,12 @@ func TestStaticPolicyAdd(t *testing.T) { // Expect all CPUs from Socket 1 and the hyper-threads from the full core. description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllSock1+FullCore", topo: topoQuadSocketFourWayHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, + 173, 61, 181, 108, 228, 115, 235))), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, @@ -402,6 +484,7 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"), @@ -414,6 +497,11 @@ func TestStaticPolicyAdd(t *testing.T) { // Expect allocation of all the CPUs from the partial cores. 
description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocCPUs", topo: topoQuadSocketFourWayHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), @@ -429,6 +517,7 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodSingleCore, SingleSocketHT, ExpectError", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"), @@ -440,6 +529,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 2, 5, 6), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 2, 5, 6), @@ -455,6 +549,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, DualSocketHT, NoAllocExpectError", topo: topoDualSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(1, 2, 3), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(1, 2, 3), @@ -470,6 +569,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, SingleSocketHT, NoAllocExpectError", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + 
"fakeContainer100": cpuset.New(1, 2, 3, 4, 5, 6), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(1, 2, 3, 4, 5, 6), @@ -487,6 +591,11 @@ func TestStaticPolicyAdd(t *testing.T) { // Error is expected since available CPUs are less than the request. description: "GuPodMultipleCores, topoQuadSocketFourWayHT, NoAlloc", topo: topoQuadSocketFourWayHT, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), @@ -509,6 +618,7 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"), @@ -524,6 +634,7 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 8, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: largeTopoCPUSet, pod: makePod("fakePod", "fakeContainer15", "15000m", "15000m"), @@ -539,6 +650,7 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 2, reservedCPUs: newCPUSetPtr(1, 6), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 5, 7, 8, 9, 10, 11), pod: makePod("fakePod", "fakeContainerBug113537_1", "10000m", "10000m"), @@ -553,6 +665,7 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 2, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(1, 2, 3, 
4, 5, 7, 8, 9, 10, 11), pod: makePod("fakePod", "fakeContainerBug113537_2", "10000m", "10000m"), @@ -568,6 +681,7 @@ func TestStaticPolicyAdd(t *testing.T) { }, numReservedCPUs: 2, reservedCPUs: newCPUSetPtr(0, 6), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11), pod: makePod("fakePod", "fakeContainerBug113537_2", "10000m", "10000m"), @@ -583,6 +697,11 @@ func TestStaticPolicyAdd(t *testing.T) { description: "podResize GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation", topo: topoSingleSocketHT, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 2, 5, 6), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 2, 5, 6), @@ -601,6 +720,11 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 5), @@ -619,6 +743,11 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 5), @@ -637,6 +766,11 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 5), @@ 
-656,6 +790,7 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "false", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer3", "1000m", "1000m"), @@ -671,6 +806,11 @@ func TestStaticPolicyAdd(t *testing.T) { FullPCPUsOnlyOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer3": cpuset.New(1, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer3": cpuset.New(1, 5), @@ -697,6 +837,7 @@ func TestStaticPolicyAdd(t *testing.T) { AlignBySocketOption: "true", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(2, 11, 21, 22), pod: makePod("fakePod", "fakeContainer2", "2000m", "2000m"), @@ -712,6 +853,7 @@ func TestStaticPolicyAdd(t *testing.T) { AlignBySocketOption: "false", }, numReservedCPUs: 1, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(2, 11, 21, 22), pod: makePod("fakePod", "fakeContainer2", "2000m", "2000m"), @@ -765,6 +907,7 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { } st := &mockState{ + promised: testCase.stPromised, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -830,6 +973,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { []struct{ request, limit string }{ {"2000m", "2000m"}}), // 0, 4 containerName: "initContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -845,6 +989,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } st := &mockState{ + promised: testCase.stAssignments, assignments: 
testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -896,6 +1041,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "4000m", resizeRequest: "4000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -917,6 +1063,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -939,9 +1086,11 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", PromisedCPUs: 4, GuaranteedCPUs: 2}, expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), expCSetAfterResizeSize: 4, expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), @@ -960,6 +1109,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -982,6 +1132,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "200m", resizeRequest: "200m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1003,6 +1154,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t 
*testing.T) { resizeLimit: "1200m", resizeRequest: "1200m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1024,6 +1176,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "100m", resizeRequest: "100m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1045,6 +1198,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "1100m", resizeRequest: "1100m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1066,6 +1220,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { resizeLimit: "1500m", resizeRequest: "1500m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1084,6 +1239,7 @@ func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) st := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1178,6 +1334,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicyAlways}}, // 0, 1, 4, 5 []*containerOptions{ {request: "2000m", limit: "2000m"}}), // 2, 6 + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1192,6 +1349,7 @@ func 
TestStaticPolicyDoNotReuseCPUs(t *testing.T) { } st := &mockState{ + promised: testCase.stPromised, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1237,6 +1395,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "4000m", resizeRequest: "4000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1260,6 +1419,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1284,9 +1444,11 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", PromisedCPUs: 4, GuaranteedCPUs: 2}, containerName2: "appContainer-1", expCSetAfterAlloc: cpuset.New(), expCSetAfterResize: cpuset.New(), @@ -1307,6 +1469,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "2000m", resizeRequest: "2000m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1331,6 +1494,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "200m", resizeRequest: "200m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), 
}, @@ -1354,6 +1518,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "1200m", resizeRequest: "1200m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1377,6 +1542,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "100m", resizeRequest: "100m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1400,6 +1566,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { resizeLimit: "1500m", resizeRequest: "1500m", containerName: "appContainer-0", + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), }, @@ -1419,6 +1586,7 @@ func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) st := &mockState{ + promised: testCase.stPromised, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1511,6 +1679,11 @@ func TestStaticPolicyRemove(t *testing.T) { topo: topoSingleSocketHT, podUID: "fakePod", containerName: "fakeContainer1", + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer1": cpuset.New(1, 2, 3), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer1": cpuset.New(1, 2, 3), @@ -1524,6 +1697,12 @@ func TestStaticPolicyRemove(t *testing.T) { topo: topoSingleSocketHT, podUID: "fakePod", containerName: "fakeContainer1", + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer1": cpuset.New(1, 2, 3), + "fakeContainer2": 
cpuset.New(4, 5, 6, 7), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer1": cpuset.New(1, 2, 3), @@ -1538,6 +1717,12 @@ func TestStaticPolicyRemove(t *testing.T) { topo: topoSingleSocketHT, podUID: "fakePod", containerName: "fakeContainer1", + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer1": cpuset.New(1, 3, 5), + "fakeContainer2": cpuset.New(2, 4), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer1": cpuset.New(1, 3, 5), @@ -1552,6 +1737,11 @@ func TestStaticPolicyRemove(t *testing.T) { topo: topoSingleSocketHT, podUID: "fakePod", containerName: "fakeContainer2", + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer1": cpuset.New(1, 3, 5), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer1": cpuset.New(1, 3, 5), @@ -1569,6 +1759,7 @@ func TestStaticPolicyRemove(t *testing.T) { } st := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1591,6 +1782,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { testCases := []struct { description string topo *topology.CPUTopology + stPromised state.ContainerCPUAssignments stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet numRequested int @@ -1600,6 +1792,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { { description: "Request 2 CPUs, No BitMask", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), numRequested: 2, @@ -1609,6 +1802,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { { description: "Request 2 CPUs, BitMask on Socket 0", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{}, 
stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), numRequested: 2, @@ -1621,6 +1815,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { { description: "Request 2 CPUs, BitMask on Socket 1", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), numRequested: 2, @@ -1633,6 +1828,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { { description: "Request 8 CPUs, BitMask on Socket 0", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), numRequested: 8, @@ -1645,6 +1841,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { { description: "Request 8 CPUs, BitMask on Socket 1", topo: topoDualSocketHT, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), numRequested: 8, @@ -1662,6 +1859,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { } policy := p.(*staticPolicy) st := &mockState{ + promised: tc.stPromised, assignments: tc.stAssignments, defaultCPUSet: tc.stDefaultCPUSet, } @@ -1693,6 +1891,7 @@ type staticPolicyTestWithResvList struct { numReservedCPUs int reserved cpuset.CPUSet cpuPolicyOptions map[string]string + stPromised state.ContainerCPUAssignments stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod @@ -1710,6 +1909,7 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(), expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -1720,6 +1920,7 @@ func 
TestStaticPolicyStartWithResvList(t *testing.T) { numReservedCPUs: 2, reserved: cpuset.New(0, 1), cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"}, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(), expCSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -1729,6 +1930,7 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(2, 3, 4, 5), expErr: fmt.Errorf("not all reserved cpus: \"0-1\" are present in defaultCpuSet: \"2-5\""), @@ -1739,6 +1941,7 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { numReservedCPUs: 2, reserved: cpuset.New(0, 1), cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"}, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5), expErr: fmt.Errorf("some of strictly reserved cpus: \"0-1\" are present in defaultCpuSet: \"0-5\""), @@ -1748,6 +1951,7 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, reserved: cpuset.New(0, 1), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"), @@ -1765,6 +1969,7 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { } policy := p.(*staticPolicy) st := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1794,6 +1999,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, reserved: cpuset.New(0), + stPromised: 
state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"), @@ -1806,6 +2012,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"), @@ -1818,6 +2025,11 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + stPromised: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(2, 3, 6, 7), + }, + }, stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2, 3, 6, 7), @@ -1838,6 +2050,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { } st := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet, } @@ -1896,6 +2109,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // remove partially used uncores from the available CPUs to simulate fully clean slate stDefaultCPUSet: topoDualSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -1925,6 +2139,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // remove partially used uncores from the available CPUs to simulate fully clean slate stDefaultCPUSet: 
topoDualSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -1955,6 +2170,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // remove partially used uncores from the available CPUs to simulate fully clean slate stDefaultCPUSet: topoDualSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -1985,6 +2201,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // remove partially used uncores from the available CPUs to simulate fully clean slate stDefaultCPUSet: topoDualSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -2017,6 +2234,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: topoDualSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs(), pod: WithPodUID( @@ -2040,6 +2258,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(), pod: WithPodUID( @@ -2063,6 +2282,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // Uncore 1 fully allocated stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs().Difference( @@ 
-2089,6 +2309,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // Uncore 2, 3, and 5 fully allocated stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketNoSMTSmallUncore.CPUDetails.CPUs().Difference( @@ -2121,6 +2342,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // uncore 1 fully allocated stDefaultCPUSet: topoSmallDualSocketSingleNumaPerSocketNoSMTUncore.CPUDetails.CPUs().Difference( @@ -2148,6 +2370,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // uncore 1 fully allocated stDefaultCPUSet: topoSmallDualSocketSingleNumaPerSocketNoSMTUncore.CPUDetails.CPUs().Difference( @@ -2175,6 +2398,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // 4 cpus allocated from uncore 1 stDefaultCPUSet: topoLargeSingleSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -2202,6 +2426,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // 4 cpus allocated from uncore 1 stDefaultCPUSet: topoLargeSingleSocketSingleNumaPerSocketSMTUncore.CPUDetails.CPUs().Difference( @@ -2229,6 +2454,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: 
"true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: topoDualSocketMultiNumaPerSocketUncore.CPUDetails.CPUs(), pod: WithPodUID( @@ -2252,6 +2478,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: topoDualSocketSubNumaPerSocketHTMonolithicUncore.CPUDetails.CPUs(), pod: WithPodUID( @@ -2277,6 +2504,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // CPUs 4-7 allocated stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketPCoreHTMonolithicUncore.CPUDetails.CPUs().Difference( @@ -2305,6 +2533,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: topoLargeSingleSocketSingleNumaPerSocketUncore.CPUDetails.CPUs(), pod: WithPodUID( @@ -2328,6 +2557,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // CPUs 6-9, 12-15, 18-19 allocated stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketUncore.CPUDetails.CPUs().Difference( @@ -2363,6 +2593,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { FullPCPUsOnlyOption: "true", PreferAlignByUnCoreCacheOption: "true", }, + stPromised: state.ContainerCPUAssignments{}, stAssignments: state.ContainerCPUAssignments{}, // Every uncore has partially allocated 4 CPUs 
stDefaultCPUSet: topoSmallSingleSocketSingleNumaPerSocketNoSMTUncore.CPUDetails.CPUs().Difference( @@ -2606,6 +2837,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { } st := &mockState{ + promised: testCase.stAssignments, assignments: testCase.stAssignments, defaultCPUSet: testCase.stDefaultCPUSet.Difference(testCase.reserved), // ensure the cpumanager invariant } diff --git a/pkg/kubelet/cm/cpumanager/state/checkpoint.go b/pkg/kubelet/cm/cpumanager/state/checkpoint.go index 564c3482c0441..ea12e9be73e95 100644 --- a/pkg/kubelet/cm/cpumanager/state/checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/checkpoint.go @@ -30,10 +30,20 @@ import ( var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV1{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV2{} +var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV3{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpoint{} -// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v3 format type CPUManagerCheckpoint struct { + PolicyName string `json:"policyName"` + DefaultCPUSet string `json:"defaultCpuSet"` + Entries map[string]map[string]string `json:"entries,omitempty"` + Promised map[string]map[string]string `json:"promised,omitempty"` + Checksum checksum.Checksum `json:"checksum"` +} + +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +type CPUManagerCheckpointV2 struct { PolicyName string `json:"policyName"` DefaultCPUSet string `json:"defaultCpuSet"` Entries map[string]map[string]string `json:"entries,omitempty"` @@ -48,13 +58,13 @@ type CPUManagerCheckpointV1 struct { Checksum checksum.Checksum `json:"checksum"` } -// CPUManagerCheckpointV2 struct is used to store cpu/pod assignments in a checkpoint in v2 format -type CPUManagerCheckpointV2 = CPUManagerCheckpoint +// CPUManagerCheckpointV3 struct is used 
to store cpu/pod assignments in a checkpoint in v3 format +type CPUManagerCheckpointV3 = CPUManagerCheckpoint // NewCPUManagerCheckpoint returns an instance of Checkpoint func NewCPUManagerCheckpoint() *CPUManagerCheckpoint { //nolint:staticcheck // unexported-type-in-api user-facing error message - return newCPUManagerCheckpointV2() + return newCPUManagerCheckpointV3() } func newCPUManagerCheckpointV1() *CPUManagerCheckpointV1 { @@ -69,6 +79,13 @@ func newCPUManagerCheckpointV2() *CPUManagerCheckpointV2 { } } +func newCPUManagerCheckpointV3() *CPUManagerCheckpointV3 { + return &CPUManagerCheckpointV3{ + Entries: make(map[string]map[string]string), + Promised: make(map[string]map[string]string), + } +} + // MarshalCheckpoint returns marshalled checkpoint in v1 format func (cp *CPUManagerCheckpointV1) MarshalCheckpoint() ([]byte, error) { // make sure checksum wasn't set before so it doesn't affect output checksum @@ -85,6 +102,14 @@ func (cp *CPUManagerCheckpointV2) MarshalCheckpoint() ([]byte, error) { return json.Marshal(*cp) } +// MarshalCheckpoint returns marshalled checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) MarshalCheckpoint() ([]byte, error) { + // make sure checksum wasn't set before so it doesn't affect output checksum + cp.Checksum = 0 + cp.Checksum = checksum.New(cp) + return json.Marshal(*cp) +} + // UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v1 format func (cp *CPUManagerCheckpointV1) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) @@ -95,6 +120,11 @@ func (cp *CPUManagerCheckpointV2) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) } +// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, cp) +} + // VerifyChecksum verifies that current checksum of checkpoint is valid in v1 format func (cp *CPUManagerCheckpointV1) VerifyChecksum() 
error { if cp.Checksum == 0 { @@ -109,7 +139,9 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { cp.Checksum = ck hash := fnv.New32a() - fmt.Fprintf(hash, "%v", object) + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } actualCS := checksum.Checksum(hash.Sum32()) if cp.Checksum != actualCS { return &errors.CorruptCheckpointError{ @@ -123,6 +155,33 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { // VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format func (cp *CPUManagerCheckpointV2) VerifyChecksum() error { + if cp.Checksum == 0 { + // accept empty checksum for compatibility with old file backend + return nil + } + ck := cp.Checksum + cp.Checksum = 0 + object := dump.ForHash(cp) + object = strings.Replace(object, "CPUManagerCheckpointV2", "CPUManagerCheckpoint", 1) + cp.Checksum = ck + + hash := fnv.New32a() + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } + actualCS := checksum.Checksum(hash.Sum32()) + if cp.Checksum != actualCS { + return &errors.CorruptCheckpointError{ + ActualCS: uint64(actualCS), + ExpectedCS: uint64(cp.Checksum), + } + } + + return nil +} + +// VerifyChecksum verifies that current checksum of checkpoint is valid in v3 format +func (cp *CPUManagerCheckpointV3) VerifyChecksum() error { if cp.Checksum == 0 { // accept empty checksum for compatibility with old file backend return nil diff --git a/pkg/kubelet/cm/cpumanager/state/state.go b/pkg/kubelet/cm/cpumanager/state/state.go index 352fddfb9cdad..f1196601c1b7d 100644 --- a/pkg/kubelet/cm/cpumanager/state/state.go +++ b/pkg/kubelet/cm/cpumanager/state/state.go @@ -37,16 +37,20 @@ func (as ContainerCPUAssignments) Clone() ContainerCPUAssignments { // Reader interface used to read current cpu/pod assignment state type Reader interface { + GetPromisedCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) 
GetDefaultCPUSet() cpuset.CPUSet GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet GetCPUAssignments() ContainerCPUAssignments + GetCPUPromised() ContainerCPUAssignments } type writer interface { + SetPromisedCPUSet(podUID string, containerName string, cpuset cpuset.CPUSet) SetCPUSet(podUID string, containerName string, cpuset cpuset.CPUSet) SetDefaultCPUSet(cpuset cpuset.CPUSet) SetCPUAssignments(ContainerCPUAssignments) + SetCPUPromised(ContainerCPUAssignments) Delete(podUID string, containerName string) ClearState() } diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go index bda90ba1f4ca6..8312ac0e5084a 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go @@ -17,13 +17,14 @@ limitations under the License. package state import ( + "errors" "fmt" "path/filepath" "sync" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" + checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/utils/cpuset" ) @@ -62,8 +63,8 @@ func NewCheckpointState(stateDir, checkpointName, policyName string, initialCont return stateCheckpoint, nil } -// migrateV1CheckpointToV2Checkpoint() converts checkpoints from the v1 format to the v2 format -func (sc *stateCheckpoint) migrateV1CheckpointToV2Checkpoint(src *CPUManagerCheckpointV1, dst *CPUManagerCheckpointV2) error { +// migrateV1CheckpointToV3Checkpoint() converts checkpoints from the v1 format to the v3 format +func (sc *stateCheckpoint) migrateV1CheckpointToV3Checkpoint(src *CPUManagerCheckpointV1, dst *CPUManagerCheckpointV3) error { if src.PolicyName != "" { dst.PolicyName = src.PolicyName } @@ -82,6 +83,42 @@ func (sc *stateCheckpoint) migrateV1CheckpointToV2Checkpoint(src *CPUManagerChec dst.Entries[podUID] = make(map[string]string) } 
dst.Entries[podUID][containerName] = cset + if dst.Promised == nil { + dst.Promised = make(map[string]map[string]string) + } + if _, exists := dst.Promised[podUID]; !exists { + dst.Promised[podUID] = make(map[string]string) + } + dst.Promised[podUID][containerName] = cset + } + return nil +} + +// migrateV2CheckpointToV3Checkpoint() converts checkpoints from the v2 format to the v3 format +func (sc *stateCheckpoint) migrateV2CheckpointToV3Checkpoint(src *CPUManagerCheckpointV2, dst *CPUManagerCheckpointV3) error { + if src.PolicyName != "" { + dst.PolicyName = src.PolicyName + } + if src.DefaultCPUSet != "" { + dst.DefaultCPUSet = src.DefaultCPUSet + } + for podUID := range src.Entries { + for containerName, cpuString := range src.Entries[podUID] { + if dst.Entries == nil { + dst.Entries = make(map[string]map[string]string) + } + if _, exists := dst.Entries[podUID]; !exists { + dst.Entries[podUID] = make(map[string]string) + } + dst.Entries[podUID][containerName] = cpuString + if dst.Promised == nil { + dst.Promised = make(map[string]map[string]string) + } + if _, exists := dst.Promised[podUID]; !exists { + dst.Promised[podUID] = make(map[string]string) + } + dst.Promised[podUID][containerName] = cpuString + } } return nil } @@ -94,44 +131,62 @@ func (sc *stateCheckpoint) restoreState() error { checkpointV1 := newCPUManagerCheckpointV1() checkpointV2 := newCPUManagerCheckpointV2() + checkpointV3 := newCPUManagerCheckpointV3() if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV1); err != nil { - checkpointV1 = &CPUManagerCheckpointV1{} // reset it back to 0 if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV2); err != nil { - if err == errors.ErrCheckpointNotFound { - return sc.storeState() + if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV3); err != nil { + if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) { + return sc.storeState() + } + return err + } + } else { + if err = 
sc.migrateV2CheckpointToV3Checkpoint(checkpointV2, checkpointV3); err != nil { + return fmt.Errorf("error migrating v2 checkpoint state to v3 checkpoint state: %w", err) } - return err + } + } else { + if err = sc.migrateV1CheckpointToV3Checkpoint(checkpointV1, checkpointV3); err != nil { + return fmt.Errorf("error migrating v1 checkpoint state to v2 checkpoint state: %w", err) } } - if err = sc.migrateV1CheckpointToV2Checkpoint(checkpointV1, checkpointV2); err != nil { - return fmt.Errorf("error migrating v1 checkpoint state to v2 checkpoint state: %s", err) - } - - if sc.policyName != checkpointV2.PolicyName { - return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV2.PolicyName) + if sc.policyName != checkpointV3.PolicyName { + return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV3.PolicyName) } var tmpDefaultCPUSet cpuset.CPUSet - if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV2.DefaultCPUSet); err != nil { - return fmt.Errorf("could not parse default cpu set %q: %v", checkpointV2.DefaultCPUSet, err) + if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV3.DefaultCPUSet); err != nil { + return fmt.Errorf("could not parse default cpu set %q: %w", checkpointV3.DefaultCPUSet, err) } var tmpContainerCPUSet cpuset.CPUSet tmpAssignments := ContainerCPUAssignments{} - for pod := range checkpointV2.Entries { - tmpAssignments[pod] = make(map[string]cpuset.CPUSet, len(checkpointV2.Entries[pod])) - for container, cpuString := range checkpointV2.Entries[pod] { + for pod := range checkpointV3.Entries { + tmpAssignments[pod] = make(map[string]cpuset.CPUSet, len(checkpointV3.Entries[pod])) + for container, cpuString := range checkpointV3.Entries[pod] { if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil { - return fmt.Errorf("could not parse cpuset %q for container %q in pod %q: %v", cpuString, container, pod, err) + return fmt.Errorf("could not parse 
cpuset %q for container %q in pod %q: %w", cpuString, container, pod, err) } tmpAssignments[pod][container] = tmpContainerCPUSet } } + tmpPromised := ContainerCPUAssignments{} + for pod := range checkpointV3.Promised { + tmpPromised[pod] = make(map[string]cpuset.CPUSet, len(checkpointV3.Promised[pod])) + for container, cpuString := range checkpointV3.Promised[pod] { + if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil { + return fmt.Errorf("could not parse cpuset %q for container %q in pod %q: %w", cpuString, container, pod, err) + } + tmpPromised[pod][container] = tmpContainerCPUSet + } + } + sc.cache.SetDefaultCPUSet(tmpDefaultCPUSet) sc.cache.SetCPUAssignments(tmpAssignments) + sc.cache.SetCPUPromised(tmpPromised) klog.V(2).InfoS("State checkpoint: restored state from checkpoint") klog.V(2).InfoS("State checkpoint: defaultCPUSet", "defaultCpuSet", tmpDefaultCPUSet.String()) @@ -153,6 +208,14 @@ func (sc *stateCheckpoint) storeState() error { } } + promised := sc.cache.GetCPUPromised() + for pod := range promised { + checkpoint.Promised[pod] = make(map[string]string, len(promised[pod])) + for container, cset := range promised[pod] { + checkpoint.Promised[pod][container] = cset.String() + } + } + err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) if err != nil { klog.ErrorS(err, "Failed to save checkpoint") @@ -161,6 +224,15 @@ func (sc *stateCheckpoint) storeState() error { return nil } +// GetPromisedCPUSet returns promised CPU set +func (sc *stateCheckpoint) GetPromisedCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + sc.mux.RLock() + defer sc.mux.RUnlock() + + res, ok := sc.cache.GetPromisedCPUSet(podUID, containerName) + return res, ok +} + // GetCPUSet returns current CPU set func (sc *stateCheckpoint) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { sc.mux.RLock() @@ -194,6 +266,25 @@ func (sc *stateCheckpoint) GetCPUAssignments() ContainerCPUAssignments { return 
sc.cache.GetCPUAssignments() } +// GetCPUPromised returns current CPU to pod promised +func (sc *stateCheckpoint) GetCPUPromised() ContainerCPUAssignments { + sc.mux.RLock() + defer sc.mux.RUnlock() + + return sc.cache.GetCPUPromised() +} + +// SetPromisedCPUSet sets CPU set +func (sc *stateCheckpoint) SetPromisedCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { + sc.mux.Lock() + defer sc.mux.Unlock() + sc.cache.SetPromisedCPUSet(podUID, containerName, cset) + err := sc.storeState() + if err != nil { + klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName) + } +} + // SetCPUSet sets CPU set func (sc *stateCheckpoint) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { sc.mux.Lock() @@ -227,6 +318,17 @@ func (sc *stateCheckpoint) SetCPUAssignments(a ContainerCPUAssignments) { } } +// SetCPUPromised sets CPU to pod promised +func (sc *stateCheckpoint) SetCPUPromised(a ContainerCPUAssignments) { + sc.mux.Lock() + defer sc.mux.Unlock() + sc.cache.SetCPUPromised(a) + err := sc.storeState() + if err != nil { + klog.ErrorS(err, "Failed to store state to checkpoint") + } +} + // Delete deletes assignment for specified pod func (sc *stateCheckpoint) Delete(podUID string, containerName string) { sc.mux.Lock() diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go index 3cb0ab9eb0fb8..1f14bc6b669cd 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go @@ -365,6 +365,7 @@ func TestCheckpointStateHelpers(t *testing.T) { } } } + }) } } diff --git a/pkg/kubelet/cm/cpumanager/state/state_mem.go b/pkg/kubelet/cm/cpumanager/state/state_mem.go index cb01ea92609b0..c47ad1daaa84b 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_mem.go +++ b/pkg/kubelet/cm/cpumanager/state/state_mem.go @@ -25,6 +25,7 @@ import ( type stateMemory struct { sync.RWMutex + 
promised ContainerCPUAssignments assignments ContainerCPUAssignments defaultCPUSet cpuset.CPUSet } @@ -35,11 +36,20 @@ var _ State = &stateMemory{} func NewMemoryState() State { klog.InfoS("Initialized new in-memory state store") return &stateMemory{ + promised: ContainerCPUAssignments{}, assignments: ContainerCPUAssignments{}, defaultCPUSet: cpuset.New(), } } +func (s *stateMemory) GetPromisedCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + s.RLock() + defer s.RUnlock() + + res, ok := s.promised[podUID][containerName] + return res.Clone(), ok +} + func (s *stateMemory) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { s.RLock() defer s.RUnlock() @@ -62,12 +72,30 @@ func (s *stateMemory) GetCPUSetOrDefault(podUID string, containerName string) cp return s.GetDefaultCPUSet() } +func (s *stateMemory) GetCPUPromised() ContainerCPUAssignments { + s.RLock() + defer s.RUnlock() + return s.promised.Clone() +} + func (s *stateMemory) GetCPUAssignments() ContainerCPUAssignments { s.RLock() defer s.RUnlock() return s.assignments.Clone() } +func (s *stateMemory) SetPromisedCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { + s.Lock() + defer s.Unlock() + + if _, ok := s.promised[podUID]; !ok { + s.promised[podUID] = make(map[string]cpuset.CPUSet) + } + + s.promised[podUID][containerName] = cset + klog.InfoS("Updated promised CPUSet", "podUID", podUID, "containerName", containerName, "cpuSet", cset) +} + func (s *stateMemory) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { s.Lock() defer s.Unlock() @@ -88,6 +116,14 @@ func (s *stateMemory) SetDefaultCPUSet(cset cpuset.CPUSet) { klog.InfoS("Updated default CPUSet", "cpuSet", cset) } +func (s *stateMemory) SetCPUPromised(a ContainerCPUAssignments) { + s.Lock() + defer s.Unlock() + + s.promised = a.Clone() + klog.InfoS("Updated CPUSet promised", "promised", a) +} + func (s *stateMemory) SetCPUAssignments(a ContainerCPUAssignments) { s.Lock() defer 
s.Unlock() @@ -105,6 +141,13 @@ func (s *stateMemory) Delete(podUID string, containerName string) { delete(s.assignments, podUID) } klog.V(2).InfoS("Deleted CPUSet assignment", "podUID", podUID, "containerName", containerName) + + delete(s.promised[podUID], containerName) + if len(s.promised[podUID]) == 0 { + delete(s.promised, podUID) + } + klog.V(2).InfoS("Deleted CPUSet promised", "podUID", podUID, "containerName", containerName) + } func (s *stateMemory) ClearState() { @@ -113,5 +156,6 @@ func (s *stateMemory) ClearState() { s.defaultCPUSet = cpuset.CPUSet{} s.assignments = make(ContainerCPUAssignments) + s.promised = make(ContainerCPUAssignments) klog.V(2).InfoS("Cleared state") } diff --git a/pkg/kubelet/types/constants.go b/pkg/kubelet/types/constants.go index 6c032139b74a1..2a8df5ec2ae95 100644 --- a/pkg/kubelet/types/constants.go +++ b/pkg/kubelet/types/constants.go @@ -41,7 +41,7 @@ const ( // InPlacePodVerticalScaling types const ( - // ErrorInconsistentCPUAllocation represent the type of an inconsistentCPUAllocationError ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" - ErrorGetCPUSet = "getCPUSetError" + ErrorProhibitedCPUAllocation = "prohibitedCPUAllocationError" + ErrorGetPromisedCPUSet = "getPromisedCPUSetError" ) diff --git a/pkg/registry/core/pod/strategy.go b/pkg/registry/core/pod/strategy.go index ac5fe4b79f433..ccdf35e76f47b 100644 --- a/pkg/registry/core/pod/strategy.go +++ b/pkg/registry/core/pod/strategy.go @@ -395,7 +395,6 @@ func dropNonResizeUpdatesForContainers(new, old []api.Container) []api.Container } oldCopyWithMergedResources[i].Resources = ctr.Resources oldCopyWithMergedResources[i].ResizePolicy = ctr.ResizePolicy - oldCopyWithMergedResources[i].Env = ctr.Env } return oldCopyWithMergedResources diff --git a/test/e2e_node/cpu_manager_metrics_test.go b/test/e2e_node/cpu_manager_metrics_test.go index 484dffb9a1684..94ccb20954dd1 100644 --- a/test/e2e_node/cpu_manager_metrics_test.go +++ 
b/test/e2e_node/cpu_manager_metrics_test.go @@ -104,8 +104,6 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa enableCPUManagerOptions: true, options: cpuPolicyOptions, }, - false, - false, ) updateKubeletConfig(ctx, f, newCfg, true) }) @@ -404,7 +402,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, ) updateKubeletConfig(ctx, f, newCfg, true) @@ -444,7 +442,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa reservedSystemCPUs: cpuset.New(0), enableCPUManagerOptions: true, options: cpuPolicyOptions, - }, false, false, + }, ) updateKubeletConfig(ctx, f, newCfg, true) diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 7f203df9266b6..1cc14d9d60912 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -2346,28 +2346,6 @@ func BeDistributedCPUs(expectedSpread int) types.GomegaMatcher { // Other helpers -func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { - framework.Logf("got pod logs: <%v>", logs) - cpus, err := cpuset.Parse(strings.TrimSpace(logs)) - framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cntName, podName) - return cpus -} - -// computeNUMADistribution calculates CPU distribution per NUMA node. 
-func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { - numaCPUs, err := getNumaNodeCPUs() - framework.ExpectNoError(err, "Error retrieving NUMA nodes") - framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs) - - distribution := make(map[int]int) - for node, cpus := range numaCPUs { - distribution[node] = cpus.Intersection(allocatedCPUs).Size() - } - - framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution) - return distribution -} - func getContainerAllowedCPUs(pod *v1.Pod, ctnName string, isInit bool) (cpuset.CPUSet, error) { cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, e2enodeCgroupV2Enabled) if err != nil { @@ -2797,15 +2775,18 @@ func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribut } type cpuManagerKubeletArguments struct { - policyName string - enableCPUManagerOptions bool - disableCPUQuotaWithExclusiveCPUs bool - enablePodLevelResources bool - reservedSystemCPUs cpuset.CPUSet - options map[string]string + policyName string + enableCPUManagerOptions bool + disableCPUQuotaWithExclusiveCPUs bool + enablePodLevelResources bool + enableInPlacePodVerticalScaling bool + enableInPlacePodVerticalScalingExclusiveCPUs bool + enableInPlacePodVerticalScalingAllocatedStatus bool + reservedSystemCPUs cpuset.CPUSet + options map[string]string } -func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) *kubeletconfig.KubeletConfiguration { +func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { newCfg.FeatureGates = make(map[string]bool) @@ -2815,8 +2796,9 @@ func configureCPUManagerInKubelet(oldCfg 
*kubeletconfig.KubeletConfiguration, ku newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources - newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = isInPlacePodVerticalScalingExclusiveCPUsEnabled - newCfg.FeatureGates["InPlacePodVerticalScalingAllocatedStatus"] = isInPlacePodVerticalScalingAllocatedStatusEnabled + newCfg.FeatureGates["InPlacePodVerticalScaling"] = kubeletArguments.enableInPlacePodVerticalScaling + newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = kubeletArguments.enableInPlacePodVerticalScalingExclusiveCPUs + newCfg.FeatureGates["InPlacePodVerticalScalingAllocatedStatus"] = kubeletArguments.enableInPlacePodVerticalScalingAllocatedStatus newCfg.CPUManagerPolicy = kubeletArguments.policyName newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} @@ -2893,198 +2875,6 @@ func runAutomaticallyRemoveInactivePodsFromCPUManagerStateFile(ctx context.Conte } -func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool, cpuAlloc int64) { - var err error - var ctnAttrs []ctnAttribute - var pod1, pod2, pod3 *v1.Pod - podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod - - framework.Logf("runCfsQuotaGuPods: disableQuota=%v, CPU Allocatable=%v", disabledCPUQuotaWithExclusiveCPUs, cpuAlloc) - - deleteTestPod := func(pod *v1.Pod) { - // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a - // 'deadline expired' message and the cleanup aborts, which we don't want. 
- // So let's use a separate and more generous timeout (determined by trial and error) - ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - deletePodSyncAndWait(ctx2, f, pod.Namespace, pod.Name) - delete(podsToClean, string(pod.UID)) - } - - // cleanup leftovers on test failure. The happy path is covered by `deleteTestPod` calls - ginkgo.DeferCleanup(func() { - ginkgo.By("by deleting the pods and waiting for container removal") - // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a - // 'deadline expired' message and the cleanup aborts, which we don't want. - // So let's use a separate and more generous timeout (determined by trial and error) - ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - deletePodsAsync(ctx2, f, podsToClean) - }) - - podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep -E "($(cat /podinfo/uid)|$(cat /podinfo/uid | sed 's/-/_/g'))(/|\.slice/)cpu.max$") && sleep 1d`} - cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"} - defaultPeriod := "100000" - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-cfsquota-disabled", - cpuRequest: "1", - cpuLimit: "1", - }, - } - pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs) - pod1.Spec.Containers[0].Command = cfsCheckCommand - pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1) - podsToClean[string(pod1.UID)] = pod1 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, exclusive CPUs, unlimited)") - - expectedQuota := "100000" - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" - } - expCFSQuotaRegex := fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod1.Spec.Containers[0].Name, pod1.Name) - 
deleteTestPod(pod1) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-cfsquota-enabled", - cpuRequest: "500m", - cpuLimit: "500m", - }, - } - pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs) - pod2.Spec.Containers[0].Command = cfsCheckCommand - pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2) - podsToClean[string(pod2.UID)] = pod2 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod2.Spec.Containers[0].Name, pod2.Name) - deleteTestPod(pod2) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "non-gu-container", - cpuRequest: "100m", - cpuLimit: "500m", - }, - } - pod3 = makeCPUManagerPod("non-gu-pod3", ctnAttrs) - pod3.Spec.Containers[0].Command = cfsCheckCommand - pod3 = e2epod.NewPodClient(f).CreateSync(ctx, pod3) - podsToClean[string(pod3.UID)] = pod3 - - ginkgo.By("checking if the expected cfs quota was assigned (BU pod, limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod3.Name, pod3.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod3.Spec.Containers[0].Name, pod3.Name) - deleteTestPod(pod3) - - if cpuAlloc >= 2 { - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-non-int-values", - cpuRequest: "500m", - cpuLimit: "500m", - }, - { - ctnName: "gu-container-int-values", - cpuRequest: "1", - cpuLimit: "1", - }, - } - pod4 := makeCPUManagerPod("gu-pod4", ctnAttrs) - pod4.Spec.Containers[0].Command = cfsCheckCommand - pod4.Spec.Containers[1].Command = cfsCheckCommand - pod4 = e2epod.NewPodClient(f).CreateSync(ctx, 
pod4) - podsToClean[string(pod4.UID)] = pod4 - - ginkgo.By("checking if the expected cfs quota was assigned (GU pod, container 0 exclusive CPUs unlimited, container 1 limited)") - - expectedQuota = "50000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod4.Spec.Containers[0].Name, pod4.Name) - expectedQuota = "100000" - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" - } - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[1].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod4.Spec.Containers[1].Name, pod4.Name) - deleteTestPod(pod4) - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container-non-int-values", - cpuRequest: "500m", - cpuLimit: "500m", - }, - { - ctnName: "gu-container-int-values", - cpuRequest: "1", - cpuLimit: "1", - }, - } - - pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs) - pod5.Spec.Containers[0].Command = podCFSCheckCommand - pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5) - podsToClean[string(pod5.UID)] = pod5 - - ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, unlimited)") - - expectedQuota = "150000" - - if disabledCPUQuotaWithExclusiveCPUs { - expectedQuota = "max" - } - - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name) - deleteTestPod(pod5) - } else { - ginkgo.By(fmt.Sprintf("some cases SKIPPED - requests at least %d 
allocatable cores, got %d", 2, cpuAlloc)) - } - - ctnAttrs = []ctnAttribute{ - { - ctnName: "gu-container", - cpuRequest: "100m", - cpuLimit: "100m", - }, - } - - pod6 := makeCPUManagerPod("gu-pod6", ctnAttrs) - pod6.Spec.Containers[0].Command = podCFSCheckCommand - pod6 = e2epod.NewPodClient(f).CreateSync(ctx, pod6) - podsToClean[string(pod6.UID)] = pod6 - - ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, limited)") - - expectedQuota = "10000" - expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod) - err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod6.Name, pod6.Spec.Containers[0].Name, expCFSQuotaRegex) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod6.Spec.Containers[0].Name, pod6.Name) - deleteTestPod(pod6) -} - func runCPUManagerTests(f *framework.Framework) { var cpuCap, cpuAlloc int64 var oldCfg *kubeletconfig.KubeletConfiguration @@ -3109,7 +2899,7 @@ func runCPUManagerTests(f *framework.Framework) { newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ policyName: string(cpumanager.PolicyStatic), reservedSystemCPUs: cpuset.CPUSet{}, - }, false, false) + }) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a non-Gu pod") @@ -3148,14 +2938,10 @@ func runCPUManagerTests(f *framework.Framework) { } reservedSystemCPUs := cpuset.New(0) - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - }, - false, - false, - ) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + }) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3178,16 +2964,12 @@ func runCPUManagerTests(f *framework.Framework) { cpuPolicyOptions := map[string]string{ 
cpumanager.StrictCPUReservationOption: "true", } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedSystemCPUs, + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod - it shouldn't use reserved system CPUs") @@ -3197,130 +2979,6 @@ func runCPUManagerTests(f *framework.Framework) { runNonGuPodTest(ctx, f, cpuCap, reservedSystemCPUs) }) - ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } - - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) - } - - framework.Logf("SMT level %d", smtLevel) - - // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably - // check what we do have in the node. 
- cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, false, false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - // the order between negative and positive doesn't really matter - runSMTAlignmentNegativeTests(ctx, f) - runSMTAlignmentPositiveTests(ctx, f, smtLevel, cpuset.New()) - }) - - ginkgo.It("should assign CPUs as expected based on strict SMT alignment, reservedSystemCPUs should be excluded (both strict-cpu-reservation and full-pcpus-only options enabled)", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < 2 { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } - - // our tests want to allocate a full core, so we need at last smtLevel*2 virtual cpus - if cpuAlloc < int64(smtLevel*2) { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, smtLevel*2) - } - - framework.Logf("SMT level %d", smtLevel) - - reservedSystemCPUs := cpuset.New(0) - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.StrictCPUReservationOption: "true", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: reservedSystemCPUs, - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - // the order between negative and positive doesn't really matter - runSMTAlignmentNegativeTests(ctx, f) - 
runSMTAlignmentPositiveTests(ctx, f, smtLevel, reservedSystemCPUs) - }) - - ginkgo.It("should not enforce CFS quota for containers with static CPUs assigned", func(ctx context.Context) { - if !IsCgroup2UnifiedMode() { - e2eskipper.Skipf("Skipping since CgroupV2 not used") - } - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - if cpuAlloc < 1 { // save expensive kubelet restart - e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - disableCPUQuotaWithExclusiveCPUs: true, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. Some tests require > 1 exclusive CPU - runCfsQuotaGuPods(ctx, f, true, cpuAlloc) - }) - - ginkgo.It("should keep enforcing the CFS quota for containers with static CPUs assigned and feature gate disabled", func(ctx context.Context) { - if !IsCgroup2UnifiedMode() { - e2eskipper.Skipf("Skipping since CgroupV2 not used") - } - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - if cpuAlloc < 1 { // save expensive kubelet restart - e2eskipper.Skipf("Skipping since not enough allocatable CPU got %d required 1", cpuAlloc) - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - disableCPUQuotaWithExclusiveCPUs: false, - }, - false, - false, - ) - - updateKubeletConfig(ctx, f, newCfg, true) - - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU. 
Some tests require > 1 exclusive CPU - runCfsQuotaGuPods(ctx, f, false, cpuAlloc) - }) - f.It("should not reuse CPUs of restartable init containers", feature.SidecarContainers, func(ctx context.Context) { cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) @@ -3330,14 +2988,10 @@ func runCPUManagerTests(f *framework.Framework) { } // Enable CPU Manager in the kubelet. - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.CPUSet{}, - }, - false, - false, - ) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: cpuset.CPUSet{}, + }) updateKubeletConfig(ctx, f, newCfg, true) ginkgo.By("running a Gu pod with a regular init container and a restartable init container") @@ -3389,281 +3043,11 @@ func runCPUManagerTests(f *framework.Framework) { waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) }) - ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) { - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } - - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) - } - - framework.Logf("SMT level %d", smtLevel) - - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: 
"false", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - - ctnAttrs := []ctnAttribute{ - { - ctnName: "test-gu-container-distribute-cpus-across-numa-disabled", - cpuRequest: "2000m", - cpuLimit: "2000m", - }, - } - pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) - - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) - - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) - - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - gomega.Expect(cpus).To(BePackedCPUs()) - } - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
- // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) - }) - - ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) { - var cpusNumPerNUMA, numaNodeNum int - - fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption) - _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) - smtLevel := getSMTLevel() - framework.Logf("SMT level %d", smtLevel) - - // strict SMT alignment is trivially verified and granted on non-SMT systems - if smtLevel < minSMTLevel { - e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt) - } - - // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus - minCPUCount := int64(smtLevel * minCPUCapacity) - if cpuAlloc < minCPUCount { - e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount) - } - - // this test is intended to be run on a multi-node NUMA system and - // a system with at least 4 cores per socket, hostcheck skips test - // if above requirements are not satisfied - numaNodeNum, _, _, cpusNumPerNUMA = hostCheck() - - cpuPolicyOptions := map[string]string{ - cpumanager.FullPCPUsOnlyOption: "true", - cpumanager.DistributeCPUsAcrossNUMAOption: "true", - } - newCfg := configureCPUManagerInKubelet(oldCfg, - &cpuManagerKubeletArguments{ - policyName: string(cpumanager.PolicyStatic), - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuPolicyOptions, - }, - false, - false, - ) - updateKubeletConfig(ctx, f, newCfg, true) - // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed - // across NUMA nodes in cases where more than one NUMA node 
is required to satisfy the allocation. - // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single - // NUMA node. We have to pick cpuRequest such that: - // 1. CPURequest > cpusNumPerNUMA - // 2. Not occupy all the CPUs on the node ande leave room for reserved CPU - // 3. CPURequest is a multiple if number of NUMA nodes to allow equal CPU distribution across NUMA nodes - // - // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNuma * numaNodeNum) - reservedCPUscount) - // Considering all these constraints we select: CPURequest= (cpusNumPerNUMA-smtLevel)*numaNodeNum - - cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum - ctnAttrs := []ctnAttribute{ - { - ctnName: "test-gu-container-distribute-cpus-across-numa", - cpuRequest: fmt.Sprintf("%d", cpuReq), - cpuLimit: fmt.Sprintf("%d", cpuReq), - }, - } - pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) - - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) - - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) - - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - // We expect a perfectly even spilit i.e. equal distribution across NUMA Node as the CPU Request is 4*smtLevel*numaNodeNum. - expectedSpread := cpus.Size() / numaNodeNum - gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread)) - } - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
- // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) - }) - ginkgo.AfterEach(func(ctx context.Context) { updateKubeletConfig(ctx, f, oldCfg, true) }) } -func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) { - // negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection - ctnAttrs := []ctnAttribute{ - { - ctnName: "gu-container-neg", - cpuRequest: "1000m", - cpuLimit: "1000m", - }, - } - pod := makeCPUManagerPod("gu-pod", ctnAttrs) - // CreateSync would wait for pod to become Ready - which will never happen if production code works as intended! - pod = e2epod.NewPodClient(f).Create(ctx, pod) - - err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) { - if pod.Status.Phase != v1.PodPending { - return true, nil - } - return false, nil - }) - framework.ExpectNoError(err) - pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - - if pod.Status.Phase != v1.PodFailed { - framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) - } - if !isSMTAlignmentError(pod) { - framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason) - } - - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
- // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) -} - -func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int, strictReservedCPUs cpuset.CPUSet) { - // positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores - // 1. are core siblings - // 2. take a full core - // WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels. - // this means on more-than-2-way SMT systems this test will prove nothing - ctnAttrs := []ctnAttribute{ - { - ctnName: "gu-container-pos", - cpuRequest: "2000m", - cpuLimit: "2000m", - }, - } - pod := makeCPUManagerPod("gu-pod", ctnAttrs) - pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) - - for _, cnt := range pod.Spec.Containers { - ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name)) - - logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name) - - cpus := getContainerAllowedCPUsFromLogs(pod.Name, cnt.Name, logs) - - gomega.Expect(cpus.Intersection(strictReservedCPUs).IsEmpty()).To(gomega.BeTrueBecause("cpuset %q should not contain strict reserved cpus %q", cpus.String(), strictReservedCPUs.String())) - validateSMTAlignment(cpus, smtLevel, pod, &cnt) - } - - deletePodSyncByName(ctx, f, pod.Name) - // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state. 
- // this is in turn needed because we will have an unavoidable (in the current framework) race with th - // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire - waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace) -} - -func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) { - framework.Logf("validating cpus: %v", cpus) - - if cpus.Size()%smtLevel != 0 { - framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel) - } - - // now check all the given cpus are thread siblings. - // to do so the easiest way is to rebuild the expected set of siblings from all the cpus we got. - // if the expected set matches the given set, the given set was good. - siblingsCPUs := cpuset.New() - for _, cpuID := range cpus.UnsortedList() { - threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID)))) - framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name) - siblingsCPUs = siblingsCPUs.Union(threadSiblings) - } - - framework.Logf("siblings cpus: %v", siblingsCPUs) - if !siblingsCPUs.Equals(cpus) { - framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs) - } -} - -func isSMTAlignmentError(pod *v1.Pod) bool { - re := regexp.MustCompile(`SMT.*Alignment.*Error`) - return re.MatchString(pod.Status.Reason) -} - -// getNumaNodeCPUs retrieves CPUs for each NUMA node. 
-func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { - numaNodes := make(map[int]cpuset.CPUSet) - nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist") - if err != nil { - return nil, err - } - - for _, nodePath := range nodePaths { - data, err := os.ReadFile(nodePath) - framework.ExpectNoError(err, "Error obtaning CPU information from the node") - cpuSet := strings.TrimSpace(string(data)) - cpus, err := cpuset.Parse(cpuSet) - framework.ExpectNoError(err, "Error parsing CPUset") - - // Extract node ID from path (e.g., "node0" -> 0) - base := filepath.Base(filepath.Dir(nodePath)) - nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node")) - if err != nil { - continue - } - numaNodes[nodeID] = cpus - } - - return numaNodes, nil -} - func getContainerAllowedCPUsFromLogs(podName, cntName, logs string) cpuset.CPUSet { framework.Logf("got pod logs: <%v>", logs) cpus, err := cpuset.Parse(strings.TrimSpace(logs)) @@ -3686,36 +3070,6 @@ func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int { return distribution } -// Custom matcher for checking packed CPUs. -func BePackedCPUs() gomegatypes.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - // This assumption holds true if there are enough CPUs on a single NUMA node. - // We are intentionally limiting the CPU request to 2 to minimize the number - // of CPUs required to fulfill this case and therefore maximize the chances - // of correctly validating this case. - if count == allocatedCPUs.Size() { - return true, nil - } - } - return false, nil - }).WithMessage("expected CPUs to be packed") -} - -// Custom matcher for checking distributed CPUs. 
-func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher { - return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) { - distribution := computeNUMADistribution(allocatedCPUs) - for _, count := range distribution { - if count != expectedSpread { - return false, nil - } - } - return true, nil - }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread) -} - // Serial because the test updates kubelet configuration. var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() { f := framework.NewDefaultFramework("cpu-manager-test") diff --git a/test/e2e_node/pod_resize_test.go b/test/e2e_node/pod_resize_test.go index b0d5ccfc53a75..8d4eaa831b6e5 100644 --- a/test/e2e_node/pod_resize_test.go +++ b/test/e2e_node/pod_resize_test.go @@ -21,20 +21,19 @@ import ( "encoding/json" "fmt" "strconv" - "strings" "time" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/strategicpatch" clientset "k8s.io/client-go/kubernetes" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" @@ -121,7 +120,7 @@ func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedRes }).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred()) } -func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, cpuManagerPolicyName 
string, cpuManagerPolicyOptions map[string]string, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { +func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, cpuManagerPolicyName string, cpuManagerPolicyOptions map[string]string, ippvsCfg ippvsConfig) { if cpuManagerPolicyName != "" { if cpuManagerPolicyOptions != nil { func() { @@ -151,13 +150,14 @@ func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, // check what we do have in the node. newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: cpuManagerPolicyName, - reservedSystemCPUs: cpuset.New(0), - enableCPUManagerOptions: true, - options: cpuManagerPolicyOptions, + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.New(0), + enableCPUManagerOptions: true, + enableInPlacePodVerticalScaling: ippvsCfg.enableInPlacePodVerticalScaling, + enableInPlacePodVerticalScalingExclusiveCPUs: ippvsCfg.enableInPlacePodVerticalScalingExclusiveCPUs, + enableInPlacePodVerticalScalingAllocatedStatus: ippvsCfg.enableInPlacePodVerticalScalingAllocatedStatus, + options: cpuManagerPolicyOptions, }, - isInPlacePodVerticalScalingAllocatedStatusEnabled, - isInPlacePodVerticalScalingExclusiveCPUsEnabled, ) updateKubeletConfig(ctx, f, newCfg, true) } else { @@ -169,9 +169,12 @@ func cpuManagerPolicyKubeletConfig(ctx context.Context, f *framework.Framework, } // Enable CPU Manager in the kubelet. 
newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ - policyName: cpuManagerPolicyName, - reservedSystemCPUs: cpuset.CPUSet{}, - }, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + policyName: cpuManagerPolicyName, + reservedSystemCPUs: cpuset.CPUSet{}, + enableInPlacePodVerticalScaling: ippvsCfg.enableInPlacePodVerticalScaling, + enableInPlacePodVerticalScalingExclusiveCPUs: ippvsCfg.enableInPlacePodVerticalScalingExclusiveCPUs, + enableInPlacePodVerticalScalingAllocatedStatus: ippvsCfg.enableInPlacePodVerticalScalingAllocatedStatus, + }) updateKubeletConfig(ctx, f, newCfg, true) } } @@ -183,7 +186,14 @@ type cpuManagerPolicyConfig struct { options map[string]string } -func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { +type ippvsConfig struct { + title string + enableInPlacePodVerticalScaling bool + enableInPlacePodVerticalScalingExclusiveCPUs bool + enableInPlacePodVerticalScalingAllocatedStatus bool +} + +func doPodResizeTests(policy cpuManagerPolicyConfig, ippvsCfg ippvsConfig) { f := framework.NewDefaultFramework("pod-resize-test") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged var podClient *e2epod.PodClient @@ -1343,8 +1353,8 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling for idx := range tests { tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + 
ginkgo.It(tc.name+ippvsCfg.title+policy.title, func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, ippvsCfg) var testPod, patchedPod *v1.Pod var pErr error @@ -1359,11 +1369,11 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling framework.ExpectNoError(err) for _, node := range nodes.Items { - addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) + e2enode.AddExtendedResource(ctx, f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123")) } defer func() { for _, node := range nodes.Items { - removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource) + e2enode.RemoveExtendedResource(ctx, f.ClientSet, node.Name, fakeExtendedResource) } }() } @@ -1390,7 +1400,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { + patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string) { ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") @@ -1413,27 +1423,58 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling // TODO make this dynamic depending on Policy Name, Resources input and topology of target // machine. 
- // For the moment skip below if CPU Manager Policy is set to none + // For the moment verify only if CPU Manager Policy is set to PolicyStatic and InPlacePodVerticalScalingExclusiveCPUsEnabled is set to true if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By("verifying pod Cpus allowed list value after resize") - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { + if ippvsCfg.enableInPlacePodVerticalScaling && ippvsCfg.enableInPlacePodVerticalScalingExclusiveCPUs { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + podresize.VerifyPodResources(patchedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) + podresize.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + podresize.VerifyPodResources(resizedPod, expectedContainers) + + ginkgo.By("verifying pod Cpus allowed list value after resize") gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, tc.expected). 
Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, tc.containers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") - rbPatchStr, err := podresize.ResizeContainerPatch(tc.containers) - framework.ExpectNoError(err) - // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") + /*if tc.testRollback { + // Resize has been actuated, test rollback + rollbackContainers := make([]podresize.ResizableContainerInfo, len(tc.containers)) + copy(rollbackContainers, tc.containers) + for i, c := range rollbackContainers { + gomega.Expect(c.Name).To(gomega.Equal(tc.expected[i].Name), + "test case containers & expectations should be in the same order") + // Resizes that trigger a restart should trigger a second restart when rolling back. 
+ rollbackContainers[i].RestartCount = tc.expected[i].RestartCount + } + + patchAndVerify(tc.expected, rollbackContainers, "rollback") + }*/ + + /* + rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") + */ ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -1452,7 +1493,7 @@ func doPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScaling } -func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { +func doPodResizeErrorTests(policy cpuManagerPolicyConfig, ippvsCfg ippvsConfig) { f := framework.NewDefaultFramework("pod-resize-errors") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged var podClient *e2epod.PodClient @@ -1531,7 +1572,9 @@ func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc for idx := range tests { tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { + ginkgo.It(tc.name+ippvsCfg.title+policy.title, func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, ippvsCfg) + var testPod, patchedPod *v1.Pod var pErr error @@ -1591,23 +1634,79 @@ func doPodResizeErrorTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc // Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() // in test/e2e/node/pod_resize.go -var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func() { +var _ = SIGDescribe("Pod InPlace Resize Container Basic Cases", framework.WithSerial(), func() { - 
policiesGeneralAvailability := []cpuManagerPolicyConfig{ + ippvsConfigs := []ippvsConfig{ { - name: string(cpumanager.PolicyNone), - title: "", + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: true, + enableInPlacePodVerticalScalingAllocatedStatus: true, }, + { + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs disabled, InPlacePodVerticalScalingAllocatedStatus disabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + { + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs disabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + enableInPlacePodVerticalScalingAllocatedStatus: true, + }, + { + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus disabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: true, + enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + { + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + { + title: ", with InPlacePodVerticalScaling disabled, InPlacePodVerticalScalingExclusiveCPUs disabled, InPlacePodVerticalScalingAllocatedStatus disabled", + enableInPlacePodVerticalScaling: false, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + 
enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + { + title: ", with InPlacePodVerticalScaling disabled, InPlacePodVerticalScalingExclusiveCPUs disabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: false, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + enableInPlacePodVerticalScalingAllocatedStatus: true, + }, + { + title: ", with InPlacePodVerticalScaling disabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus disabled", + enableInPlacePodVerticalScaling: false, + enableInPlacePodVerticalScalingExclusiveCPUs: true, + enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + { + title: ", with InPlacePodVerticalScaling disabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: false, + enableInPlacePodVerticalScalingExclusiveCPUs: false, + enableInPlacePodVerticalScalingAllocatedStatus: false, + }, + } + + policiesGeneralAvailability := []cpuManagerPolicyConfig{ { name: string(cpumanager.PolicyStatic), title: ", alongside CPU Manager Static Policy with no options", options: map[string]string{ - cpumanager.FullPCPUsOnlyOption: "false", - cpumanager.DistributeCPUsAcrossNUMAOption: "false", - cpumanager.AlignBySocketOption: "false", - cpumanager.DistributeCPUsAcrossCoresOption: "false", + cpumanager.FullPCPUsOnlyOption: "false", + cpumanager.DistributeCPUsAcrossNUMAOption: "false", + cpumanager.AlignBySocketOption: "false", }, }, + { + name: string(cpumanager.PolicyNone), + title: "", + }, } policiesBeta := []cpuManagerPolicyConfig{ @@ -1623,7 +1722,7 @@ var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func }, } - /*policiesAlpha := []cpuManagerPolicyConfig{ + policiesAlpha := []cpuManagerPolicyConfig{ { name: string(cpumanager.PolicyStatic), title: ", alongside CPU Manager Static Policy with DistributeCPUsAcrossNUMAOption", @@ -1704,40 +1803,26 @@ 
var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), func cpumanager.DistributeCPUsAcrossCoresOption: "true", }, }, - }*/ - - for idp := range policiesGeneralAvailability { - doPodResizeTests(policiesGeneralAvailability[idp], false, false) - doPodResizeTests(policiesGeneralAvailability[idp], true, false) - doPodResizeTests(policiesGeneralAvailability[idp], false, true) - doPodResizeTests(policiesGeneralAvailability[idp], true, true) - doPodResizeErrorTests(policiesGeneralAvailability[idp], false, false) - doPodResizeErrorTests(policiesGeneralAvailability[idp], true, false) - doPodResizeErrorTests(policiesGeneralAvailability[idp], false, true) - doPodResizeErrorTests(policiesGeneralAvailability[idp], true, true) } - for idp := range policiesBeta { - doPodResizeTests(policiesBeta[idp], false, false) - doPodResizeTests(policiesBeta[idp], true, false) - doPodResizeTests(policiesBeta[idp], false, true) - doPodResizeTests(policiesBeta[idp], true, true) - doPodResizeErrorTests(policiesBeta[idp], false, false) - doPodResizeErrorTests(policiesBeta[idp], true, false) - doPodResizeErrorTests(policiesBeta[idp], false, true) - doPodResizeErrorTests(policiesBeta[idp], true, true) + for idc := range ippvsConfigs { + for idp := range policiesGeneralAvailability { + doPodResizeTests(policiesGeneralAvailability[idp], ippvsConfigs[idc]) + doPodResizeErrorTests(policiesGeneralAvailability[idp], ippvsConfigs[idc]) + } + for idp := range policiesBeta { + doPodResizeTests(policiesBeta[idp], ippvsConfigs[idc]) + doPodResizeErrorTests(policiesBeta[idp], ippvsConfigs[idc]) + } + for idp := range policiesAlpha { + doPodResizeTests(policiesAlpha[idp], ippvsConfigs[idc]) + doPodResizeErrorTests(policiesAlpha[idp], ippvsConfigs[idc]) + } } - /*for idp := range policiesAlpha { - doPodResizeTests(policiesAlpha[idp], true, false) - doPodResizeTests(policiesAlpha[idp], true, true) - doPodResizeErrorTests(policiesAlpha[idp], true, false) - 
doPodResizeErrorTests(policiesAlpha[idp], true, true) - }*/ - }) -func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { +func doPodResizeExtendTests(policy cpuManagerPolicyConfig, ippvsCfg ippvsConfig) { f := framework.NewDefaultFramework("pod-resize-test") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged var podClient *e2epod.PodClient @@ -1760,9 +1845,9 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS type testCase struct { name string - containers []e2epod.ResizableContainerInfo + containers []podresize.ResizableContainerInfo patchString string - expected []e2epod.ResizableContainerInfo + expected []podresize.ResizableContainerInfo addExtendedResource bool skipFlag bool } @@ -1776,7 +1861,8 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS secondAdditionCpuset := cpuset.New() secondExpectedCpuset := cpuset.New() - if tests.name == "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false" { + switch tests.name { + case "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false": if cpuCap < 2 { tests.skipFlag = true } @@ -1794,7 +1880,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS } firstExpectedCpuset = firstAdditionCpuset.Union(firstContainerCpuset) tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - } else if tests.name == "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false" { + case "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false": if cpuCap < 4 { tests.skipFlag = true } @@ -1827,10 +1913,11 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS } secondExpectedCpuset = secondAdditionCpuset.Union(secondContainerCpuset) 
tests.expected[1].CPUsAllowedList = secondExpectedCpuset.String() - } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false") { + case "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false": if cpuCap < 2 { tests.skipFlag = true } + firstContainerCpuset = cpuset.New(2, 3) if isHTEnabled() { cpuList := mustParseCPUSet(getCPUSiblingList(0)).List() @@ -1842,13 +1929,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS firstExpectedCpuset = cpuset.New(firstContainerCpuset.List()[0]) tests.expected[0].CPUsAllowedList = firstExpectedCpuset.String() - if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false" { - startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) - endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex - tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] - ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) - } - } else if (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture") || (tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true") { + case "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true": if cpuCap < 4 { tests.skipFlag = true } @@ -1864,12 +1945,6 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS firstExpectedCpuset = mustParseCPUSet(getCPUSiblingList(1)) tests.expected[0].CPUsAllowedList = 
firstExpectedCpuset.String() - if tests.name == "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true" { - startIndex := strings.Index(tests.patchString, `"mustKeepCPUs","value": "`) + len(`"mustKeepCPUs","value": "`) - endIndex := strings.Index(tests.patchString[startIndex:], `"`) + startIndex - tests.expected[0].CPUsAllowedList = tests.patchString[startIndex:endIndex] - ginkgo.By(fmt.Sprintf("startIndex:%d, endIndex:%d", startIndex, endIndex)) - } } ginkgo.By(fmt.Sprintf("firstContainerCpuset:%v, firstAdditionCpuset:%v, firstExpectedCpuset:%v", firstContainerCpuset, firstAdditionCpuset, firstExpectedCpuset)) @@ -1880,10 +1955,10 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS testsWithFalseFullCPUs := []testCase{ { name: "1 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", @@ -1892,10 +1967,10 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS patchString: `{"spec":{"containers":[ {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -1904,17 +1979,17 @@ func 
doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS }, { name: "1 Guaranteed QoS pod, two containers - increase CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", }, { Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", @@ -1924,17 +1999,17 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}}, {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", }, { Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -1943,10 +2018,10 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS }, { name: "1 Guaranteed QoS 
pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -1955,34 +2030,10 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS patchString: `{"spec":{"containers":[ {"name":"c1", "resources":{"requests":{"cpu":"1","memory":"200Mi"},"limits":{"cpu":"1","memory":"200Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "1", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with mustKeepCPUs, FullPCPUsOnlyOption = false", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "11"}], "resources":{"requests":{"cpu":"1","memory":"400Mi"},"limits":{"cpu":"1","memory":"400Mi"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", @@ -1994,10 +2045,10 @@ func doPodResizeExtendTests(policy 
cpuManagerPolicyConfig, isInPlacePodVerticalS testsWithTrueFullCPUs := []testCase{ { name: "1 Guaranteed QoS pod, one container - decrease CPU & memory, FullPCPUsOnlyOption = true", - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "4", @@ -2006,59 +2057,10 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS patchString: `{"spec":{"containers":[ {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"200Mi"},"limits":{"cpu":"2","memory":"200Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - { - name: "1 Guaranteed QoS pod, one container - decrease CPU & memory with correct mustKeepCPU, FullPCPUsOnlyOption = true", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "2,12"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "2", - }, - }, - }, - // Abnormal case, CPUs in mustKeepCPUs not full PCPUs, the mustKeepCPUs will be ignored - { - name: 
"1 Guaranteed QoS pod, one container - decrease CPU with wrong mustKeepCPU, FullPCPUsOnlyOption = ture", - containers: []e2epod.ResizableContainerInfo{ - { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "4", CPULim: "4", MemReq: "200Mi", MemLim: "200Mi"}, - CPUPolicy: &noRestart, - MemPolicy: &noRestart, - CPUsAllowedListValue: "4", - }, - }, - patchString: `{"spec":{"containers":[ - {"name":"c1", "env":[{"name":"mustKeepCPUs","value": "1,2"}], "resources":{"requests":{"cpu":"2"},"limits":{"cpu":"2"}}} - ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -2070,16 +2072,17 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS timeouts := framework.NewTimeoutContext() var tests []testCase - if policy.options[cpumanager.FullPCPUsOnlyOption] == "false" { + switch policy.options[cpumanager.FullPCPUsOnlyOption] { + case "false": tests = testsWithFalseFullCPUs - } else if policy.options[cpumanager.FullPCPUsOnlyOption] == "true" { + case "true": tests = testsWithTrueFullCPUs } for idx := range tests { tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + ginkgo.It(tc.name+ippvsCfg.title+policy.title, func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, 
oldCfg, policy.name, policy.options, ippvsCfg) setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) if tc.skipFlag { @@ -2090,7 +2093,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS var pErr error tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod", tStamp, tc.containers) testPod.GenerateName = "resize-test-" testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) @@ -2112,57 +2115,54 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS newPod := podClient.CreateSync(ctx, testPod) ginkgo.By("verifying initial pod resources, allocations are as expected") - e2epod.VerifyPodResources(newPod, tc.containers) + podresize.VerifyPodResources(newPod, tc.containers) ginkgo.By("verifying initial pod resize policy is as expected") - e2epod.VerifyPodResizePolicy(newPod, tc.containers) + podresize.VerifyPodResizePolicy(newPod, tc.containers) ginkgo.By("verifying initial pod status resources are as expected") - framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, tc.containers)) + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, tc.containers)) ginkgo.By("verifying initial cgroup config are as expected") - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, newPod, tc.containers)) // TODO make this dynamic depending on Policy Name, Resources input and topology of target // machine. // For the moment skip below if CPU Manager Policy is set to none if policy.name == string(cpumanager.PolicyStatic) { ginkgo.By("verifying initial pod Cpus allowed list value") - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). 
+ gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, tc.containers). Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string) { - ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string) { + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. + // For the moment verify only if CPU Manager Policy is set to static and InPlacePodVerticalScalingExclusiveCPUsEnabled is true + if policy.name == string(cpumanager.PolicyStatic) { + if ippvsCfg.enableInPlacePodVerticalScaling && ippvsCfg.enableInPlacePodVerticalScalingExclusiveCPUs { - ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - e2epod.VerifyPodResources(patchedPod, expectedContainers) + ginkgo.By(fmt.Sprintf("patching pod for 
%s", opStr)) + podresize.VerifyPodResources(patchedPod, expectedContainers) - // Check cgroup values only for containerd versions before 1.6.9 - ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) + podresize.ExpectPodResized(ctx, f, resizedPod, expectedContainers) - ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) - e2epod.VerifyPodResources(resizedPod, expectedContainers) + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) + podresize.VerifyPodResources(resizedPod, expectedContainers) + + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). 
Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, tc.containers). - Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2170,11 +2170,13 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS ginkgo.By("First patch") patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize") - rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) - framework.ExpectNoError(err) - // Resize has been actuated, test rollback - ginkgo.By("Second patch for rollback") - patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") + /* + rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) + framework.ExpectNoError(err) + // Resize has been actuated, test rollback + ginkgo.By("Second patch for rollback") + patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback") + */ ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod.Name) @@ -2193,7 +2195,7 @@ func doPodResizeExtendTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalS } -func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalScalingAllocatedStatusEnabled bool, isInPlacePodVerticalScalingExclusiveCPUsEnabled bool) { +func doMultiPodResizeTests(policy cpuManagerPolicyConfig, ippvsCfg ippvsConfig) { f := framework.NewDefaultFramework("pod-resize-test") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged var podClient *e2epod.PodClient @@ -2212,9 +2214,9 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc }) type testPod struct { - containers []e2epod.ResizableContainerInfo + containers []podresize.ResizableContainerInfo patchString string - expected 
[]e2epod.ResizableContainerInfo + expected []podresize.ResizableContainerInfo } type testCase struct { @@ -2276,10 +2278,10 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc { name: "2 Guaranteed QoS pod, one container - increase CPU & memory, FullPCPUsOnlyOption = false", testPod1: testPod{ - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", @@ -2288,10 +2290,10 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc patchString: `{"spec":{"containers":[ {"name":"c1", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -2299,10 +2301,10 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc }, }, testPod2: testPod{ - containers: []e2epod.ResizableContainerInfo{ + containers: []podresize.ResizableContainerInfo{ { Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "1", CPULim: "1", MemReq: "200Mi", MemLim: "200Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "1", @@ -2311,10 +2313,10 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc patchString: 
`{"spec":{"containers":[ {"name":"c2", "resources":{"requests":{"cpu":"2","memory":"400Mi"},"limits":{"cpu":"2","memory":"400Mi"}}} ]}}`, - expected: []e2epod.ResizableContainerInfo{ + expected: []podresize.ResizableContainerInfo{ { Name: "c2", - Resources: &e2epod.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, + Resources: &cgroups.ContainerResources{CPUReq: "2", CPULim: "2", MemReq: "400Mi", MemLim: "400Mi"}, CPUPolicy: &noRestart, MemPolicy: &noRestart, CPUsAllowedListValue: "2", @@ -2328,8 +2330,8 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc for idx := range tests { tc := tests[idx] - ginkgo.It(tc.name+policy.title+" (InPlacePodVerticalScalingAllocatedStatus="+strconv.FormatBool(isInPlacePodVerticalScalingAllocatedStatusEnabled)+", InPlacePodVerticalScalingExclusiveCPUs="+strconv.FormatBool(isInPlacePodVerticalScalingExclusiveCPUsEnabled)+")", func(ctx context.Context) { - cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, isInPlacePodVerticalScalingAllocatedStatusEnabled, isInPlacePodVerticalScalingExclusiveCPUsEnabled) + ginkgo.It(tc.name+ippvsCfg.title+policy.title, func(ctx context.Context) { + cpuManagerPolicyKubeletConfig(ctx, f, oldCfg, policy.name, policy.options, ippvsCfg) setCPUsForTestCase(ctx, &tc, policy.options[cpumanager.FullPCPUsOnlyOption]) if tc.skipFlag { @@ -2339,11 +2341,11 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc var patchedPod *v1.Pod var pErr error - createAndVerify := func(podName string, podClient *e2epod.PodClient, testContainers []e2epod.ResizableContainerInfo) (newPod *v1.Pod) { + createAndVerify := func(podName string, podClient *e2epod.PodClient, testContainers []podresize.ResizableContainerInfo) (newPod *v1.Pod) { var testPod *v1.Pod tStamp := strconv.Itoa(time.Now().Nanosecond()) - testPod = e2epod.MakePodWithResizableContainers(f.Namespace.Name, fmt.Sprintf("resizepod-%s", podName), tStamp, 
testContainers) + testPod = podresize.MakePodWithResizableContainers(f.Namespace.Name, fmt.Sprintf("resizepod-%s", podName), tStamp, testContainers) testPod.GenerateName = "resize-test-" testPod = e2epod.MustMixinRestrictedPodSecurity(testPod) @@ -2351,20 +2353,20 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc newPod = podClient.CreateSync(ctx, testPod) ginkgo.By("verifying initial pod resources, allocations are as expected") - e2epod.VerifyPodResources(newPod, testContainers) + podresize.VerifyPodResources(newPod, testContainers) ginkgo.By("verifying initial pod resize policy is as expected") - e2epod.VerifyPodResizePolicy(newPod, testContainers) + podresize.VerifyPodResizePolicy(newPod, testContainers) ginkgo.By("verifying initial pod status resources are as expected") - framework.ExpectNoError(e2epod.VerifyPodStatusResources(newPod, testContainers)) + framework.ExpectNoError(podresize.VerifyPodStatusResources(newPod, testContainers)) ginkgo.By("verifying initial cgroup config are as expected") - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, newPod, testContainers)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, newPod, testContainers)) // TODO make this dynamic depending on Policy Name, Resources input and topology of target // machine. // For the moment skip below if CPU Manager Policy is set to none if policy.name == string(cpumanager.PolicyStatic) { ginkgo.By("verifying initial pod Cpus allowed list value") - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, newPod, testContainers). 
Should(gomega.Succeed(), "failed to verify initial Pod CPUsAllowedListValue") } @@ -2374,39 +2376,35 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc newPod1 := createAndVerify("testpod1", podClient, tc.testPod1.containers) newPod2 := createAndVerify("testpod2", podClient, tc.testPod2.containers) - patchAndVerify := func(patchString string, expectedContainers []e2epod.ResizableContainerInfo, initialContainers []e2epod.ResizableContainerInfo, opStr string, newPod *v1.Pod) { - ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) - patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, - types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") - framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + patchAndVerify := func(patchString string, expectedContainers []podresize.ResizableContainerInfo, initialContainers []podresize.ResizableContainerInfo, opStr string, newPod *v1.Pod) { + // TODO make this dynamic depending on Policy Name, Resources input and topology of target + // machine. 
+ // For the moment verify only if CPU Manager Policy is set to static and InPlacePodVerticalScalingExclusiveCPUs is true + if policy.name == string(cpumanager.PolicyStatic) { + if ippvsCfg.enableInPlacePodVerticalScaling && ippvsCfg.enableInPlacePodVerticalScalingExclusiveCPUs { + ginkgo.By(fmt.Sprintf("patching pod for %s", opStr)) + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, + types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) - ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - e2epod.VerifyPodResources(patchedPod, expectedContainers) + ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) + podresize.VerifyPodResources(patchedPod, expectedContainers) - ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) - e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) + podresize.ExpectPodResized(ctx, f, resizedPod, expectedContainers) - // Check cgroup values only for containerd versions before 1.6.9 - ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) - framework.ExpectNoError(e2epod.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) + // Check cgroup values only for containerd versions before 1.6.9 + ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr)) + framework.ExpectNoError(podresize.VerifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers)) - ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr)) - e2epod.VerifyPodResources(resizedPod, expectedContainers) + ginkgo.By(fmt.Sprintf("verifying pod resources 
after %s", opStr)) + podresize.VerifyPodResources(resizedPod, expectedContainers) - // TODO make this dynamic depending on Policy Name, Resources input and topology of target - // machine. - // For the moment skip below if CPU Manager Policy is set to none - if policy.name == string(cpumanager.PolicyStatic) { - ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) - if isInPlacePodVerticalScalingExclusiveCPUsEnabled { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). + ginkgo.By(fmt.Sprintf("verifying pod Cpus allowed list value after %s", opStr)) + gomega.Eventually(ctx, podresize.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). WithArguments(f, resizedPod, expectedContainers). Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs enabled") - } else { - gomega.Eventually(ctx, e2epod.VerifyPodContainersCPUsAllowedListValue, timeouts.PodStartShort, timeouts.Poll). - WithArguments(f, resizedPod, initialContainers). 
- Should(gomega.Succeed(), "failed to verify Pod CPUsAllowedListValue for resizedPod with InPlacePodVerticalScalingExclusiveCPUs disabled (default)") } } } @@ -2414,13 +2412,15 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc patchAndVerify(tc.testPod1.patchString, tc.testPod1.expected, tc.testPod1.containers, "resize", newPod1) patchAndVerify(tc.testPod2.patchString, tc.testPod2.expected, tc.testPod2.containers, "resize", newPod2) - rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) - framework.ExpectNoError(err1) - rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) - framework.ExpectNoError(err2) - // Resize has been actuated, test rollback - patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", newPod1) - patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", newPod2) + /* + rbPatchStr1, err1 := e2epod.ResizeContainerPatch(tc.testPod1.containers) + framework.ExpectNoError(err1) + rbPatchStr2, err2 := e2epod.ResizeContainerPatch(tc.testPod2.containers) + framework.ExpectNoError(err2) + // Resize has been actuated, test rollback + patchAndVerify(rbPatchStr1, tc.testPod1.containers, tc.testPod1.expected, "rollback", newPod1) + patchAndVerify(rbPatchStr2, tc.testPod2.containers, tc.testPod2.expected, "rollback", newPod2) + */ ginkgo.By("deleting pod") deletePodSyncByName(ctx, f, newPod1.Name) @@ -2442,6 +2442,15 @@ func doMultiPodResizeTests(policy cpuManagerPolicyConfig, isInPlacePodVerticalSc var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.WithSerial(), func() { + ippvsConfigs := []ippvsConfig{ + { + title: ", with InPlacePodVerticalScaling enabled, InPlacePodVerticalScalingExclusiveCPUs enabled, InPlacePodVerticalScalingAllocatedStatus enabled", + enableInPlacePodVerticalScaling: true, + enableInPlacePodVerticalScalingExclusiveCPUs: true, + enableInPlacePodVerticalScalingAllocatedStatus: true, + 
}, + } + policiesGeneralAvailability := []cpuManagerPolicyConfig{ { name: string(cpumanager.PolicyStatic), @@ -2465,7 +2474,7 @@ var _ = SIGDescribe("Pod InPlace Resize Container Extended Cases", framework.Wit }, } - doPodResizeExtendTests(policiesGeneralAvailability[0], true, true) - doPodResizeExtendTests(policiesGeneralAvailability[1], true, true) - doMultiPodResizeTests(policiesGeneralAvailability[0], true, true) + doPodResizeExtendTests(policiesGeneralAvailability[0], ippvsConfigs[0]) + doPodResizeExtendTests(policiesGeneralAvailability[1], ippvsConfigs[0]) + doMultiPodResizeTests(policiesGeneralAvailability[0], ippvsConfigs[0]) }) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index b8bbd8ce4f05c..f2ce02d6dc0aa 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -183,7 +183,7 @@ func waitForKubeletToStart(ctx context.Context, f *framework.Framework) { // wait until the kubelet health check will succeed gomega.Eventually(ctx, func() bool { return kubeletHealthCheck(kubeletHealthCheckURL) - }, 5*time.Minute, 2*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state")) // Wait for the Kubelet to be ready. gomega.Eventually(ctx, func(ctx context.Context) error { @@ -504,7 +504,7 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) { return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. 
Containers: %+v", len(containers), containers) } return nil - }, 5*time.Minute, 2*time.Second).Should(gomega.Succeed()) + }, 2*time.Minute, 1*time.Second).Should(gomega.Succeed()) } func getPidsForProcess(name, pidFile string) ([]int, error) { diff --git a/test/e2e_node/util_machineinfo_linux.go b/test/e2e_node/util_machineinfo_linux.go index 721a4b8d70178..11f99eb97e1ed 100644 --- a/test/e2e_node/util_machineinfo_linux.go +++ b/test/e2e_node/util_machineinfo_linux.go @@ -111,3 +111,14 @@ func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { return numaNodes, nil } + +func getSMTLevel() int { + cpuID := 0 // this is just the most likely cpu to be present in a random system. No special meaning besides this. + out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuID)).Output() + framework.ExpectNoError(err) + // how many thread sibling you have = SMT level + // example: 2-way SMT means 2 threads sibling for each thread + cpus, err := cpuset.Parse(strings.TrimSpace(string(out))) + framework.ExpectNoError(err) + return cpus.Size() +} diff --git a/test/e2e_node/util_machineinfo_unsupported.go b/test/e2e_node/util_machineinfo_unsupported.go index a9eed49931685..891760ca25474 100644 --- a/test/e2e_node/util_machineinfo_unsupported.go +++ b/test/e2e_node/util_machineinfo_unsupported.go @@ -54,3 +54,7 @@ func getCoreSiblingList(cpuRes int64) string { func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { return nil, errors.New("not implemented") } + +func getSMTLevel() int { + return 1 +} From 2c78b1663598e2ea67133a5db6db043431b6f608 Mon Sep 17 00:00:00 2001 From: Chunxia202410 Date: Thu, 5 Jun 2025 14:06:37 +0800 Subject: [PATCH 9/9] consider exist CPU in topology manager when pod resize. 
# Conflicts: # pkg/kubelet/cm/cpumanager/policy_static.go --- pkg/kubelet/cm/cpumanager/policy_static.go | 67 +++-- .../cm/cpumanager/topology_hints_test.go | 275 ++++++++++++++++++ 2 files changed, 323 insertions(+), 19 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index bae0f7dd7fb9d..d5eafac7530e9 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -696,6 +696,8 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int, } func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + var cpuHints []topologymanager.TopologyHint + // Get a count of how many guaranteed CPUs have been requested. requested := p.guaranteedCPUs(pod, container) @@ -711,13 +713,19 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v klog.V(3).InfoS("CPU Manager hint generation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) return nil } + // Get a list of available CPUs. + available := p.GetAvailableCPUs(s) + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable := p.cpusToReuse[string(pod.UID)] - if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - // Short circuit to regenerate the same hints if there are already - // guaranteed CPUs allocated to the Container. This might happen after a - // kubelet restart, for example. 
- if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { - if allocated.Size() != requested { + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requested { + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) // An empty list of hints will be treated as a preference that cannot be satisfied. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. @@ -725,23 +733,40 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v return map[string][]topologymanager.TopologyHint{ string(v1.ResourceCPU): {}, } - } - klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) - return map[string][]topologymanager.TopologyHint{ - string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested), + } else { + if (allocated.Size() > requested) { //For container scale down + if mustKeepCPUsForResize, ok := s.GetPromisedCPUSet(string(pod.UID), container.Name); ok { + cpuHints = p.generateCPUTopologyHints(allocated, mustKeepCPUsForResize, requested) + klog.InfoS("Regenerating TopologyHints for container scale down", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + } + return map[string][]topologymanager.TopologyHint{ + 
string(v1.ResourceCPU): {}, + } + } else { //For container scale up + if allocated.Size() + reusable.Size() >= requested{ + cpuHints = p.generateCPUTopologyHints(reusable, allocated, requested) + klog.InfoS("Regenerating TopologyHints for container scale up from reusable CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + } else { + cpuHints = p.generateCPUTopologyHints(available, reusable.Union(allocated), requested) + klog.InfoS("Regenerating TopologyHints for container scale up from available CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + } + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + } } } + klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested), + } } - // Get a list of available CPUs. - available := p.GetAvailableCPUs(s) - - // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). - // It should be an empty CPUSet for a newly created pod. - reusable := p.cpusToReuse[string(pod.UID)] - // Generate hints. - cpuHints := p.generateCPUTopologyHints(available, reusable, requested) + cpuHints = p.generateCPUTopologyHints(available, reusable, requested) klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) return map[string][]topologymanager.TopologyHint{ @@ -750,6 +775,8 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v } func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + resizeFlag := false + // Get a count of how many guaranteed CPUs have been requested by Pod. 
requested := p.podGuaranteedCPUs(pod) @@ -783,12 +810,14 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin string(v1.ResourceCPU): {}, } } + resizeFlag = true } // A set of CPUs already assigned to containers in this pod assignedCPUs = assignedCPUs.Union(allocated) } } - if assignedCPUs.Size() == requested { + // resizeFlag == false can avoid the case of "1 container scale up X CPUs, and 1 container scale down X CPUs". + if resizeFlag == false { klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod)) return map[string][]topologymanager.TopologyHint{ string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.CPUSet{}, requested), diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index 4773c779a677a..28c9bc96c335b 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -674,3 +674,278 @@ func returnTestCases() []testCase { }, } } + +type testCaseForResize struct { + name string + pod v1.Pod + container v1.Container + promised state.ContainerCPUAssignments + assignments state.ContainerCPUAssignments + defaultCPUSet cpuset.CPUSet + expectedHints []topologymanager.TopologyHint + topology *topology.CPUTopology + policyOptions map[string]string +} + +func TestGetTopologyHintsForResize(t *testing.T) { + tcases := returnTestCasesForResize() + + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + policyOpt, _ := NewStaticPolicyOptions(tc.policyOptions) + var activePods []*v1.Pod + for p := range tc.assignments { + pod := v1.Pod{} + pod.UID = types.UID(p) + for c := range tc.assignments[p] { 
+ container := v1.Container{} + container.Name = c + pod.Spec.Containers = append(pod.Spec.Containers, container) + } + activePods = append(activePods, &pod) + } + + m := manager{ + policy: &staticPolicy{ + topology: tc.topology, + options: policyOpt, + }, + state: &mockState{ + promised: tc.promised, + assignments: tc.assignments, + defaultCPUSet: tc.defaultCPUSet, + }, + topology: tc.topology, + activePods: func() []*v1.Pod { return activePods }, + podStatusProvider: mockPodStatusProvider{}, + sourcesReady: &sourcesReadyStub{}, + } + + hints := m.GetTopologyHints(&tc.pod, &tc.container)[string(v1.ResourceCPU)] + sort.SliceStable(hints, func(i, j int) bool { + return hints[i].LessThan(hints[j]) + }) + sort.SliceStable(tc.expectedHints, func(i, j int) bool { + return tc.expectedHints[i].LessThan(tc.expectedHints[j]) + }) + if !reflect.DeepEqual(tc.expectedHints, hints) { + t.Errorf("Expected in result to be %v , got %v", tc.expectedHints, hints) + } + }) + } +} + +func returnTestCasesForResize() []testCaseForResize { + testPod1 := makePod("fakePod", "fakeContainer", "6", "6") + testContainer1 := &testPod1.Spec.Containers[0] + testPod2 := makePod("fakePod", "fakeContainer", "4", "4") + testContainer2 := &testPod2.Spec.Containers[0] + + m0001, _ := bitmask.NewBitMask(0) + m0011, _ := bitmask.NewBitMask(0, 1) + m0101, _ := bitmask.NewBitMask(0, 2) + m1001, _ := bitmask.NewBitMask(0, 3) + m0111, _ := bitmask.NewBitMask(0, 1, 2) + m1011, _ := bitmask.NewBitMask(0, 1, 3) + m1101, _ := bitmask.NewBitMask(0, 2, 3) + m1111, _ := bitmask.NewBitMask(0, 1, 2, 3) + + return []testCaseForResize{ + { + name: "Pod scale up, Request 6 CPUs, promised 2 on NUMA 0, assignments 4 on NUMA 0,1", + pod: *testPod1, + container: *testContainer1, + promised: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 6), + }, + }, + assignments: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + 
testContainer1.Name: cpuset.New(0, 6, 3, 9), + }, + }, + defaultCPUSet: cpuset.New(1, 2, 4, 7, 8, 10), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: m0011, + Preferred: false, + }, + }, + topology: topoDualSocketHT, + }, + { + name: "Pod scale up, Request 6 CPUs, promised 2 on NUMA 0, assignments 4 on NUMA 0", + pod: *testPod1, + container: *testContainer1, + promised: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 6), + }, + }, + assignments: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 6, 2, 8), + }, + }, + defaultCPUSet: cpuset.New(1, 3, 4, 7, 9, 10), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: m0001, + Preferred: true, + }, + { + NUMANodeAffinity: m0011, + Preferred: false, + }, + }, + topology: topoDualSocketHT, + }, + { + name: "Pod scale down, Request 4 CPUs, promised 2 on NUMA 0, assignments 6 on NUMA 0,1", + pod: *testPod2, + container: *testContainer2, + promised: state.ContainerCPUAssignments{ + string(testPod2.UID): map[string]cpuset.CPUSet{ + testContainer2.Name: cpuset.New(0, 6), + }, + }, + assignments: state.ContainerCPUAssignments{ + string(testPod2.UID): map[string]cpuset.CPUSet{ + testContainer2.Name: cpuset.New(0, 5, 6, 3, 9, 11), + }, + }, + defaultCPUSet: cpuset.New(1, 2, 4, 7, 8, 10), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: m0001, + Preferred: true, + }, + { + NUMANodeAffinity: m0011, + Preferred: false, + }, + }, + topology: topoDualSocketHT, + }, + { + name: "Pod scale down, Request 4 CPUs, no promised CPUs, assignments 6 on NUMA 0,1", + pod: *testPod2, + container: *testContainer2, + assignments: state.ContainerCPUAssignments{ + string(testPod2.UID): map[string]cpuset.CPUSet{ + testContainer2.Name: cpuset.New(0, 5, 6, 3, 9, 11), + }, + }, + defaultCPUSet: cpuset.New(1, 2, 4, 7, 8, 10), + expectedHints: 
[]topologymanager.TopologyHint{}, + topology: topoDualSocketHT, + }, + { + name: "Pod scale up, Request 21 CPUs, promised 2 on NUMA 0, assignments 4 on NUMA 0, AlignBySocketOption is false", + pod: *testPod1, + container: *testContainer1, + promised: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 40), + }, + }, + assignments: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 1, 40, 41), + }, + }, + defaultCPUSet: cpuset.New(8, 9, 19, 29, 39), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: m0001, + Preferred: true, + }, + { + NUMANodeAffinity: m0011, + Preferred: false, + }, + { + NUMANodeAffinity: m0101, + Preferred: false, + }, + { + NUMANodeAffinity: m1001, + Preferred: false, + }, + { + NUMANodeAffinity: m0111, + Preferred: false, + }, + { + NUMANodeAffinity: m1011, + Preferred: false, + }, + { + NUMANodeAffinity: m1101, + Preferred: false, + }, + { + NUMANodeAffinity: m1111, + Preferred: false, + }, + }, + topology: topoDualSocketMultiNumaPerSocketHT, + policyOptions: map[string]string{AlignBySocketOption: "false"}, + }, + { + name: "Pod scale up, Request 21 CPUs, promised 2 on NUMA 0, assignments 4 on NUMA 0, AlignBySocketOption is true", + pod: *testPod1, + container: *testContainer1, + promised: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 40), + }, + }, + assignments: state.ContainerCPUAssignments{ + string(testPod1.UID): map[string]cpuset.CPUSet{ + testContainer1.Name: cpuset.New(0, 1, 40, 41), + }, + }, + defaultCPUSet: cpuset.New(8, 9, 19, 29, 39), + expectedHints: []topologymanager.TopologyHint{ + { + NUMANodeAffinity: m0001, + Preferred: true, + }, + { + NUMANodeAffinity: m0011, + Preferred: true, + }, + { + NUMANodeAffinity: m0101, + Preferred: false, + }, + { + NUMANodeAffinity: m1001, + Preferred: false, + }, + { + 
NUMANodeAffinity: m0111, + Preferred: false, + }, + { + NUMANodeAffinity: m1011, + Preferred: false, + }, + { + NUMANodeAffinity: m1101, + Preferred: false, + }, + { + NUMANodeAffinity: m1111, + Preferred: false, + }, + }, + topology: topoDualSocketMultiNumaPerSocketHT, + policyOptions: map[string]string{AlignBySocketOption: "true"}, + }, + } +} \ No newline at end of file