diff --git a/pkg/kubelet/allocation/allocation_manager.go b/pkg/kubelet/allocation/allocation_manager.go index 50220da2589b4..e98800e5efa6c 100644 --- a/pkg/kubelet/allocation/allocation_manager.go +++ b/pkg/kubelet/allocation/allocation_manager.go @@ -36,6 +36,7 @@ import ( v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/allocation/state" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/events" @@ -592,6 +593,11 @@ func (m *manager) handlePodResourcesResize(logger klog.Logger, pod *v1.Pod) (boo } if reason != "" { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { + if reason == topologymanager.ErrorTopologyAffinity { + reason = v1.PodReasonInfeasible + } + } if m.statusManager.SetPodResizePendingCondition(pod.UID, reason, message, pod.Generation) { eventType := events.ResizeDeferred if reason == v1.PodReasonInfeasible { diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index bff55a600d7b4..cb612052b7de4 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -95,6 +95,11 @@ type numaOrSocketsFirstFuncs interface { sortAvailableNUMANodes() []int sortAvailableSockets() []int sortAvailableCores() []int + takeFullFirstLevelForResize() + takeFullSecondLevelForResize() + sortAvailableNUMANodesForResize() []int + sortAvailableSocketsForResize() []int + sortAvailableCoresForResize() []int } type numaFirst struct{ acc *cpuAccumulator } @@ -204,8 +209,145 @@ func (s *socketsFirst) sortAvailableCores() []int { return result } +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of NUMA Nodes as the first level for resize. +func (n *numaFirst) takeFullFirstLevelForResize() { + n.acc.takeRemainCpusForFullNUMANodes() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then we take +// from the set of sockets as the second level for resize. +func (n *numaFirst) takeFullSecondLevelForResize() { + n.acc.takeRemainCpusForFullSockets() +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, then return the available NUMA nodes +// which have allocated CPUs to Container. +func (n *numaFirst) sortAvailableNUMANodesForResize() []int { + allocatedNumaNodesSet := n.acc.resultDetails.NUMANodes() + availableNumaNodesSet := n.acc.details.NUMANodes() + numas := allocatedNumaNodesSet.Intersection(availableNumaNodesSet).UnsortedList() + n.acc.sort(numas, n.acc.details.CPUsInNUMANodes) + return numas +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the socket which are allocated CPUs to the Container +// Secondly, pull the other sockets which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableSocketsForResize() []int { + var result []int + + // Sort allocated sockets + allocatedSocketsSet := n.acc.resultDetails.Sockets() + availableSocketsSet := n.acc.details.Sockets() + allocatedSockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + n.acc.sort(allocatedSockets, n.acc.details.CPUsInSockets) + result = append(result, allocatedSockets...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, numa := range n.sortAvailableNUMANodesForResize() { + socketSet := n.acc.details.SocketsInNUMANodes(numa) + sockets := socketSet.Difference(allocatedSocketsSet).UnsortedList() + n.acc.sort(sockets, n.acc.details.CPUsInSockets) + result = append(result, sockets...) + } + return result +} + +// If NUMA nodes are higher in the memory hierarchy than sockets, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the NUMA node which are allocated CPUs to the Container +func (n *numaFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := n.acc.resultDetails.Cores() + availableCoresSet := n.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + n.acc.sort(allocatedCores, n.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, socket := range n.acc.sortAvailableSocketsForResize() { + coresSet := n.acc.details.CoresInSockets(socket) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + n.acc.sort(cores, n.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of NUMA Nodes as the first level for resize. +func (s *socketsFirst) takeFullFirstLevelForResize() { + s.acc.takeRemainCpusForFullSockets() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then we take +// from the set of sockets as the second level for resize. +func (s *socketsFirst) takeFullSecondLevelForResize() { + s.acc.takeRemainCpusForFullNUMANodes() +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the NUMA nodes which are allocated CPUs to the Container +// Secondly, pull the other NUMA nodes which are not allocated CPUs to the Container, but contains in the sockets which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableNUMANodesForResize() []int { + var result []int + + // Sort allocated sockets + allocatedNUMANodesSet := s.acc.resultDetails.NUMANodes() + availableNUMANodesSet := s.acc.details.NUMANodes() + allocatedNUMANodes := allocatedNUMANodesSet.Intersection(availableNUMANodesSet).UnsortedList() + s.acc.sort(allocatedNUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, allocatedNUMANodes...) + + // Sort the sockets in allocated numa node, but not allocated CPU on these sockets + for _, socket := range s.sortAvailableSocketsForResize() { + NUMANodesSet := s.acc.details.NUMANodesInSockets(socket) + NUMANodes := NUMANodesSet.Difference(allocatedNUMANodesSet).UnsortedList() + s.acc.sort(NUMANodes, s.acc.details.CPUsInNUMANodes) + result = append(result, NUMANodes...) + } + return result +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, then return the available sockets +// which have allocated CPUs to Container. +func (s *socketsFirst) sortAvailableSocketsForResize() []int { + allocatedSocketsSet := s.acc.resultDetails.Sockets() + availableSocketsSet := s.acc.details.Sockets() + sockets := allocatedSocketsSet.Intersection(availableSocketsSet).UnsortedList() + s.acc.sort(sockets, s.acc.details.CPUsInSockets) + return sockets +} + +// If sockets are higher in the memory hierarchy than NUMA nodes, +// Firstly, pull the cores which are allocated CPUs to the Container +// Secondly, pull the other cores which are not allocated CPUs to the Container, but contains in the socket which are allocated CPUs to the Container +func (s *socketsFirst) sortAvailableCoresForResize() []int { + var result []int + + // Sort allocated cores + allocatedCoresSet := s.acc.resultDetails.Cores() + availableCoresSet := s.acc.details.Cores() + allocatedCores := allocatedCoresSet.Intersection(availableCoresSet).UnsortedList() + s.acc.sort(allocatedCores, s.acc.details.CPUsInCores) + result = append(result, allocatedCores...) + + // Sort the cores in allocated sockets, and allocated numa, but not allocated CPU on these sockets and numa + for _, NUMANode := range s.acc.sortAvailableNUMANodesForResize() { + coresSet := s.acc.details.CoresInNUMANodes(NUMANode) + cores := coresSet.Difference(allocatedCoresSet).UnsortedList() + s.acc.sort(cores, s.acc.details.CPUsInCores) + result = append(result, cores...) + } + return result +} + type availableCPUSorter interface { sort() []int + sortForResize() []int } type sortCPUsPacked struct{ acc *cpuAccumulator } @@ -222,6 +364,14 @@ func (s sortCPUsSpread) sort() []int { return s.acc.sortAvailableCPUsSpread() } +func (s sortCPUsPacked) sortForResize() []int { + return s.acc.sortAvailableCPUsPackedForResize() +} + +func (s sortCPUsSpread) sortForResize() []int { + return s.acc.sortAvailableCPUsSpreadForResize() +} + // CPUSortingStrategy describes the CPU sorting solution within the socket scope. // Using topoDualSocketHT (12 CPUs, 2 sockets, 6 cores) as an example: // @@ -289,6 +439,9 @@ type cpuAccumulator struct { // cardinality equal to the total number of CPUs to accumulate. result cpuset.CPUSet + // `resultDetails` is the set of allocated CPUs in `result` + resultDetails topology.CPUDetails + numaOrSocketsFirst numaOrSocketsFirstFuncs // availableCPUSorter is used to control the cpu sorting result. @@ -322,6 +475,68 @@ func newCPUAccumulator(logger logr.Logger, topo *topology.CPUTopology, available return acc } +func newCPUAccumulatorForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) *cpuAccumulator { + acc := &cpuAccumulator{ + logger: logger, + topo: topo, + details: topo.CPUDetails.KeepOnly(availableCPUs), + numCPUsNeeded: numCPUs, + result: cpuset.New(), + resultDetails: topo.CPUDetails.KeepOnly(cpuset.New()), + } + + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsEmpty() { + // Increase of CPU resources ( scale up ) + // Take existing from allocated + // CPUs + if numCPUs > reusableCPUsForResize.Size() { + // scale up ... + acc.take(reusableCPUsForResize.Clone()) + } + + // Decrease of CPU resources ( scale down ) + // Take delta from allocated CPUs, if mustKeepCPUsForResize + // is not nil, use explicetely those. If it is nil + // take delta starting from lowest CoreId of CPUs ( TODO esotsal, perhaps not needed). + if numCPUs < reusableCPUsForResize.Size() { + if mustKeepCPUsForResize != nil { + // If explicetely CPUs to keep + // during scale down is given ( this requires + // addition in container[].resources ... which + // could be possible to patch ? Esotsal Note This means + // modifying API code + if !(mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + acc.take(mustKeepCPUsForResize.Clone()) + } else { + return acc + } + } + } + + if numCPUs == reusableCPUsForResize.Size() { + // nothing to do return as is + acc.take(reusableCPUsForResize.Clone()) + return acc + } + } + } + + if topo.NumSockets >= topo.NumNUMANodes { + acc.numaOrSocketsFirst = &numaFirst{acc} + } else { + acc.numaOrSocketsFirst = &socketsFirst{acc} + } + + if cpuSortingStrategy == CPUSortingStrategyPacked { + acc.availableCPUSorter = &sortCPUsPacked{acc} + } else { + acc.availableCPUSorter = &sortCPUsSpread{acc} + } + + return acc +} + // Returns true if the supplied NUMANode is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isNUMANodeFree(numaID int) bool { @@ -395,6 +610,21 @@ func (a *cpuAccumulator) freeCPUs() []int { return a.availableCPUSorter.sort() } +// Return true if this numa only allocated CPUs for this Container +func (a *cpuAccumulator) isFullNUMANodeForResize(numaID int) bool { + return a.resultDetails.CPUsInNUMANodes(numaID).Size()+a.details.CPUsInNUMANodes(numaID).Size() == a.topo.CPUDetails.CPUsInNUMANodes(numaID).Size() +} + +// Return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullSocketForResize(socketID int) bool { + return a.resultDetails.CPUsInSockets(socketID).Size()+a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() +} + +// return true if this Socket only allocated CPUs for this Container +func (a *cpuAccumulator) isFullCoreForResize(coreID int) bool { + return a.resultDetails.CPUsInCores(coreID).Size()+a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() +} + // Sorts the provided list of NUMA nodes/sockets/cores/cpus referenced in 'ids' // by the number of available CPUs contained within them (smallest to largest). // The 'getCPU()' parameter defines the function that should be called to @@ -524,8 +754,108 @@ func (a *cpuAccumulator) sortAvailableCPUsSpread() []int { return result } +// Sort all NUMA nodes with at least one free CPU. +// +// If NUMA nodes are higher than sockets in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than sockets in the memory hierarchy" means that NUMA nodes +// contain a bigger number of CPUs (free and busy) than sockets, or equivalently that each NUMA node +// contains more than one socket. +// +// If instead NUMA nodes are lower in the memory hierarchy than sockets, they are sorted as follows. +// First part, sort the NUMA nodes which contains the CPUs allocated to Container. and these NUMA nodes +// are sorted by number of free CPUs that they contain. +// Second part, sort the NUMA nodes contained in the sockets which contains the CPUs allocated to Container, +// but exclude the NUMA nodes in first part. these NUMA nodes sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the sockets that contain them. Then, for each +// socket they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableNUMANodesForResize() []int { + return a.numaOrSocketsFirst.sortAvailableNUMANodesForResize() +} + +// Sort all sockets with at least one free CPU. +// +// If sockets are higher than NUMA nodes in the memory hierarchy, they are sorted by ascending number +// of free CPUs that they contain. "higher than NUMA nodes in the memory hierarchy" means that +// sockets contain a bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each +// socket contains more than one NUMA node. +// +// If instead sockets are lower in the memory hierarchy than NUMA nodes, they are sorted as follows. +// First part, sort the sockets which contains the CPUs allocated to Container. and these sockets +// are sorted by number of free CPUs that they contain. +// Second part, sort the sockets contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the sockets in first part. these sockets sorted by the rule as below +// +// First, they are sorted by number of free CPUs in the NUMA nodes that contain them. Then, for each +// NUMA node they are sorted by number of free CPUs that they contain. The order is always ascending. +func (a *cpuAccumulator) sortAvailableSocketsForResize() []int { + return a.numaOrSocketsFirst.sortAvailableSocketsForResize() +} + +// Sort all cores with at least one free CPU. +// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the cores are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the cores in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the cores in it are sorted +// by number of free CPUs that they contain. The order is always ascending. + +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCoresForResize() []int { + return a.numaOrSocketsFirst.sortAvailableCoresForResize() +} + +// Sort all free CPUs. +// +// If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a +// bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains +// more than one NUMA node, the CPUs are sorted as follows. +// First part, sort the cores which contains the CPUs allocated to Container. and these cores +// are sorted by number of free CPUs that they contain. for each core, the CPUs in it are +// sorted by numerical ID. +// Second part, sort the cores contained in the NUMA nodes which contains the CPUs allocated to Container, +// but exclude the cores in first part. these cores sorted by the rule as below +// First, they are sorted by number of +// free CPUs that their sockets contain. Then, for each socket, the CPUs in it are sorted by number +// of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the CPUs in it are sorted +// by number of free CPUs that their cores contain. Finally, for each core, the CPUs in it are +// sorted by numerical ID. The order is always ascending. +// +// If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the +// same way as described in the previous paragraph. +func (a *cpuAccumulator) sortAvailableCPUsPackedForResize() []int { + var result []int + for _, core := range a.sortAvailableCoresForResize() { + cpus := a.details.CPUsInCores(core).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) + } + return result +} + +// Sort all available CPUs: +// - First by core using sortAvailableSocketsForResize(). +// - Then within each socket, sort cpus directly using the sort() algorithm defined above. +func (a *cpuAccumulator) sortAvailableCPUsSpreadForResize() []int { + var result []int + for _, socket := range a.sortAvailableSocketsForResize() { + cpus := a.details.CPUsInSockets(socket).UnsortedList() + sort.Ints(cpus) + result = append(result, cpus...) + } + return result +} + func (a *cpuAccumulator) take(cpus cpuset.CPUSet) { a.result = a.result.Union(cpus) + a.resultDetails = a.topo.CPUDetails.KeepOnly(a.result) a.details = a.details.KeepOnly(a.details.CPUs().Difference(a.result)) a.numCPUsNeeded -= cpus.Size() } @@ -647,6 +977,55 @@ func (a *cpuAccumulator) takeRemainingCPUs() { } } +func (a *cpuAccumulator) takeRemainCpusForFullNUMANodes() { + for _, numa := range a.sortAvailableNUMANodesForResize() { + if a.isFullNUMANodeForResize(numa) { + cpusInNUMANode := a.details.CPUsInNUMANodes(numa) + if !a.needsAtLeast(cpusInNUMANode.Size()) { + continue + } + a.logger.V(4).Info("takeRemainCpusForFullNUMANodes: claiming NUMA node", "numa", numa, "cpusInNUMANode", cpusInNUMANode) + a.take(cpusInNUMANode) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullSockets() { + for _, socket := range a.sortAvailableSocketsForResize() { + if a.isFullSocketForResize(socket) { + cpusInSocket := a.details.CPUsInSockets(socket) + if !a.needsAtLeast(cpusInSocket.Size()) { + continue + } + a.logger.V(4).Info("takeRemainCpusForFullSockets: claiming Socket", "socket", socket, "cpusInSocket", cpusInSocket) + a.take(cpusInSocket) + } + } +} + +func (a *cpuAccumulator) takeRemainCpusForFullCores() { + for _, core := range a.sortAvailableCoresForResize() { + if a.isFullCoreForResize(core) { + cpusInCore := a.details.CPUsInCores(core) + if !a.needsAtLeast(cpusInCore.Size()) { + continue + } + a.logger.V(4).Info("takeRemainCpusForFullCores: claiming Core", "core", core, "cpusInCore", cpusInCore) + a.take(cpusInCore) + } + } +} + +func (a *cpuAccumulator) takeRemainingCPUsForResize() { + for _, cpu := range a.availableCPUSorter.sortForResize() { + a.logger.V(4).Info("takeRemainingCPUsForResize: claiming CPU", "cpu", cpu) + a.take(cpuset.New(cpu)) + if a.isSatisfied() { + return + } + } +} + // rangeNUMANodesNeededToSatisfy returns minimum and maximum (in this order) number of NUMA nodes // needed to satisfy the cpuAccumulator's goal of accumulating `a.numCPUsNeeded` CPUs, assuming that // CPU groups have size given by the `cpuGroupSize` argument. @@ -1121,3 +1500,344 @@ func takeByTopologyNUMADistributed(logger logr.Logger, topo *topology.CPUTopolog // distribute CPUs across, fall back to the packing algorithm. return takeByTopologyNUMAPacked(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false) } + +func takeByTopologyNUMADistributedForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + // If the number of CPUs requested cannot be handed out in chunks of + // 'cpuGroupSize', then we just call out the packing algorithm since we + // can't distribute CPUs in this chunk size. + // PreferAlignByUncoreCache feature not implemented here yet and set to false. + // Support for PreferAlignByUncoreCache to be done at beta release. + if (numCPUs % cpuGroupSize) != 0 { + return takeByTopologyNUMAPackedForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) + } + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + + // Otherwise build an accumulator to start allocating CPUs from. + acc := newCPUAccumulatorForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, nil, mustKeepCPUsForResize) + if acc.isSatisfied() { + return acc.result, nil + } + if acc.isFailed() { + return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) + } + + // Get the list of NUMA nodes represented by the set of CPUs in 'availableCPUs'. + numas := acc.sortAvailableNUMANodes() + reusableCPUsForResizeDetail := acc.topo.CPUDetails.KeepOnly(cpuset.New()) + allocatedCPUsNumber := 0 + if reusableCPUsForResize != nil { + reusableCPUsForResizeDetail = acc.topo.CPUDetails.KeepOnly(*reusableCPUsForResize) + allocatedCPUsNumber = reusableCPUsForResize.Size() + } + allocatedNumas := reusableCPUsForResizeDetail.NUMANodes() + allocatedCPUPerNuma := make(mapIntInt, len(numas)) + for _, numa := range numas { + allocatedCPUPerNuma[numa] = reusableCPUsForResizeDetail.CPUsInNUMANodes(numa).Size() + } + + // Calculate the minimum and maximum possible number of NUMA nodes that + // could satisfy this request. This is used to optimize how many iterations + // of the loop we need to go through below. + minNUMAs, maxNUMAs := acc.rangeNUMANodesNeededToSatisfy(cpuGroupSize) + minNUMAs = max(minNUMAs, allocatedNumas.Size()) + + // Try combinations of 1,2,3,... NUMA nodes until we find a combination + // where we can evenly distribute CPUs across them. To optimize things, we + // don't always start at 1 and end at len(numas). Instead, we use the + // values of 'minNUMAs' and 'maxNUMAs' calculated above. + for k := minNUMAs; k <= maxNUMAs; k++ { + // Iterate through the various n-choose-k NUMA node combinations, + // looking for the combination of NUMA nodes that can best have CPUs + // distributed across them. + var bestBalance = math.MaxFloat64 + var bestRemainder []int = nil + var bestCombo []int = nil + acc.iterateCombinations(numas, k, func(combo []int) LoopControl { + // If we've already found a combo with a balance of 0 in a + // different iteration, then don't bother checking any others. + if bestBalance == 0 { + return Break + } + + // Check if the 'allocatedNumas' CPU set is a subset of the 'comboSet' + comboSet := cpuset.New(combo...) + if !allocatedNumas.IsSubsetOf(comboSet) { + return Continue + } + + // Check that this combination of NUMA nodes has enough CPUs to + // satisfy the allocation overall. + cpus := acc.details.CPUsInNUMANodes(combo...) + if (cpus.Size() + allocatedCPUsNumber) < numCPUs { + return Continue + } + + // Check that CPUs can be handed out in groups of size + // 'cpuGroupSize' across the NUMA nodes in this combo. + numCPUGroups := 0 + for _, numa := range combo { + numCPUGroups += ((acc.details.CPUsInNUMANodes(numa).Size() + allocatedCPUPerNuma[numa]) / cpuGroupSize) + } + if (numCPUGroups * cpuGroupSize) < numCPUs { + return Continue + } + + // Check that each NUMA node in this combination can allocate an + // even distribution of CPUs in groups of size 'cpuGroupSize', + // modulo some remainder. + distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize + for _, numa := range combo { + cpus := acc.details.CPUsInNUMANodes(numa) + if (cpus.Size() + allocatedCPUPerNuma[numa]) < distribution { + return Continue + } + if allocatedCPUPerNuma[numa] > distribution { + return Continue + } + } + + // Calculate how many CPUs will be available on each NUMA node in + // the system after allocating an even distribution of CPU groups + // of size 'cpuGroupSize' from each NUMA node in 'combo'. This will + // be used in the "balance score" calculation to help decide if + // this combo should ultimately be chosen. + availableAfterAllocation := make(mapIntInt, len(numas)) + for _, numa := range numas { + availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() + } + for _, numa := range combo { + availableAfterAllocation[numa] -= (distribution - allocatedCPUPerNuma[numa]) + } + + // Check if there are any remaining CPUs to distribute across the + // NUMA nodes once CPUs have been evenly distributed in groups of + // size 'cpuGroupSize'. + remainder := numCPUs - (distribution * len(combo)) + + // Get a list of NUMA nodes to consider pulling the remainder CPUs + // from. This list excludes NUMA nodes that don't have at least + // 'cpuGroupSize' CPUs available after being allocated + // 'distribution' number of CPUs. + var remainderCombo []int + for _, numa := range combo { + if availableAfterAllocation[numa] >= cpuGroupSize { + remainderCombo = append(remainderCombo, numa) + } + } + + // Declare a set of local variables to help track the "balance + // scores" calculated when using different subsets of + // 'remainderCombo' to allocate remainder CPUs from. + var bestLocalBalance = math.MaxFloat64 + var bestLocalRemainder []int = nil + + // If there aren't any remainder CPUs to allocate, then calculate + // the "balance score" of this combo as the standard deviation of + // the values contained in 'availableAfterAllocation'. + if remainder == 0 { + bestLocalBalance = standardDeviation(availableAfterAllocation.Values()) + bestLocalRemainder = nil + } + + // Otherwise, find the best "balance score" when allocating the + // remainder CPUs across different subsets of NUMA nodes in 'remainderCombo'. + // These remainder CPUs are handed out in groups of size 'cpuGroupSize'. + // We start from k=len(remainderCombo) and walk down to k=1 so that + // we continue to distribute CPUs as much as possible across + // multiple NUMA nodes. + for k := len(remainderCombo); remainder > 0 && k >= 1; k-- { + acc.iterateCombinations(remainderCombo, k, func(subset []int) LoopControl { + // Make a local copy of 'remainder'. + remainder := remainder + + // Make a local copy of 'availableAfterAllocation'. + availableAfterAllocation := availableAfterAllocation.Clone() + + // If this subset is not capable of allocating all + // remainder CPUs, continue to the next one. + if sum(availableAfterAllocation.Values(subset...)) < remainder { + return Continue + } + + // For all NUMA nodes in 'subset', walk through them, + // removing 'cpuGroupSize' number of CPUs from each + // until all remainder CPUs have been accounted for. + for remainder > 0 { + for _, numa := range subset { + if remainder == 0 { + break + } + if availableAfterAllocation[numa] < cpuGroupSize { + continue + } + availableAfterAllocation[numa] -= cpuGroupSize + remainder -= cpuGroupSize + } + } + + // Calculate the "balance score" as the standard deviation + // of the number of CPUs available on all NUMA nodes in the + // system after the remainder CPUs have been allocated + // across 'subset' in groups of size 'cpuGroupSize'. + balance := standardDeviation(availableAfterAllocation.Values()) + if balance < bestLocalBalance { + bestLocalBalance = balance + bestLocalRemainder = subset + } + + return Continue + }) + } + + // If the best "balance score" for this combo is less than the + // lowest "balance score" of all previous combos, then update this + // combo (and remainder set) to be the best one found so far. + if bestLocalBalance < bestBalance { + bestBalance = bestLocalBalance + bestRemainder = bestLocalRemainder + bestCombo = combo + } + + return Continue + }) + + // If we made it through all of the iterations above without finding a + // combination of NUMA nodes that can properly balance CPU allocations, + // then move on to the next larger set of NUMA node combinations. + if bestCombo == nil { + continue + } + + // Otherwise, start allocating CPUs from the NUMA node combination + // chosen. First allocate an even distribution of CPUs in groups of + // size 'cpuGroupSize' from 'bestCombo'. + distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize + for _, numa := range bestCombo { + reusableCPUsPerNumaForResize := reusableCPUsForResizeDetail.CPUsInNUMANodes(numa) + cpus, _ := takeByTopologyNUMAPackedForResize(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false, &reusableCPUsPerNumaForResize, mustKeepCPUsForResize) + acc.take(cpus) + } + + // Then allocate any remaining CPUs in groups of size 'cpuGroupSize' + // from each NUMA node in the remainder set. + remainder := numCPUs - (distribution * len(bestCombo)) + for remainder > 0 { + for _, numa := range bestRemainder { + if remainder == 0 { + break + } + if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize { + continue + } + cpus, _ := takeByTopologyNUMAPackedForResize(logger, acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false, nil, mustKeepCPUsForResize) + acc.take(cpus) + remainder -= cpuGroupSize + } + } + + // If we haven't allocated all of our CPUs at this point, then something + // went wrong in our accounting and we should error out. + if acc.numCPUsNeeded > 0 { + return cpuset.New(), fmt.Errorf("accounting error, not enough CPUs allocated, remaining: %v", acc.numCPUsNeeded) + } + + // Likewise, if we have allocated too many CPUs at this point, then something + // went wrong in our accounting and we should error out. + if acc.numCPUsNeeded < 0 { + return cpuset.New(), fmt.Errorf("accounting error, too many CPUs allocated, remaining: %v", acc.numCPUsNeeded) + } + + // Otherwise, return the result + return acc.result, nil + } + + // If we never found a combination of NUMA nodes that we could properly + // distribute CPUs across, fall back to the packing algorithm. + return takeByTopologyNUMAPackedForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, false, reusableCPUsForResize, mustKeepCPUsForResize) +} + +func takeByTopologyNUMAPackedForResize(logger logr.Logger, topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // If the number of CPUs requested to be retained is not a subset + // of reusableCPUs, then we fail early + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if (mustKeepCPUsForResize.Intersection(reusableCPUsForResize.Clone())).IsEmpty() { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + + acc := newCPUAccumulatorForResize(logger, topo, availableCPUs, numCPUs, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) + if acc.isSatisfied() { + return acc.result, nil + } + if acc.isFailed() { + return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size()) + } + + // Algorithm: topology-aware best-fit + // 1. Acquire whole NUMA nodes and sockets, if available and the container + // requires at least a NUMA node or socket's-worth of CPUs. If NUMA + // Nodes map to 1 or more sockets, pull from NUMA nodes first. + // Otherwise pull from sockets first. + acc.numaOrSocketsFirst.takeFullFirstLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullFirstLevel() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullSecondLevelForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.numaOrSocketsFirst.takeFullSecondLevel() + if acc.isSatisfied() { + return acc.result, nil + } + + // 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches + // if available and the container requires at least a UncoreCache's-worth + // of CPUs. Otherwise, acquire CPUs from the least amount of UncoreCaches. + if preferAlignByUncoreCache { + acc.takeUncoreCache() + if acc.isSatisfied() { + return acc.result, nil + } + } + + // 3. Acquire whole cores, if available and the container requires at least + // a core's-worth of CPUs. + // If `CPUSortingStrategySpread` is specified, skip taking the whole core. + if cpuSortingStrategy != CPUSortingStrategySpread { + acc.takeRemainCpusForFullCores() + if acc.isSatisfied() { + return acc.result, nil + } + acc.takeFullCores() + if acc.isSatisfied() { + return acc.result, nil + } + } + + // 4. Acquire single threads, preferring to fill partially-allocated cores + // on the same sockets as the whole cores we have already taken in this + // allocation. + acc.takeRemainingCPUsForResize() + if acc.isSatisfied() { + return acc.result, nil + } + acc.takeRemainingCPUs() + if acc.isSatisfied() { + return acc.result, nil + } + + return cpuset.New(), fmt.Errorf("failed to allocate cpus") +} diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 34933bc982a55..fe8d629fa2e2d 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -1080,6 +1080,475 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { } } +type takeByTopologyTestCaseForResize struct { + description string + topo *topology.CPUTopology + opts StaticPolicyOptions + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyTestCasesForResize(t *testing.T) []takeByTopologyTestCaseForResize { + return []takeByTopologyTestCaseForResize{ + { + "Allocated 1 CPUs, and take 1 cpus from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 1, + "", + cpuset.New(0), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 4), + }, + { + "Allocated 1 CPU, and take 2 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 2, + "", + cpuset.New(0, 6), + }, + { + "Allocated 1 CPU, and take 3 cpu from single socket with HT, some cpus are taken, no sibling CPU of allocated CPU", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1,3,5,6,7"), + cpuset.New(0), + 3, + "", + cpuset.New(0, 1, 5), + }, + { + "Allocated 1 CPU, and take all cpu from single socket with HT", + topoSingleSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "1-7"), + cpuset.New(0), + 8, + "", + mustParseCPUSet(t, "0-7"), + }, + { + "Allocated 1 CPU, take a core from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 2, + "", + cpuset.New(5, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + cpuset.New(1, 3, 5, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus and 1 core of CPU from dual socket with HT", + topoDualSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + cpuset.New(0, 1, 3, 5, 6, 7, 9, 11), + }, + { + "Allocated 1 CPU, take a socket of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "20-39,60-79"), + }, + { + "Allocated 1 CPU, take a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 20, + "", + mustParseCPUSet(t, "30-39,70-79"), + }, + { + "Allocated 2 CPUs, take a socket and a NUMA node of cpus from dual socket with multi-numa-per-socket with HT", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-58,60-79"), + cpuset.New(39, 59), + 60, + "", + mustParseCPUSet(t, "0-19,30-59,70-79"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-29,39-48,60-69"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken more CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "9,30-38,49"), + cpuset.New(), + 1, + "", + mustParseCPUSet(t, "9"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus and 1 CPU from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 41, + "", + mustParseCPUSet(t, "0-19,39-59"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from single socket with HT, 3 cpus", + topoSingleSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 3, + "", + mustParseCPUSet(t, "0,1,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 3 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 3, + "", + mustParseCPUSet(t, "1,3,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 6 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 6, + "", + mustParseCPUSet(t, "1,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with HT, 8 cpus", + topoDualSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + "", + mustParseCPUSet(t, "0,1,2,3,5,7,9,11"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket without HT, 2 cpus", + topoDualSocketNoHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-6"), + cpuset.New(7), + 2, + "", + mustParseCPUSet(t, "4,7"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from dual socket with multi numa per socket and HT, 8 cpus", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-79"), + cpuset.New(39), + 8, + "", + mustParseCPUSet(t, "20-26,39"), + }, + { + "Allocated 1 CPU, take NUMA nodes of cpus from dual socket with multi-numa-per-socket with HT, the NUMA node with allocated CPUs already taken some CPUs", + topoDualSocketMultiNumaPerSocketHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-38,40-69"), + cpuset.New(39), + 40, + "", + mustParseCPUSet(t, "0-9,20-39,60-69"), + }, + { + "Allocated 1 CPUs, take a socket of cpus from quad socket four way with HT, 12 cpus", + topoQuadSocketFourWayHT, + StaticPolicyOptions{DistributeCPUsAcrossCores: true}, + mustParseCPUSet(t, "0-59,61-287"), + cpuset.New(60), + 8, + "", + mustParseCPUSet(t, "3,4,11,12,15,16,23,60"), + }, + } +} + +func TestTakeByTopologyNUMAPackedForResize(t *testing.T) { + logger, _ := ktesting.NewTestContext(t) + testCases := commonTakeByTopologyTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + strategy := CPUSortingStrategyPacked + if tc.opts.DistributeCPUsAcrossCores { + strategy = CPUSortingStrategySpread + } + + result, err := takeByTopologyNUMAPackedForResize(logger, tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption, &tc.reusableCPUs, nil) + + if tc.expErr != "" && err != nil && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", result, tc.expResult) + } + }) + } +} + +type takeByTopologyExtendedTestCaseForResize struct { + description string + topo *topology.CPUTopology + availableCPUs cpuset.CPUSet + reusableCPUs cpuset.CPUSet + numCPUs int + cpuGroupSize int + expErr string + expResult cpuset.CPUSet +} + +func commonTakeByTopologyExtendedTestCasesForResize(t *testing.T) []takeByTopologyExtendedTestCaseForResize { + return []takeByTopologyExtendedTestCaseForResize{ + { + "Allocated 1 CPUs, allocate 4 full cores with 2 distributed across each NUMA node", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + cpuset.New(11), + 8, + 1, + "", + mustParseCPUSet(t, "0,6,2,8,1,7,5,11"), + }, + { + "Allocated 8 CPUs, allocate 32 full cores with 8 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-35,40-75"), + mustParseCPUSet(t, "36-39,76-79"), + 64, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-33,36-39,40-47,50-57,60-67,70-73,76-79"), + }, + { + "Allocated 2 CPUs, allocate 8 full cores with 2 distributed across each NUMA node", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"), + mustParseCPUSet(t, "0,1"), + 16, + 1, + "", + mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"), + }, + { + "Allocated 1 CPUs, take 1 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 1, + 1, + "", + mustParseCPUSet(t, "11"), + }, + { + "Allocated 1 CPUs, take 2 cpu from dual socket with HT - core from Socket 0", + topoDualSocketHT, + mustParseCPUSet(t, "0-10"), + mustParseCPUSet(t, "11"), + 2, + 1, + "", + mustParseCPUSet(t, "5,11"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 0, 1", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-66,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 1, 2 (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-36,40-47,50-57,60-67,70-76"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 2, 3 (to keep balance)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-46,50-56,60-67,70-77"), + }, + { + "Allocated 2 CPUs, allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 2, 3 (to keep balance with cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-8,10-18,20-39,40-48,50-58,60-79"), + mustParseCPUSet(t, "0,1"), + 62, + 2, + "", + mustParseCPUSet(t, "0-7,10-16,20-27,30-37,40-47,50-56,60-67,70-77"), + }, + { + "Allocated 4 CPUs, ensure bestRemainder chosen with NUMA nodes that have enough CPUs to satisfy the request", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-13,20-23,30-36,40-43,50-53,60-63,70-76"), + mustParseCPUSet(t, "0-3"), + 34, + 1, + "", + mustParseCPUSet(t, "0-3,10-13,20-23,30-34,40-43,50-53,60-63,70-74"), + }, + { + "Allocated 4 CPUs, ensure previous failure encountered on live machine has been fixed (1/1)", + topoDualSocketMultiNumaPerSocketHTLarge, + mustParseCPUSet(t, "0,128,30,31,158,159,47,171-175,62,63,190,191,75-79,203-207,94,96,222,223,101-111,229-239,126,127,254,255"), + mustParseCPUSet(t, "43-46"), + 28, + 1, + "", + mustParseCPUSet(t, "43-47,75-79,96,101-105,171-174,203-206,229-232"), + }, + { + "Allocated 14 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-39,48-79"), + mustParseCPUSet(t, "0-7,40-47"), + 48, + 1, + "", + mustParseCPUSet(t, "0-7,10-17,20-27,40-47,50-57,60-67"), + }, + { + "Allocated 20 CPUs, allocated CPUs in numa0 is bigger than distribute CPUs, allocated CPUs by takeByTopologyNUMAPacked", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "10-39,50-79"), + mustParseCPUSet(t, "0-9,40-49"), + 48, + 1, + "", + mustParseCPUSet(t, "0-23,40-63"), + }, + { + "Allocated 12 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (taking all but 2 from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "8-29,32-39,48-69,72-79"), + mustParseCPUSet(t, "1-7,41-47"), + 48, + 1, + "", + mustParseCPUSet(t, "1-8,10-17,20-27,41-48,50-57,60-67"), + }, + { + "Allocated 10 CPUs, allocate 24 full cores with 8 distributed across the first 3 NUMA nodes (even though all 8 could be allocated from the first NUMA node)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "2-29,31-39,42-69,71-79"), + mustParseCPUSet(t, "2-7,42-47"), + 48, + 1, + "", + mustParseCPUSet(t, "2-9,10-17,20-27,42-49,50-57,60-67"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 1, + "", + mustParseCPUSet(t, "20-26,30-36,60-65,70-75"), + }, + { + "Allocated 2 CPUs, allocate 13 full cores distributed across the 2 NUMA nodes (cpuGroupSize 2)", + topoDualSocketMultiNumaPerSocketHT, + mustParseCPUSet(t, "0-29,31-69,71-79"), + mustParseCPUSet(t, "30,70"), + 26, + 2, + "", + mustParseCPUSet(t, "20-25,30-36,60-65,70-76"), + }, + } +} + +func TestTakeByTopologyNUMADistributedForResize(t *testing.T) { + logger, _ := ktesting.NewTestContext(t) + testCases := commonTakeByTopologyExtendedTestCasesForResize(t) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + result, err := takeByTopologyNUMADistributedForResize(logger, tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize, CPUSortingStrategyPacked, &tc.reusableCPUs, nil) + if err != nil { + if tc.expErr == "" { + t.Errorf("unexpected error [%v]", err) + } + if tc.expErr != "" && err.Error() != tc.expErr { + t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err) + } + return + } + if !result.Equals(tc.expResult) { + t.Errorf("expected result [%s] to equal [%s]", result, tc.expResult) + } + }) + } +} + func mustParseCPUSet(t *testing.T, s string) cpuset.CPUSet { cpus, err := cpuset.Parse(s) if err != nil { diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index bb87a7f067555..05913c0ade16b 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/utils/cpuset" ) @@ -62,7 +63,7 @@ type Manager interface { // Called to trigger the allocation of CPUs to a container. This must be // called at some point prior to the AddContainer() call for a container, // e.g. at pod admission time. - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // AddContainer adds the mapping between container ID to pod UID and the container name // The mapping used to remove the CPU allocation during the container removal @@ -79,7 +80,7 @@ type Manager interface { // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetExclusiveCPUs implements the podresources.CPUsProvider interface to provide // exclusively allocated cpus for the container @@ -88,7 +89,7 @@ type Manager interface { // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment per Pod // among this and other resource controllers. - GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableCPUs returns the total set of CPUs available for allocation. GetAllocatableCPUs() cpuset.CPUSet @@ -256,7 +257,7 @@ func (m *manager) Start(ctx context.Context, activePods ActivePodsFunc, sourcesR return nil } -func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error { +func (m *manager) Allocate(p *v1.Pod, c *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() // until we move topology manager to contextual logging // Garbage collect any stranded resources before allocating CPUs. @@ -266,7 +267,7 @@ func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error { defer m.Unlock() // Call down into the policy to assign this container CPUs if required. - err := m.policy.Allocate(logger, m.state, p, c) + err := m.policy.Allocate(logger, m.state, p, c, operation) if err != nil { logger.Error(err, "policy error") return err @@ -327,20 +328,20 @@ func (m *manager) State() state.Reader { return m.state } -func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(logger) // Delegate to active policy - return m.policy.GetTopologyHints(logger, m.state, pod, container) + return m.policy.GetTopologyHints(logger, m.state, pod, container, operation) } -func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *manager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(logger) // Delegate to active policy - return m.policy.GetPodTopologyHints(logger, m.state, pod) + return m.policy.GetPodTopologyHints(logger, m.state, pod, operation) } func (m *manager) GetAllocatableCPUs() cpuset.CPUSet { @@ -353,10 +354,16 @@ func (m *manager) GetAllCPUs() cpuset.CPUSet { type reconciledContainer struct { podName string + podUID string containerName string containerID string } +type reconciledContainerAllocation struct { + reconciledContainer + allocatedSet cpuset.CPUSet +} + func (m *manager) removeStaleState(rootLogger logr.Logger) { // Only once all sources are ready do we attempt to remove any stale state. // This ensures that the call to `m.activePods()` below will succeed with @@ -423,15 +430,19 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta failure = []reconciledContainer{} rootLogger := klog.FromContext(ctx) - m.removeStaleState(rootLogger) + + exclusiveCPUContainers := []reconciledContainerAllocation{} + nonExclusiveCPUContainers := []reconciledContainerAllocation{} + + m.Lock() for _, pod := range m.activePods() { podLogger := klog.LoggerWithValues(rootLogger, "pod", klog.KObj(pod)) pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID) if !ok { podLogger.V(5).Info("skipping pod; status not found") - failure = append(failure, reconciledContainer{pod.Name, "", ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), "", ""}) continue } @@ -443,25 +454,24 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta containerID, err := findContainerIDByName(&pstatus, container.Name) if err != nil { logger.V(5).Info("skipping container; ID not found in pod status", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } cstatus, err := findContainerStatusByName(&pstatus, container.Name) if err != nil { logger.V(5).Info("skipping container; container status not found in pod status", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } if cstatus.State.Waiting != nil || (cstatus.State.Waiting == nil && cstatus.State.Running == nil && cstatus.State.Terminated == nil) { logger.V(4).Info("skipping container; container still in the waiting state", "err", err) - failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, ""}) continue } - m.Lock() if cstatus.State.Terminated != nil { // The container is terminated but we can't call m.RemoveContainer() // here because it could remove the allocated cpuset for the container @@ -472,7 +482,6 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta if err == nil { logger.V(4).Info("ignoring terminated container", "containerID", containerID) } - m.Unlock() continue } @@ -480,30 +489,84 @@ func (m *manager) reconcileState(ctx context.Context) (success []reconciledConta // Idempotently add it to the containerMap incase it is missing. // This can happen after a kubelet restart, for example. m.containerMap.Add(string(pod.UID), container.Name, containerID) - m.Unlock() - cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name) + cset, exclusive := m.state.GetCPUSet(string(pod.UID), container.Name) + if !exclusive { + cset = m.state.GetDefaultCPUSet() + } if cset.IsEmpty() { // NOTE: This should not happen outside of tests. logger.V(2).Info("ReconcileState: skipping container; empty cpuset assigned") - failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) + failure = append(failure, reconciledContainer{pod.Name, string(pod.UID), container.Name, containerID}) continue } - lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name) - if !cset.Equals(lcset) { - logger.V(5).Info("updating container", "containerID", containerID, "cpuSet", cset) - err = m.updateContainerCPUSet(ctx, containerID, cset) + rca := reconciledContainerAllocation{ + reconciledContainer{pod.Name, string(pod.UID), container.Name, containerID}, + cset, + } + if exclusive { + exclusiveCPUContainers = append(exclusiveCPUContainers, rca) + } else { + nonExclusiveCPUContainers = append(nonExclusiveCPUContainers, rca) + } + + } + } + m.Unlock() + + failedContainersCPUSet := cpuset.New() + + updateContainers := func(containers []reconciledContainerAllocation, preliminary bool) { + for _, rca := range containers { + logger := klog.LoggerWithValues(rootLogger, "podName", rca.podName, "containerName", rca.containerName) + + lcset := m.lastUpdateState.GetCPUSetOrDefault(rca.podUID, rca.containerName) + + // Determine the CPU set to use based on the pass + var targetCPUSet cpuset.CPUSet + if preliminary { + targetCPUSet = rca.allocatedSet.Intersection(lcset) + } else { + targetCPUSet = rca.allocatedSet + } + + // Check if update is needed + if !targetCPUSet.Equals(lcset) { + if !preliminary && !targetCPUSet.Intersection(failedContainersCPUSet).IsEmpty() { + logger.Error(fmt.Errorf("Conflict with previously failed container CPUSet updates"), "failed to update container", "containerID", rca.containerID, "cpuSet", rca.allocatedSet) + failure = append(failure, rca.reconciledContainer) + failedContainersCPUSet = failedContainersCPUSet.Union(lcset) + continue + } + + logger.V(5).Info("updating container", "containerID", rca.containerID, "cpuSet", targetCPUSet) + err := m.updateContainerCPUSet(ctx, rca.containerID, targetCPUSet) if err != nil { - logger.Error(err, "failed to update container", "containerID", containerID, "cpuSet", cset) - failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) + logger.Error(err, "failed to update container", "containerID", rca.containerID, "cpuSet", targetCPUSet) + failure = append(failure, rca.reconciledContainer) + failedContainersCPUSet = failedContainersCPUSet.Union(lcset) continue } - m.lastUpdateState.SetCPUSet(string(pod.UID), container.Name, cset) + m.lastUpdateState.SetCPUSet(rca.podUID, rca.containerName, targetCPUSet) + } + + // Add to success list if required + if !preliminary { + success = append(success, rca.reconciledContainer) } - success = append(success, reconciledContainer{pod.Name, container.Name, containerID}) } } + + // first pass - only remove CPUs from containers using exclusive CPUs + updateContainers(exclusiveCPUContainers, true) + + // second pass - apply CPU sets to non exclusive CPU containers + updateContainers(nonExclusiveCPUContainers, false) + + // third pass - apply final CPU set to containers using exclusive CPUs + updateContainers(exclusiveCPUContainers, false) + return success, failure } diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go new file mode 100644 index 0000000000000..07a1406177b6f --- /dev/null +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_others_test.go @@ -0,0 +1,36 @@ +//go:build !windows + +/* +Copyright 2026 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpumanager + +import ( + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + "k8s.io/utils/cpuset" +) + +func (rt mockRuntimeService) getCPUSetFromResources(resources *runtimeapi.ContainerResources) cpuset.CPUSet { + if resources != nil && resources.Linux != nil { + set, err := cpuset.Parse(resources.Linux.CpusetCpus) + if err != nil { + rt.t.Errorf("(%v) Cannot parse Linux CPUSet resources %v", rt.testCaseDescription, resources.Linux.CpusetCpus) + return cpuset.New() + } + return set + } + return cpuset.New() +} diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index cf19290b14fe0..5ca794d3c2e8e 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -45,6 +45,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -53,9 +54,17 @@ type mockState struct { defaultCPUSet cpuset.CPUSet } +func (s *mockState) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + res, exists := s.assignments[podUID][containerName] + return res.Original.Clone(), exists +} + func (s *mockState) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { - res, ok := s.assignments[podUID][containerName] - return res.Clone(), ok + res, exists := s.assignments[podUID][containerName] + if res.Resized.IsEmpty() { + return res.Original.Clone(), exists + } + return res.Resized.Clone(), exists } func (s *mockState) GetDefaultCPUSet() cpuset.CPUSet { @@ -71,9 +80,15 @@ func (s *mockState) GetCPUSetOrDefault(podUID string, containerName string) cpus func (s *mockState) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) { if _, exists := s.assignments[podUID]; !exists { - s.assignments[podUID] = make(map[string]cpuset.CPUSet) + s.assignments[podUID] = make(map[string]state.ContainerCPUAssignment) + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + } else { + if entry, exists := s.assignments[podUID][containerName]; !exists { + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + } else { + s.assignments[podUID][containerName] = state.ContainerCPUAssignment{Original: entry.Original, Resized: cset} + } } - s.assignments[podUID][containerName] = cset } func (s *mockState) SetDefaultCPUSet(cset cpuset.CPUSet) { @@ -112,7 +127,7 @@ func (p *mockPolicy) Start(_ logr.Logger, s state.State) error { return p.err } -func (p *mockPolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *mockPolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return p.err } @@ -120,11 +135,11 @@ func (p *mockPolicy) RemoveContainer(_ logr.Logger, s state.State, podUID string return p.err } -func (p *mockPolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *mockPolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } @@ -133,11 +148,52 @@ func (p *mockPolicy) GetAllocatableCPUs(m state.State) cpuset.CPUSet { } type mockRuntimeService struct { - err error + err []error + containerIDsWithExclusiveCPUs []string + state map[string]cpuset.CPUSet + testCPUConflicts bool + testCaseDescription string + t *testing.T } -func (rt mockRuntimeService) UpdateContainerResources(_ context.Context, id string, resources *runtimeapi.ContainerResources) error { - return rt.err +func (rt *mockRuntimeService) UpdateContainerResources(_ context.Context, id string, resources *runtimeapi.ContainerResources) error { + var ret error + if len(rt.err) > 0 { + ret = rt.err[0] + rt.err = rt.err[1:] + } + + // update state + if ret == nil { + newSet := rt.getCPUSetFromResources(resources) + if !newSet.IsEmpty() { + rt.state[id] = newSet + } + } + + if rt.testCPUConflicts { + // count in how many containers each CPU is used + cpuUsage := make(map[int][]string) + for containerID, set := range rt.state { + for _, cpu := range set.List() { + cpuUsage[cpu] = append(cpuUsage[cpu], containerID) + } + } + + // check if CPUs assigned to containers with exclusive CPUs are used exactly once + for _, containerID := range rt.containerIDsWithExclusiveCPUs { + set := rt.state[containerID] + for _, cpu := range set.List() { + if len(cpuUsage[cpu]) != 1 { + rt.t.Errorf("%v", rt.testCaseDescription) + rt.t.Errorf("after updating container resources of %s", id) + rt.t.Errorf("Expected CPU %d usage 1, actual usage %d %v", cpu, len(cpuUsage[cpu]), cpuUsage[cpu]) + } + } + } + } + + return ret } type mockPodStatusProvider struct { @@ -170,6 +226,7 @@ func makePod(podUID, containerName, cpuRequest, cpuLimit string) *v1.Pod { } pod.UID = types.UID(podUID) + pod.Name = podUID pod.Spec.Containers[0].Name = containerName return pod @@ -312,7 +369,7 @@ func TestCPUManagerAdd(t *testing.T) { nil) testCases := []struct { description string - updateErr error + updateErr []error policy Policy expCPUSet cpuset.CPUSet expAllocateErr error @@ -346,7 +403,7 @@ func TestCPUManagerAdd(t *testing.T) { defaultCPUSet: cpuset.New(1, 2, 3, 4), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ + containerRuntime: &mockRuntimeService{ err: testCase.updateErr, }, containerMap: containermap.NewContainerMap(), @@ -358,7 +415,7 @@ func TestCPUManagerAdd(t *testing.T) { container := &pod.Spec.Containers[0] mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} } - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expAllocateErr) { t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v", testCase.description, testCase.expAllocateErr, err) @@ -575,7 +632,7 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { policy: policy, state: mockState, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containermap.NewContainerMap(), podStatusProvider: mockPodStatusProvider{}, sourcesReady: &sourcesReadyStub{}, @@ -599,7 +656,7 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { cumCSet := cpuset.New() for i := range containers { - err := mgr.Allocate(testCase.pod, &containers[i]) + err := mgr.Allocate(testCase.pod, &containers[i], lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). unexpected error for container id: %v: %v", testCase.description, containerIDs[i], err) @@ -612,18 +669,18 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { testCase.description, containerIDs[i], err) } - cset, found := mockState.assignments[string(testCase.pod.UID)][containers[i].Name] + assignment, found := mockState.assignments[string(testCase.pod.UID)][containers[i].Name] if !expCSets[i].IsEmpty() && !found { t.Errorf("StaticPolicy AddContainer() error (%v). expected container %v to be present in assignments %v", testCase.description, containers[i].Name, mockState.assignments) } - if found && !cset.Equals(expCSets[i]) { + if found && !assignment.Original.Equals(expCSets[i]) { t.Errorf("StaticPolicy AddContainer() error (%v). expected cpuset %v for container %v but got %v", - testCase.description, expCSets[i], containers[i].Name, cset) + testCase.description, expCSets[i], containers[i].Name, assignment.Original) } - cumCSet = cumCSet.Union(cset) + cumCSet = cumCSet.Union(assignment.Original) } if !testCase.stDefaultCPUSet.Difference(cumCSet).Equals(mockState.defaultCPUSet) { @@ -765,7 +822,7 @@ func TestCPUManagerRemove(t *testing.T) { defaultCPUSet: cpuset.New(), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containerMap, activePods: func() []*v1.Pod { return nil }, podStatusProvider: mockPodStatusProvider{}, @@ -782,7 +839,7 @@ func TestCPUManagerRemove(t *testing.T) { err: fmt.Errorf("fake error"), }, state: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{}, + containerRuntime: &mockRuntimeService{}, containerMap: containerMap, activePods: func() []*v1.Pod { return nil }, podStatusProvider: mockPodStatusProvider{}, @@ -825,20 +882,25 @@ func TestReconcileState(t *testing.T) { nil) testCases := []struct { - description string - policy Policy - activePods []*v1.Pod - pspPS v1.PodStatus - pspFound bool - updateErr error - stAssignments state.ContainerCPUAssignments - stDefaultCPUSet cpuset.CPUSet - lastUpdateStAssignments state.ContainerCPUAssignments - lastUpdateStDefaultCPUSet cpuset.CPUSet - expectStAssignments state.ContainerCPUAssignments - expectStDefaultCPUSet cpuset.CPUSet - expectSucceededContainerName string - expectFailedContainerName string + description string + policy Policy + activePods []*v1.Pod + pspPS v1.PodStatus + pspFound bool + updateErr []error + containerIDsWithExclusiveCPUs []string + containerRuntimeInitialState map[string]cpuset.CPUSet + stAssignments state.ContainerCPUAssignments + stDefaultCPUSet cpuset.CPUSet + lastUpdateStAssignments state.ContainerCPUAssignments + lastUpdateStDefaultCPUSet cpuset.CPUSet + expectStAssignments state.ContainerCPUAssignments + expectStDefaultCPUSet cpuset.CPUSet + expectLastUpdateStAssignments state.ContainerCPUAssignments + expectLastUpdateStDefaultCPUSet cpuset.CPUSet + expectContainerRuntimeState map[string]cpuset.CPUSet + expectSucceededContainerName []string + expectFailedContainerName []string }{ { description: "cpu manager reconcile - no error", @@ -869,24 +931,35 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile init container - no error", @@ -917,24 +990,35 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - pod status not found", @@ -954,17 +1038,22 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspPS: v1.PodStatus{}, - pspFound: false, - updateErr: nil, - stAssignments: state.ContainerCPUAssignments{}, - stDefaultCPUSet: cpuset.New(), - lastUpdateStAssignments: state.ContainerCPUAssignments{}, - lastUpdateStDefaultCPUSet: cpuset.New(), - expectStAssignments: state.ContainerCPUAssignments{}, - expectStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: "", - expectFailedContainerName: "", + pspPS: v1.PodStatus{}, + pspFound: false, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - container state not found", @@ -992,16 +1081,21 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, - stAssignments: state.ContainerCPUAssignments{}, - stDefaultCPUSet: cpuset.New(), - lastUpdateStAssignments: state.ContainerCPUAssignments{}, - lastUpdateStDefaultCPUSet: cpuset.New(), - expectStAssignments: state.ContainerCPUAssignments{}, - expectStDefaultCPUSet: cpuset.New(), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconclie - cpuset is empty", @@ -1032,24 +1126,29 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconclie - container update error", @@ -1080,24 +1179,29 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: fmt.Errorf("fake container update error"), + pspFound: true, + updateErr: []error{fmt.Errorf("fake container update error")}, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "", - expectFailedContainerName: "fakeContainerName", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{}, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{}, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerName"}, }, { description: "cpu manager reconcile - state has inactive container", @@ -1128,27 +1232,38 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, - "secondfakePodUID": map[string]cpuset.CPUSet{ - "secondfakeContainerName": cpuset.New(3, 4), + "secondfakePodUID": map[string]state.ContainerCPUAssignment{ + "secondfakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{}, lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is current", @@ -1179,28 +1294,41 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - lastUpdateStDefaultCPUSet: cpuset.New(5, 6, 7), + lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectStDefaultCPUSet: cpuset.New(5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, - expectStDefaultCPUSet: cpuset.New(5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, { description: "cpu manager reconcile - last update state is not current", @@ -1231,94 +1359,1350 @@ func TestReconcileState(t *testing.T) { }, }, }, - pspFound: true, - updateErr: nil, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(3, 4), + }, stAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), lastUpdateStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(3, 4), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, - lastUpdateStDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + lastUpdateStDefaultCPUSet: cpuset.New(), expectStAssignments: state.ContainerCPUAssignments{ - "fakePodUID": map[string]cpuset.CPUSet{ - "fakeContainerName": cpuset.New(1, 2), + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(1, 2), Resized: cpuset.New()}, + }, + }, + expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerName": {Original: cpuset.New(3, 4), Resized: cpuset.New(1, 2)}, }, }, - expectStDefaultCPUSet: cpuset.New(3, 4, 5, 6, 7), - expectSucceededContainerName: "fakeContainerName", - expectFailedContainerName: "", + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerID": cpuset.New(1, 2), + }, + expectSucceededContainerName: []string{"fakeContainerName"}, + expectFailedContainerName: []string{}, }, - } - - for _, testCase := range testCases { - logger, _ := ktesting.NewTestContext(t) - mgr := &manager{ - policy: testCase.policy, - state: &mockState{ - assignments: testCase.stAssignments, - defaultCPUSet: testCase.stDefaultCPUSet, + { + description: "cpu manager reconcile - default CPU sets no error", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, }, - lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ - err: testCase.updateErr, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, }, - containerMap: containermap.NewContainerMap(), - activePods: func() []*v1.Pod { - return testCase.activePods + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{}, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{}, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{}, + expectStDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 3, 4, 5, 6, 7), Resized: cpuset.New()}, + }, }, - podStatusProvider: mockPodStatusProvider{ - podStatus: testCase.pspPS, - found: testCase.pspFound, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 3, 4, 5, 6, 7), + "fakeContainerBID": cpuset.New(1, 2, 3, 4, 5, 6, 7), + "fakeContainerCID": cpuset.New(1, 2, 3, 4, 5, 6, 7), }, - } - mgr.sourcesReady = &sourcesReadyStub{} - success, failure := mgr.reconcileState(context.Background()) - - if !reflect.DeepEqual(testCase.expectStAssignments, mgr.state.GetCPUAssignments()) { - t.Errorf("%v", testCase.description) - t.Errorf("Expected state container cpu assignments: %v, actual: %v", testCase.expectStAssignments, mgr.state.GetCPUAssignments()) - - } - - if !reflect.DeepEqual(testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) { - t.Errorf("%v", testCase.description) - t.Errorf("Expected state default cpuset: %v, actual: %v", testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) - + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu container scaled up", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6, 7), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(1, 2, 5, 6, 7), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 2, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 2, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New(1, 2, 7)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4, 5, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 5, 6, 7), Resized: cpuset.New(1, 2, 7)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 7), + "fakeContainerBID": cpuset.New(3, 4, 5, 6), + "fakeContainerCID": cpuset.New(1, 2, 7), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu container scaled down", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 7), + "fakeContainerBID": cpuset.New(3, 4, 5, 6), + "fakeContainerCID": cpuset.New(1, 2, 7), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New()}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4, 5, 6), Resized: cpuset.New()}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4), Resized: cpuset.New(3, 4)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 2, 5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New(1, 2, 5, 6, 7)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3, 4, 5, 6), Resized: cpuset.New(3, 4)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(1, 2, 7), Resized: cpuset.New(1, 2, 5, 6, 7)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6, 7), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(1, 2, 5, 6, 7), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu containers swap CPUs", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2), + "fakeContainerBID": cpuset.New(3, 4), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + stDefaultCPUSet: cpuset.New(5, 6, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3, 4)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(5, 6, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 4)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(2, 3)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 4), + "fakeContainerBID": cpuset.New(2, 3), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - exclusive cpu containers scaled down and up", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: nil, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerBID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2), + "fakeContainerBID": cpuset.New(3, 4), + "fakeContainerCID": cpuset.New(5, 6), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + }, + stDefaultCPUSet: cpuset.New(4), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3, 4)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(5, 6), Resized: cpuset.New()}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(1), Resized: cpuset.New(1, 2, 5, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(3), Resized: cpuset.New(3)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(5, 6), Resized: cpuset.New(4)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(1, 2, 5, 6), + "fakeContainerBID": cpuset.New(3), + "fakeContainerCID": cpuset.New(4), + }, + expectSucceededContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass does not cause conflict", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerBID pass 2 ok + nil, //fakeContainerCID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(4, 7)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(4, 7), + "fakeContainerCID": cpuset.New(5, 8), + }, + expectSucceededContainerName: []string{"fakeContainerBName", "fakeContainerCName"}, + expectFailedContainerName: []string{"fakeContainerAName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in second pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerCID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerCName"}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerBName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in second pass which causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerBName", "fakeContainerCName"}, + }, + { + description: "cpu manager reconcile - fail in first reconcile pass causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + fmt.Errorf("fakeContainerAID pass 1 error"), + nil, //fakeContainerCID pass 1 ok + nil, //fakeContainerBID pass 2 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 3, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(4, 7)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(4, 7), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerBName"}, + expectFailedContainerName: []string{"fakeContainerAName", "fakeContainerCName"}, + }, + { + description: "cpu manager reconcile - fail in second reconcile pass causes conflict in third pass", + policy: testPolicy, + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodAName", + UID: "fakePodAUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerAName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodBName", + UID: "fakePodBUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerBName", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakePodCName", + UID: "fakePodCUID", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainerCName", + }, + }, + }, + }, + }, + pspPS: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "fakeContainerAName", + ContainerID: "docker://fakeContainerAID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerBName", + ContainerID: "docker://fakeContainerBID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "fakeContainerCName", + ContainerID: "docker://fakeContainerCID", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + pspFound: true, + updateErr: []error{ + nil, //fakeContainerAID pass 1 ok + nil, //fakeContainerCID pass 1 ok + fmt.Errorf("fakeContainerBID pass 2 error"), + nil, //fakeContainerAID pass 3 ok + }, + containerIDsWithExclusiveCPUs: []string{"fakeContainerAID", "fakeContainerCID"}, + containerRuntimeInitialState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 1, 2), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(6, 7, 8), + }, + stAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + stDefaultCPUSet: cpuset.New(1, 4, 7), + lastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 1, 2)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(6, 7, 8)}, + }, + }, + lastUpdateStDefaultCPUSet: cpuset.New(), + expectStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(2, 5, 8)}, + }, + }, + expectStDefaultCPUSet: cpuset.New(1, 4, 7), + expectLastUpdateStAssignments: state.ContainerCPUAssignments{ + "fakePodAUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerAName": {Original: cpuset.New(0), Resized: cpuset.New(0, 6)}, + }, + "fakePodBUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerBName": {Original: cpuset.New(4), Resized: cpuset.New(3, 4, 5)}, + }, + "fakePodCUID": map[string]state.ContainerCPUAssignment{ + "fakeContainerCName": {Original: cpuset.New(8), Resized: cpuset.New(8)}, + }, + }, + expectLastUpdateStDefaultCPUSet: cpuset.New(), + expectContainerRuntimeState: map[string]cpuset.CPUSet{ + "fakeContainerAID": cpuset.New(0, 6), + "fakeContainerBID": cpuset.New(3, 4, 5), + "fakeContainerCID": cpuset.New(8), + }, + expectSucceededContainerName: []string{"fakeContainerAName"}, + expectFailedContainerName: []string{"fakeContainerBName", "fakeContainerCName"}, + }, + } + + for _, testCase := range testCases { + logger, _ := ktesting.NewTestContext(t) + mgr := &manager{ + policy: testCase.policy, + state: &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + }, + lastUpdateState: state.NewMemoryState(logger), + containerRuntime: &mockRuntimeService{ + err: testCase.updateErr, + containerIDsWithExclusiveCPUs: testCase.containerIDsWithExclusiveCPUs, + state: testCase.containerRuntimeInitialState, + testCPUConflicts: true, + testCaseDescription: testCase.description, + t: t, + }, + containerMap: containermap.NewContainerMap(), + activePods: func() []*v1.Pod { + return testCase.activePods + }, + podStatusProvider: mockPodStatusProvider{ + podStatus: testCase.pspPS, + found: testCase.pspFound, + }, + } + mgr.sourcesReady = &sourcesReadyStub{} + mgr.lastUpdateState.SetCPUAssignments(testCase.lastUpdateStAssignments) + mgr.lastUpdateState.SetDefaultCPUSet(testCase.lastUpdateStDefaultCPUSet) + success, failure := mgr.reconcileState(context.Background()) + + if !reflect.DeepEqual(testCase.expectStAssignments, mgr.state.GetCPUAssignments()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected state container cpu assignments: %v, actual: %v", testCase.expectStAssignments, mgr.state.GetCPUAssignments()) + } + + if !reflect.DeepEqual(testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected state default cpuset: %v, actual: %v", testCase.expectStDefaultCPUSet, mgr.state.GetDefaultCPUSet()) + } + + if !reflect.DeepEqual(testCase.expectLastUpdateStAssignments, mgr.lastUpdateState.GetCPUAssignments()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected lastUpdateState container cpu assignments: %v, actual: %v", testCase.expectLastUpdateStAssignments, mgr.lastUpdateState.GetCPUAssignments()) + } + + if !reflect.DeepEqual(testCase.expectLastUpdateStDefaultCPUSet, mgr.lastUpdateState.GetDefaultCPUSet()) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected lastUpdateState default cpuset: %v, actual: %v", testCase.expectLastUpdateStDefaultCPUSet, mgr.lastUpdateState.GetDefaultCPUSet()) + } + + if !reflect.DeepEqual(testCase.expectContainerRuntimeState, mgr.containerRuntime.(*mockRuntimeService).state) { + t.Errorf("%v", testCase.description) + t.Errorf("Expected containerRuntimeState: %v, actual: %v", testCase.expectContainerRuntimeState, mgr.containerRuntime.(*mockRuntimeService).state) } - if testCase.expectSucceededContainerName != "" { + for _, name := range testCase.expectSucceededContainerName { // Search succeeded reconciled containers for the supplied name. foundSucceededContainer := false for _, reconciled := range success { - if reconciled.containerName == testCase.expectSucceededContainerName { + if reconciled.containerName == name { foundSucceededContainer = true break } } if !foundSucceededContainer { t.Errorf("%v", testCase.description) - t.Errorf("Expected reconciliation success for container: %s", testCase.expectSucceededContainerName) + t.Errorf("Expected reconciliation success for container: %s", name) } } - if testCase.expectFailedContainerName != "" { + for _, name := range testCase.expectFailedContainerName { // Search failed reconciled containers for the supplied name. foundFailedContainer := false for _, reconciled := range failure { - if reconciled.containerName == testCase.expectFailedContainerName { + if reconciled.containerName == name { foundFailedContainer = true break } } if !foundFailedContainer { t.Errorf("%v", testCase.description) - t.Errorf("Expected reconciliation failure for container: %s", testCase.expectFailedContainerName) + t.Errorf("Expected reconciliation failure for container: %s", name) } } } @@ -1351,7 +2735,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { nil) testCases := []struct { description string - updateErr error + updateErr []error policy Policy expCPUSet cpuset.CPUSet expAllocateErr error @@ -1375,7 +2759,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { defaultCPUSet: cpuset.New(0, 1, 2, 3), }, lastUpdateState: state.NewMemoryState(logger), - containerRuntime: mockRuntimeService{ + containerRuntime: &mockRuntimeService{ err: testCase.updateErr, }, containerMap: containermap.NewContainerMap(), @@ -1387,7 +2771,7 @@ func TestCPUManagerAddWithResvList(t *testing.T) { container := &pod.Spec.Containers[0] mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} } - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expAllocateErr) { t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v", testCase.description, testCase.expAllocateErr, err) @@ -1537,7 +2921,7 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) { pod := makePod("fakePod", "fakeContainer", "2", "2") container := &pod.Spec.Containers[0] - _ = mgr.Allocate(pod, container) + _ = mgr.Allocate(pod, container, lifecycle.AddOperation) if !mgr.GetAllocatableCPUs().Equals(testCase.expAllocatableCPUs) { t.Errorf("Policy GetAllocatableCPUs() error (%v). expected cpuset %v for container %v but got %v", diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go new file mode 100644 index 0000000000000..ccc1447f76394 --- /dev/null +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_windows_test.go @@ -0,0 +1,42 @@ +//go:build windows + +/* +Copyright 2026 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpumanager + +import ( + utilfeature "k8s.io/apiserver/pkg/util/feature" + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + kubefeatures "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/kubelet/winstats" + "k8s.io/utils/cpuset" +) + +func (rt mockRuntimeService) getCPUSetFromResources(resources *runtimeapi.ContainerResources) cpuset.CPUSet { + if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) { + return cpuset.New() + } + if resources != nil && resources.Windows != nil { + var cpus []int + for _, affinity := range resources.Windows.AffinityCpus { + ga := winstats.GroupAffinity{Mask: affinity.CpuMask, Group: uint16(affinity.CpuGroup)} + cpus = append(cpus, ga.Processors()...) + } + return cpuset.New(cpus...) + } + return cpuset.New() +} diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go index 791f2e5bec0d1..a074db11ca293 100644 --- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/utils/cpuset" ) @@ -47,9 +48,9 @@ func (m *fakeManager) Policy() Policy { return pol } -func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() - logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) return nil } @@ -62,15 +63,15 @@ func (m *fakeManager) RemoveContainer(logger logr.Logger, containerID string) er return nil } -func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() - logger.Info("Get container topology hints") + logger.Info("Get container topology hints", "operation", operation) return map[string][]topologymanager.TopologyHint{} } -func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() - logger.Info("Get pod topology hints") + logger.Info("Get pod topology hints", "operation", operation) return map[string][]topologymanager.TopologyHint{} } diff --git a/pkg/kubelet/cm/cpumanager/policy.go b/pkg/kubelet/cm/cpumanager/policy.go index 628b0254af16a..d5c3b9d18bbc6 100644 --- a/pkg/kubelet/cm/cpumanager/policy.go +++ b/pkg/kubelet/cm/cpumanager/policy.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -30,17 +31,17 @@ type Policy interface { Name() string Start(logger logr.Logger, s state.State) error // Allocate call is idempotent - Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error + Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer call is idempotent RemoveContainer(logger logr.Logger, s state.State, podUID string, containerName string) error // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment per Pod // among this and other resource controllers. - GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableCPUs returns the total set of CPUs available for allocation. GetAllocatableCPUs(m state.State) cpuset.CPUSet } diff --git a/pkg/kubelet/cm/cpumanager/policy_none.go b/pkg/kubelet/cm/cpumanager/policy_none.go index e29d0598a36f0..19c0d86fb9f3b 100644 --- a/pkg/kubelet/cm/cpumanager/policy_none.go +++ b/pkg/kubelet/cm/cpumanager/policy_none.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -51,7 +52,7 @@ func (p *nonePolicy) Start(logger logr.Logger, s state.State) error { return nil } -func (p *nonePolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *nonePolicy) Allocate(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return nil } @@ -59,11 +60,11 @@ func (p *nonePolicy) RemoveContainer(_ logr.Logger, s state.State, podUID string return nil } -func (p *nonePolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *nonePolicy) GetTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *nonePolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *nonePolicy) GetPodTopologyHints(_ logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } diff --git a/pkg/kubelet/cm/cpumanager/policy_none_test.go b/pkg/kubelet/cm/cpumanager/policy_none_test.go index bba7908fe5dce..749045df68a60 100644 --- a/pkg/kubelet/cm/cpumanager/policy_none_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_none_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/utils/cpuset" ) @@ -46,7 +47,7 @@ func TestNonePolicyAllocate(t *testing.T) { testPod := makePod("fakePod", "fakeContainer", "1000m", "1000m") container := &testPod.Spec.Containers[0] - err := policy.Allocate(logger, st, testPod, container) + err := policy.Allocate(logger, st, testPod, container, lifecycle.AddOperation) if err != nil { t.Errorf("NonePolicy Allocate() error. expected no error but got: %v", err) } diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 53519390c45d8..1cf11df76c77b 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -33,6 +33,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/utils/cpuset" ) @@ -45,8 +46,30 @@ const ( PolicyStatic policyName = "static" // ErrorSMTAlignment represents the type of an SMTAlignmentError ErrorSMTAlignment = "SMTAlignmentError" + // ErrorIncosistendCPUAllocation represents the type of an incosistentCPUAllocationError + ErrorInconsistentCPUAllocation = "inconsistentCPUAllocationError" + // ErrorProhibitedCPUAlloacation represents the type of an prohibitedCPUAllocationError + ErrorProhibitedCPUAllocation = "prohibitedCPUAllocationError" + // ErrorGetOriginalCPUSetError represents the type of an getOriginalCPUSetError + ErrorGetOriginalCPUSet = "getOriginalCPUSetError" + // ErrorResizeAllocateCPUs represents the type of a ResizeAllocateCPUsError + ErrorResizeAllocateCPUs = "ResizeAllocateCPUsError" + // ErrorUnsupportedLifecycleOperation represents the type of a UnsupportedLifecycleOperationError + ErrorUnsupportedLifecycleOperation = "UnsupportedLifecycleOperationError" ) +type UnsupportedLifecycleOperationError struct { + Operation lifecycle.Operation +} + +func (e UnsupportedLifecycleOperationError) Error() string { + return fmt.Sprintf("Unsupported Lifecycle Operation Error: %s is neither AddOperation nor ResizeOperation", e.Operation) +} + +func (e UnsupportedLifecycleOperationError) Type() string { + return ErrorUnsupportedLifecycleOperation +} + // SMTAlignmentError represents an error due to SMT alignment type SMTAlignmentError struct { RequestedCPUs int @@ -67,6 +90,90 @@ func (e SMTAlignmentError) Type() string { return ErrorSMTAlignment } +// prohibitedCPUAllocationError represents an error due to an +// attempt to reduce container exclusively allocated +// pool below container exclusively original pool +// allocated when container was created. +type prohibitedCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + OriginalCPUs int + GuaranteedCPUs int +} + +func (e prohibitedCPUAllocationError) Error() string { + return fmt.Sprintf("prohibitedCPUAllocation Error: Skip resize, Not allowed to reduce container exclusively allocated pool below promised, (requested CPUs = %s, allocated CPUs = %s, promised CPUs = %d, guaranteed CPUs = %d)", e.RequestedCPUs, e.AllocatedCPUs, e.OriginalCPUs, e.GuaranteedCPUs) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e prohibitedCPUAllocationError) Type() string { + return ErrorProhibitedCPUAllocation +} + +// inconsistentCPUAllocationError represents an error due to an +// attempt to either move a container from exclusively allocated +// pool to shared pool or move a container from shared pool to +// exclusively allocated pool. +type inconsistentCPUAllocationError struct { + RequestedCPUs string + AllocatedCPUs string + Shared2Exclusive bool +} + +func (e inconsistentCPUAllocationError) Error() string { + if e.RequestedCPUs == e.AllocatedCPUs { + return fmt.Sprintf("inconsistentCPUAllocation Error: Skip resize, nothing to be done, (requested CPUs = %s equal to allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } + if e.Shared2Exclusive { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from shared pool to exclusively allocated pool, (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } else { + return fmt.Sprintf("inconsistentCPUAllocation Error: Not allowed to move a container from exclusively allocated pool to shared pool, not allowed (requested CPUs = %s, allocated CPUs = %s)", e.RequestedCPUs, e.AllocatedCPUs) + } +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e inconsistentCPUAllocationError) Type() string { + return ErrorInconsistentCPUAllocation +} + +// getOriginalCPUSetError represents an error due to a +// failed attempt to GetOriginalCPUSet from state +type getOriginalCPUSetError struct { + PodUID string + ContainerName string +} + +func (e getOriginalCPUSetError) Error() string { + return fmt.Sprintf("getOriginalCPUSet Error: Skip resize, unable to get PromisedCPUSet, nothing to be done, (podUID = %s, containerName %s)", e.PodUID, e.ContainerName) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e getOriginalCPUSetError) Type() string { + return ErrorGetOriginalCPUSet +} + +// ResizeAllocateCPUsError represents an error during +// an attempt to allocate a container's CPU exclusive pool +// resize. +type ResizeAllocateCPUsError struct { + PodUID string + ContainerName string + TopologyError string +} + +func (e ResizeAllocateCPUsError) Error() string { + return fmt.Sprintf("ResizeAllocateCPUs Error: Skip resize, unable to resize container exclusively allocated pool, (podUID = %s, containerName = %s, topologyError = %s)", e.PodUID, e.ContainerName, e.TopologyError) +} + +// Type returns human-readable type of this error. +// Used in the HandlePodResourcesResize to populate Failure reason +func (e ResizeAllocateCPUsError) Type() string { + return ErrorResizeAllocateCPUs +} + // staticPolicy is a CPU manager policy that does not change CPU // assignments for exclusively pinned guaranteed containers after the main // container process starts. @@ -239,11 +346,11 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { // 2. Check if state for static policy is consistent for pod := range tmpAssignments { - for container, cset := range tmpAssignments[pod] { + for container, assignment := range tmpAssignments[pod] { // None of the cpu in DEFAULT cset should be in s.assignments - if !tmpDefaultCPUset.Intersection(cset).IsEmpty() { + if !tmpDefaultCPUset.Intersection(getCPUSetFromAssignment(assignment)).IsEmpty() { return fmt.Errorf("pod: %s, container: %s cpuset: %q overlaps with default cpuset %q", - pod, container, cset.String(), tmpDefaultCPUset.String()) + pod, container, getCPUSetFromAssignment(assignment).String(), tmpDefaultCPUset.String()) } } } @@ -258,8 +365,8 @@ func (p *staticPolicy) validateState(logger logr.Logger, s state.State) error { totalKnownCPUs := tmpDefaultCPUset.Clone() tmpCPUSets := []cpuset.CPUSet{} for pod := range tmpAssignments { - for _, cset := range tmpAssignments[pod] { - tmpCPUSets = append(tmpCPUSets, cset) + for _, assignment := range tmpAssignments[pod] { + tmpCPUSets = append(tmpCPUSets, getCPUSetFromAssignment(assignment)) } } totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...) @@ -316,11 +423,28 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset) } -func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) +func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "operation", operation) logger.Info("Allocate start") // V=0 for backward compatibility defer logger.V(2).Info("Allocate end") + switch operation { + case lifecycle.AddOperation: + return p.allocateForAdd(logger, s, pod, container) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.Info("CPU Manager allocation resize operation skipped, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs not enabled") + return nil + } + return p.allocateForResize(logger, s, pod, container) + default: + return UnsupportedLifecycleOperationError{ + Operation: operation, + } + } +} + +func (p *staticPolicy) allocateForAdd(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { numCPUs := p.guaranteedCPUs(logger, pod, container) if numCPUs == 0 { // container belongs in the shared pool (nothing to do; use default cpuset) @@ -332,8 +456,6 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, return nil } - logger.Info("Static policy: Allocate") - // container belongs in an exclusively allocated pool metrics.CPUManagerPinningRequestsTotal.Inc() defer func() { @@ -409,15 +531,148 @@ func (p *staticPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, return nil } +func (p *staticPolicy) allocateForResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { + numCPUs := p.guaranteedCPUs(logger, pod, container) + // During a pod resize, handle corner cases + err := p.isFeasibleResize(logger, s, pod, container) + if err != nil { + logger.Error(err, "Static policy: Unfeasible to resize allocated CPUs,", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs) + return err + } + + if numCPUs == 0 { + // container belongs in the shared pool (nothing to do; use default cpuset) + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.Info("CPU Manager allocation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } + + // container belongs in an exclusively allocated pool + logger.Info("Increasing metric") + metrics.CPUManagerPinningRequestsTotal.Inc() + defer func() { + if rerr != nil { + metrics.CPUManagerPinningErrorsTotal.Inc() + if p.options.FullPhysicalCPUsOnly { + metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + return + } + // TODO: move in updateMetricsOnAllocate + if p.options.FullPhysicalCPUsOnly { + // increment only if we know we allocate aligned resources + metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc() + } + }() + + if p.options.FullPhysicalCPUsOnly { + if (numCPUs % p.cpuGroupSize) != 0 { + // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted + // if the CPU requested is a multiple of the number of virtual cpus per physical cores. + // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put + // in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores + // and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs, + // the pod would be placed on a node where there are enough physical cores available to be allocated. + // Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket + // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all + // CPUs on a physical core. Allocation of individual threads would never have to occur. + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, + } + } + + availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size() + + if cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); found { + cpuAllocatedQuantity := cs.AllocatedResources[v1.ResourceCPU] + availablePhysicalCPUs += int(cpuAllocatedQuantity.Value()) + } + // It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores + // when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider + // all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before to start the actual allocation. + // This way, by construction all possible CPUs allocation whose number is multiple of the SMT level are now correct again. + if numCPUs > availablePhysicalCPUs { + return SMTAlignmentError{ + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, + } + } + } + if cpusInUseByPodContainer, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + logger.Info("Static policy: container already present in state, attempting InPlacePodVerticalScaling", "pod", klog.KObj(pod), "containerName", container.Name) + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + // Attempt new allocation ( reusing allocated CPUs ) according to the NUMA affinity contained in the hint + // Since NUMA affinity container in the hint is unmutable already allocated CPUs pass the criteria + mustKeepCPUsForResize, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + err := getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + return err + } + // Allocate CPUs according to the NUMA affinity contained in the hint. + newallocatedcpuset, witherr := p.allocateCPUsForResize(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], &cpusInUseByPodContainer, &mustKeepCPUsForResize) + if witherr != nil { + err := ResizeAllocateCPUsError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + TopologyError: witherr.Error(), + } + return err + } + + // Allocation successful, update the current state + s.SetCPUSet(string(pod.UID), container.Name, newallocatedcpuset.CPUs) + p.updateCPUsToReuse(pod, container, newallocatedcpuset.CPUs) + p.updateMetricsOnAllocate(logger, s, newallocatedcpuset) + logger.Info("Allocated exclusive CPUs after InPlacePodVerticalScaling attempt", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", newallocatedcpuset.CPUs.String()) + // Updated state to the checkpoint file will be stored during + // the reconcile loop. TODO is this a problem? I don't believe + // because if kubelet will be terminated now, anyhow it will be + // needed the state to be cleaned up, an error will appear requiring + // the node to be drained. I think we are safe. All computations are + // using state_mem and not the checkpoint. + return nil + } + + // Call Topology Manager to get the aligned socket affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + logger.Info("Topology Affinity", "affinity", hint) + + // Allocate CPUs according to the NUMA affinity contained in the hint. + cpuAllocation, err := p.allocateCPUsForResize(logger, s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)], nil, nil) + if err != nil { + logger.Error(err, "Unable to allocate CPUs", "numCPUs", numCPUs) + return err + } + + s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs) + p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs) + p.updateMetricsOnAllocate(logger, s, cpuAllocation) + + logger.V(4).Info("Allocated exclusive CPUs", "cpuset", cpuAllocation.CPUs.String()) + return nil +} + // getAssignedCPUsOfSiblings returns assigned cpus of given container's siblings(all containers other than the given container) in the given pod `podUID`. func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet { assignments := s.GetCPUAssignments() cset := cpuset.New() - for name, cpus := range assignments[podUID] { + for name, assignment := range assignments[podUID] { if containerName == name { continue } - cset = cset.Union(cpus) + cset = cset.Union(getCPUSetFromAssignment(assignment)) } return cset } @@ -441,6 +696,80 @@ func (p *staticPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID return nil } +func (p *staticPolicy) allocateCPUsForResize(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (topology.Allocation, error) { + logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) + allocatableCPUs := cpuset.New() + + if mustKeepCPUsForResize != nil { + if numCPUs >= mustKeepCPUsForResize.Size() { + allocatableCPUs = mustKeepCPUsForResize.Clone() + } + if numCPUs < mustKeepCPUsForResize.Size() { + return topology.EmptyAllocation(), fmt.Errorf("requested number of CPUs ( %d ) are less than number of retained CPUs ( %d )", numCPUs, mustKeepCPUsForResize.Size()) + } + } + + if reusableCPUsForResize != nil { + if numCPUs >= reusableCPUsForResize.Size() { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUsForResize.Clone())) + } else if numCPUs < reusableCPUsForResize.Size() { + allocatableCPUs = reusableCPUsForResize.Clone() + } + } else { + allocatableCPUs = allocatableCPUs.Union(p.GetAvailableCPUs(s).Union(reusableCPUs)) + } + + // If there are aligned CPUs in numaAffinity, attempt to take those first. + result := topology.EmptyAllocation() + if numaAffinity != nil { + alignedCPUs := p.getAlignedCPUs(numaAffinity, allocatableCPUs) + + numAlignedToAlloc := alignedCPUs.Size() + if min(numCPUs, numAlignedToAlloc) == numCPUs { + numAlignedToAlloc = numCPUs + } + + allocatedCPUs, err := p.takeByTopologyForResize(logger, alignedCPUs, numAlignedToAlloc, reusableCPUsForResize, mustKeepCPUsForResize) + if err != nil { + return topology.EmptyAllocation(), err + } + + result.CPUs = result.CPUs.Union(allocatedCPUs) + } + + if numCPUs > result.CPUs.Size() { + // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. + remainingCPUs, err := p.takeByTopologyForResize(logger, allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size(), reusableCPUsForResize, mustKeepCPUsForResize) + if err != nil { + return topology.EmptyAllocation(), err + } + result.CPUs = result.CPUs.Union(remainingCPUs) + } + + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(result.CPUs) { + return topology.EmptyAllocation(), fmt.Errorf("requested CPUs to be retained %s are not a subset of resulted CPUs %s", mustKeepCPUsForResize.String(), result.CPUs.String()) + } + } + result.Aligned = p.topology.CheckAlignment(result.CPUs) + + // Remove allocated CPUs from the shared CPUSet. + if reusableCPUsForResize != nil { + if reusableCPUsForResize.Size() < result.CPUs.Size() { + // Scale up or creation has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } else if reusableCPUsForResize.Size() > result.CPUs.Size() { + // Scale down has been performed + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(reusableCPUsForResize.Difference(result.CPUs))) + } + } else { + s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs)) + } + + logger.Info("AllocateCPUs", "result", result.String()) + return result, nil +} + func (p *staticPolicy) allocateCPUs(logger logr.Logger, s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (topology.Allocation, error) { logger.Info("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity) @@ -548,9 +877,25 @@ func (p *staticPolicy) takeByTopology(logger logr.Logger, availableCPUs cpuset.C return takeByTopologyNUMAPacked(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption) } -func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { - logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) +func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "operation", operation) + switch operation { + case lifecycle.AddOperation: + return p.getTopologyHintsForAdd(logger, s, pod, container) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.V(3).Info("CPU Manager hint generation skipped, resize operation not supported by the static CPU manager policy, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs are not enabled", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + return p.getTopologyHintsForResize(logger, s, pod, container) + default: + logger.V(3).Info("CPU Manager hint generation skipped, operation not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } +} +func (p *staticPolicy) getTopologyHintsForAdd(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) // Get a count of how many guaranteed CPUs have been requested. requested := p.guaranteedCPUs(logger, pod, container) @@ -602,7 +947,91 @@ func (p *staticPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod * } } -func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "operation", operation) + switch operation { + case lifecycle.AddOperation: + return p.getPodTopologyHintsForAdd(logger, s, pod) + case lifecycle.ResizeOperation: + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + logger.V(3).Info("CPU Manager hint generation skipped, resize operation not supported by the static CPU manager policy, InPlacePodVerticalScaling and/or InPlacePodVerticalScalingExclusiveCPUs are not enabled", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + return p.getPodTopologyHintsForResize(logger, s, pod) + default: + logger.V(3).Info("CPU Manager hint generation skipped, operation not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } +} + +func (p *staticPolicy) getPodTopologyHintsForAdd(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID) + + // Get a count of how many guaranteed CPUs have been requested by Pod. + requested := p.podGuaranteedCPUs(logger, pod) + + // Number of required CPUs is not an integer or a pod is not part of the Guaranteed QoS class. + // It will be treated by the TopologyManager as having no preference and cause it to ignore this + // resource when considering pod alignment. + // In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true]. + if requested == 0 { + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.V(3).Info("CPU Manager pod hint generation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy") + return nil + } + + assignedCPUs := cpuset.New() + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + logger_ := klog.LoggerWithValues(logger, "containerName", container.Name) + + requestedByContainer := p.guaranteedCPUs(logger, pod, &container) + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requestedByContainer { + logger_.Info("CPUs already allocated to container with different number than request", "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) + // An empty list of hints will be treated as a preference that cannot be satisfied. + // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. + // For all but the best-effort policy, the Topology Manager will throw a pod-admission error. + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): {}, + } + } + // A set of CPUs already assigned to containers in this pod + assignedCPUs = assignedCPUs.Union(allocated) + } + } + if assignedCPUs.Size() == requested { + logger.Info("Regenerating TopologyHints for CPUs already allocated") + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.New(), requested), + } + } + + // Get a list of available CPUs. + available := p.GetAvailableCPUs(s) + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable := p.cpusToReuse[string(pod.UID)] + + // Ensure any CPUs already assigned to containers in this pod are included as part of the hint generation. + reusable = reusable.Union(assignedCPUs) + + // Generate hints. + cpuHints := p.generateCPUTopologyHints(available, reusable, requested) + logger.Info("TopologyHints generated", "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } +} + +func (p *staticPolicy) getPodTopologyHintsForResize(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID) // Get a count of how many guaranteed CPUs have been requested by Pod. @@ -667,6 +1096,7 @@ func (p *staticPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, po return map[string][]topologymanager.TopologyHint{ string(v1.ResourceCPU): cpuHints, } + } // generateCPUTopologyHints generates a set of TopologyHints given the set of @@ -806,8 +1236,8 @@ func (p *staticPolicy) updateMetricsOnRelease(logger logr.Logger, s state.State, func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet { totalAssignedCPUs := cpuset.New() for _, assignment := range s.GetCPUAssignments() { - for _, cset := range assignment { - totalAssignedCPUs = totalAssignedCPUs.Union(cset) + for _, assignment := range assignment { + totalAssignedCPUs = totalAssignedCPUs.Union(getCPUSetFromAssignment(assignment)) } } return totalAssignedCPUs @@ -832,3 +1262,251 @@ func updateAllocationPerNUMAMetric(logger logr.Logger, topo *topology.CPUTopolog metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count)) } } + +func (p *staticPolicy) isFeasibleResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { + + if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + return nil + } + cpuQuantity := container.Resources.Requests[v1.ResourceCPU] + cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name) + if !ok { + return nil + } + // Policy static specific resize feasibility checks, to decide if it is capable of performing the resize + allocatedCPUQuantity := cs.AllocatedResources[v1.ResourceCPU] + if allocatedCPUQuantity.Value() > 0 { + if allocatedCPUQuantity.Value()*1000 == allocatedCPUQuantity.MilliValue() { + // container belongs in exclusive pool + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + // container move to shared pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: false, + } + } + // Todo this is a good place to add a check with cpu manage + // state reading original / resized and check if allocated is + // up to date, this will be useful for troubleshooting and + // fine tune errors + mustKeepCPUsPromised, ok := s.GetOriginalCPUSet(string(pod.UID), container.Name) + if !ok { + return getOriginalCPUSetError{ + PodUID: string(pod.UID), + ContainerName: container.Name, + } + } + numCPUs := p.guaranteedCPUs(logger, pod, container) + promisedCPUsQuantity := mustKeepCPUsPromised.Size() + if promisedCPUsQuantity <= numCPUs { + return nil + } + return prohibitedCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + OriginalCPUs: promisedCPUsQuantity, + GuaranteedCPUs: numCPUs, + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + } else if cpuQuantity.Value()*1000 == cpuQuantity.MilliValue() { + // container belongs in shared pool + // container move to exclusive pool not allowed + return inconsistentCPUAllocationError{ + RequestedCPUs: cpuQuantity.String(), + AllocatedCPUs: allocatedCPUQuantity.String(), + Shared2Exclusive: true, + } + } + return nil +} + +// generateCPUTopologyHintsForResize generates a set of TopologyHints given the set of +// available CPUs and the number of CPUs being requested. +// +// It follows the convention of marking all hints that have the same number of +// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and +// marking all others with 'Preferred: false'. +func (p *staticPolicy) generateCPUTopologyHintsForResize(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint { + // Initialize minAffinitySize to include all NUMA Nodes. + minAffinitySize := p.topology.CPUDetails.NUMANodes().Size() + + // Iterate through all combinations of numa nodes bitmask and build hints from them. + hints := []topologymanager.TopologyHint{} + bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) { + // First, update minAffinitySize for the current request size. + cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() + if cpusInMask >= request && mask.Count() < minAffinitySize { + minAffinitySize = mask.Count() + } + + // Then check to see if we have enough CPUs available on the current + // numa node bitmask to satisfy the CPU request. + numMatching := 0 + for _, c := range reusableCPUs.List() { + // Disregard this mask if its NUMANode isn't part of it. + if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { + return + } + numMatching++ + } + + // Finally, check to see if enough available CPUs remain on the current + // NUMA node combination to satisfy the CPU request. + for _, c := range availableCPUs.List() { + if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) { + numMatching++ + } + } + + // If they don't, then move onto the next combination. + if numMatching < request { + return + } + + // Otherwise, create a new hint from the numa node bitmask and add it to the + // list of hints. We set all hint preferences to 'false' on the first + // pass through. + hints = append(hints, topologymanager.TopologyHint{ + NUMANodeAffinity: mask, + Preferred: false, + }) + }) + + // Loop back through all hints and update the 'Preferred' field based on + // counting the number of bits sets in the affinity mask and comparing it + // to the minAffinitySize. Only those with an equal number of bits set (and + // with a minimal set of numa nodes) will be considered preferred. + for i := range hints { + if p.options.AlignBySocket && p.isHintSocketAligned(hints[i], minAffinitySize) { + hints[i].Preferred = true + continue + } + if hints[i].NUMANodeAffinity.Count() == minAffinitySize { + hints[i].Preferred = true + } + } + + return hints +} + +func (p *staticPolicy) takeByTopologyForResize(logger logr.Logger, availableCPUs cpuset.CPUSet, numCPUs int, reusableCPUsForResize *cpuset.CPUSet, mustKeepCPUsForResize *cpuset.CPUSet) (cpuset.CPUSet, error) { + + // Protect against CPU leaks by failing early + if mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of available CPUs %s", mustKeepCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil { + if !reusableCPUsForResize.IsSubsetOf(availableCPUs) { + return cpuset.New(), fmt.Errorf("reusable CPUs %s are not a subset of available CPUs %s", reusableCPUsForResize.String(), availableCPUs.String()) + } + } + if reusableCPUsForResize != nil && mustKeepCPUsForResize != nil { + if !mustKeepCPUsForResize.IsSubsetOf(reusableCPUsForResize.Clone()) { + return cpuset.New(), fmt.Errorf("requested CPUs to be retained %s are not a subset of reusable CPUs %s", mustKeepCPUsForResize.String(), reusableCPUsForResize.String()) + } + } + + cpuSortingStrategy := CPUSortingStrategyPacked + if p.options.DistributeCPUsAcrossCores { + cpuSortingStrategy = CPUSortingStrategySpread + } + + if p.options.DistributeCPUsAcrossNUMA { + cpuGroupSize := 1 + if p.options.FullPhysicalCPUsOnly { + cpuGroupSize = p.cpuGroupSize + } + return takeByTopologyNUMADistributedForResize(logger, p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy, reusableCPUsForResize, mustKeepCPUsForResize) + } + + return takeByTopologyNUMAPackedForResize(logger, p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption, reusableCPUsForResize, mustKeepCPUsForResize) +} + +func (p *staticPolicy) getTopologyHintsForResize(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + // Get a count of how many guaranteed CPUs have been requested. + requested := p.guaranteedCPUs(logger, pod, container) + + // Number of required CPUs is not an integer or a container is not part of the Guaranteed QoS class. + // It will be treated by the TopologyManager as having no preference and cause it to ignore this + // resource when considering pod alignment. + // In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true]. + if requested == 0 { + return nil + } + + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && resourcehelper.IsPodLevelResourcesSet(pod) { + logger.V(3).Info("CPU Manager hint generation skipped, pod is using pod-level resources which are not supported by the static CPU manager policy", "pod", klog.KObj(pod), "podUID", pod.UID) + return nil + } + + reusable := cpuset.New() + + // Short circuit to regenerate the same hints if there are already + // guaranteed CPUs allocated to the Container. This might happen after a + // kubelet restart, for example. + if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { + if allocated.Size() != requested { + if allocated.Size() < requested { + reusable = reusable.Union(allocated) + } else { + reusable = allocated + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + + // Generate hints. + cpuHints := p.generateCPUTopologyHintsForResize(cpuset.New(), reusable, requested) + logger.Info("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + } + } else { + logger.Info("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name) + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): p.generateCPUTopologyHintsForResize(allocated, cpuset.New(), requested), + } + } + } + + // Get a list of available CPUs. + available := p.GetAvailableCPUs(s) + + // Get a list of reusable CPUs (e.g. CPUs reused from initContainers). + // It should be an empty CPUSet for a newly created pod. + reusable = reusable.Union(p.cpusToReuse[string(pod.UID)]) + + // Generate hints. + cpuHints := p.generateCPUTopologyHintsForResize(available, reusable, requested) + logger.Info("TopologyHints generated", "cpuHints", cpuHints) + + return map[string][]topologymanager.TopologyHint{ + string(v1.ResourceCPU): cpuHints, + } + +} + +func getCPUSetFromAssignment(assignment state.ContainerCPUAssignment) cpuset.CPUSet { + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) && utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + if assignment.Resized.IsEmpty() { + return assignment.Original + } else { + return assignment.Resized + } + } else { + return assignment.Original + } +} diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index df557fe63a744..b5402ed314426 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -22,15 +22,16 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/types" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" - "k8s.io/klog/v2" pkgfeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/utils/cpuset" ) @@ -46,6 +47,10 @@ type staticPolicyTest struct { stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod + qosClass v1.PodQOSClass + podAllocated string + resizeLimit string + resizeRequest string topologyHint *topologymanager.TopologyHint expErr error expCPUAlloc bool @@ -91,8 +96,8 @@ func TestStaticPolicyStart(t *testing.T) { description: "non-corrupted state", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -127,8 +132,8 @@ func TestStaticPolicyStart(t *testing.T) { description: "assigned core 2 is still present in available cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -139,8 +144,8 @@ func TestStaticPolicyStart(t *testing.T) { topo: topoDualSocketHT, options: map[string]string{StrictCPUReservationOption: "true"}, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -150,9 +155,9 @@ func TestStaticPolicyStart(t *testing.T) { description: "core 12 is not present in topology but is in state cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), - "1": cpuset.New(3, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, + "1": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7, 8, 9, 10, 11, 12), @@ -162,9 +167,9 @@ func TestStaticPolicyStart(t *testing.T) { description: "core 11 is present in topology but is not in state cpuset", topo: topoDualSocketHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "0": cpuset.New(0, 1, 2), - "1": cpuset.New(3, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "0": {Original: cpuset.New(0, 1, 2), Resized: cpuset.New()}, + "1": {Original: cpuset.New(3, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(5, 6, 7, 8, 9, 10), @@ -235,8 +240,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2, 3, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2, 3, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 4, 5), @@ -250,8 +255,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -265,8 +270,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7, 8, 9, 10, 11), @@ -280,8 +285,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketNoHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7), @@ -295,8 +300,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketNoHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(4, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(4, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 6, 7), @@ -310,8 +315,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11), @@ -349,8 +354,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocSock0", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(3, 11, 4, 5, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(3, 11, 4, 5, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: largeTopoCPUSet.Difference(cpuset.New(3, 11, 4, 5, 6, 7)), @@ -365,9 +370,9 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllFullCoresFromThreeSockets", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, - 53, 173, 113, 233, 54, 61)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, + 53, 173, 113, 233, 54, 61)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(1, 25, 13, 38, 2, 9, 11, 35, 23, 48, 12, 51, 53, 173, 113, 233, 54, 61), @@ -382,9 +387,9 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocAllSock1+FullCore", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, - 173, 61, 181, 108, 228, 115, 235))), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, + 173, 61, 181, 108, 228, 115, 235))), Resized: cpuset.New()}, }, }, stDefaultCPUSet: largeTopoSock1CPUSet.Union(cpuset.New(10, 34, 22, 47, 53, 173, 61, 181, 108, 228, @@ -415,8 +420,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, ExpectAllocCPUs", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(10, 11, 53, 67, 52), @@ -441,23 +446,23 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer3": cpuset.New(2, 3, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 2, 5, 6), Resized: cpuset.New()}, }, }, - stDefaultCPUSet: cpuset.New(0, 1, 4, 5), + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), expErr: nil, expCPUAlloc: true, - expCSet: cpuset.New(2, 3, 6, 7), + expCSet: cpuset.New(1, 2, 5, 6), }, { description: "GuPodMultipleCores, DualSocketHT, NoAllocExpectError", topo: topoDualSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 2, 3), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 4, 5, 6, 7, 8, 9, 10, 11), @@ -471,8 +476,8 @@ func TestStaticPolicyAdd(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(1, 2, 3, 4, 5, 6), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(1, 2, 3, 4, 5, 6), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 7), @@ -488,8 +493,8 @@ func TestStaticPolicyAdd(t *testing.T) { description: "GuPodMultipleCores, topoQuadSocketFourWayHT, NoAlloc", topo: topoQuadSocketFourWayHT, stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: largeTopoCPUSet.Difference(cpuset.New(10, 11, 53, 37, 55, 67, 52)), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(10, 11, 53, 37, 55, 67, 52), @@ -576,6 +581,115 @@ func TestStaticPolicyAdd(t *testing.T) { expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11), }, } + + // testcases for podResize + podResizeTestCases := []staticPolicyTest{ + { + description: "podResize GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation", + topo: topoSingleSocketHT, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 2, 5, 6), Resized: cpuset.New()}, + }, + }, + stDefaultCPUSet: cpuset.New(0, 3, 4, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 2, 5, 6), + }, + { + description: "podResize GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "4000m", "4000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"), + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "false", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "1000m", "1000m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(4), + }, + { + description: "podResize", + topo: topoSingleSocketHT, + options: map[string]string{ + FullPCPUsOnlyOption: "true", + }, + numReservedCPUs: 1, + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer3": {Original: cpuset.New(1, 5), Resized: cpuset.New()}, + }, + }, + stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 6, 7), + pod: makePod("fakePod", "fakeContainer3", "100m", "100m"), + //expErr: inconsistentCPUAllocationError{RequestedCPUs: "0", AllocatedCPUs: "2"}, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(1, 5), + }, + } + newNUMAAffinity := func(bits ...int) bitmask.BitMask { affinity, _ := bitmask.NewBitMask(bits...) return affinity @@ -636,6 +750,9 @@ func TestStaticPolicyAdd(t *testing.T) { for _, testCase := range alignBySocketOptionTestCases { runStaticPolicyTestCaseWithFeatureGate(t, testCase) } + for _, testCase := range podResizeTestCases { + runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t, testCase) + } } func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { @@ -659,27 +776,27 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { } container := &testCase.pod.Spec.Containers[0] - err = policy.Allocate(logger, st, testCase.pod, container) + err = policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %q but got: %q", testCase.description, testCase.expErr, err) } if testCase.expCPUAlloc { - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !cset.Equals(testCase.expCSet) { + if !assignment.Original.Equals(testCase.expCSet) { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s but got %s", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } - if !cset.Intersection(st.defaultCPUSet).IsEmpty() { + if !assignment.Original.Intersection(st.defaultCPUSet).IsEmpty() { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s to be disoint from the shared cpuset %s", - testCase.description, cset, st.defaultCPUSet) + testCase.description, assignment.Original, st.defaultCPUSet) } } @@ -697,6 +814,13 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT runStaticPolicyTestCase(t, testCase) } +func runStaticPolicyTestCaseWithFeatureGateAlongsideInPlacePodVerticalScaling(t *testing.T, testCase staticPolicyTest) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) + runStaticPolicyTestCase(t, testCase) +} + func TestStaticPolicyReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -736,7 +860,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - _ = policy.Allocate(logger, st, pod, &container) + _ = policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) } if !st.defaultCPUSet.Equals(testCase.expCSetAfterAlloc) { t.Errorf("StaticPolicy Allocate() error (%v). expected default cpuset %s but got %s", @@ -757,6 +881,300 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsSingleContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet + expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Increase allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0, 4 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Decrease allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", OriginalCPUs: 4, GuaranteedCPUs: 2}, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResizeSize: 4, + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool, Increase CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with less than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in shared pool with more than one core, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1200m", limit: "1200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1200m", + resizeLimit: "1100m", + resizeRequest: "1100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Container in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + logger, _ := ktesting.NewTestContext(t) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(logger, st, podResized, &container, lifecycle.ResizeOperation) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because allocated CPUs are not deterministic, + // since size of defaultCPUSet is deterministic and also interesection with expected allocation + // should not be nill. < ====== TODO esotsal + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} + func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { testCases := []struct { staticPolicyTest @@ -793,7 +1211,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { // allocate for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := policy.Allocate(logger, st, pod, &container) + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) if err != nil { t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", testCase.description, err) @@ -806,6 +1224,301 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { } } +func TestStaticPolicyPodResizeCPUsMultiContainerPod(t *testing.T) { + testCases := []struct { + staticPolicyTest + containerName2 string + expAllocErr error + expCSetAfterAlloc cpuset.CPUSet + expCSetAfterResize cpuset.CPUSet + expCSetAfterResizeSize int + expCSetAfterRemove cpuset.CPUSet + }{ + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Increase appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "4000m", + resizeRequest: "4000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Keep same allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0, 4 + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 1, 5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in exclusively allocated pool, Decrease appContainer-0 allocated CPUs", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // appContainer-0 CPUs 0, 4, 1, 5 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // appContainer-1 CPUS 2, 6, 3, 7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "4000m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + expAllocErr: prohibitedCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "4", OriginalCPUs: 4, GuaranteedCPUs: 2}, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(), + expCSetAfterResize: cpuset.New(), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, Containers in shared pool with more than one core, Attempt to move to exclusively allocated pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "2100m", limit: "2100m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2100m", + resizeLimit: "2000m", + resizeRequest: "2000m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "2", AllocatedCPUs: "2100m", Shared2Exclusive: true}, + expCSetAfterAlloc: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterResize: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "100m", limit: "100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 2-3, 6-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "100m", + resizeLimit: "200m", + resizeRequest: "200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool with more than one core, Increase CPU and keep appContainer-0 in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "1100m", limit: "1100m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "1100m", + resizeLimit: "1200m", + resizeRequest: "1200m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in shared pool, appContainer-1 in exclusive pool, Decrease CPU and keep in shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-7 + {request: "4000m", limit: "4000m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-1, 4-5 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "200m", + resizeLimit: "100m", + resizeRequest: "100m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expCSetAfterAlloc: cpuset.New(2, 3, 6, 7), + expCSetAfterResize: cpuset.New(2, 3, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + { + staticPolicyTest: staticPolicyTest{ + description: "SingleSocketHT, PodResize, appContainer-0 in exclusively allocated pool, Move to shared pool", + topo: topoSingleSocketHT, + pod: makeMultiContainerPodWithOptions( + nil, + []*containerOptions{ + {request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicy("Never")}, // 0-1, 4-5 + {request: "200m", limit: "200m", restartPolicy: v1.ContainerRestartPolicy("Never")}}, // 0-7 + ), + qosClass: v1.PodQOSGuaranteed, + podAllocated: "2000m", + resizeLimit: "1500m", + resizeRequest: "1500m", + containerName: "appContainer-0", + stAssignments: state.ContainerCPUAssignments{}, + stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + containerName2: "appContainer-1", + expAllocErr: inconsistentCPUAllocationError{RequestedCPUs: "1500m", AllocatedCPUs: "2", Shared2Exclusive: false}, + expCSetAfterAlloc: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterResize: cpuset.New(1, 2, 3, 5, 6, 7), + expCSetAfterRemove: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7), + }, + } + + for _, testCase := range testCases { + logger, _ := ktesting.NewTestContext(t) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScaling, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.InPlacePodVerticalScalingExclusiveCPUs, false) + t.Run(testCase.description, func(t *testing.T) { + + policy, _ := NewStaticPolicy(logger, testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil) + + st := &mockState{ + assignments: testCase.stAssignments, + defaultCPUSet: testCase.stDefaultCPUSet, + } + pod := testCase.pod + pod.Status.QOSClass = testCase.qosClass + + // allocate + for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + err := policy.Allocate(logger, st, pod, &container, lifecycle.AddOperation) + if err != nil { + t.Errorf("StaticPolicy Allocate() error (%v). expected no error but got %v", + testCase.description, err) + } + } + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterAlloc) { + t.Errorf("StaticPolicy Allocate() error (%v) before pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterAlloc, st.defaultCPUSet) + } + + // resize + pod.Status.ContainerStatuses = []v1.ContainerStatus{ + { + Name: testCase.containerName, + AllocatedResources: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(testCase.podAllocated), + }, + }, + } + pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeLimit), + }, + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse(testCase.resizeRequest), + }, + } + podResized := pod + for _, container := range append(podResized.Spec.InitContainers, podResized.Spec.Containers...) { + err := policy.Allocate(logger, st, podResized, &container, lifecycle.ResizeOperation) + if err != nil { + if !reflect.DeepEqual(err, testCase.expAllocErr) { + t.Errorf("StaticPolicy Allocate() error (%v), expected error: %v but got: %v", + testCase.description, testCase.expAllocErr, err) + } + } + } + + if testCase.expCSetAfterResizeSize > 0 { + // expCSetAfterResizeSize is used when testing scale down because allocated CPUs are not deterministic, + // since size of defaultCPUSet is deterministic and also interesection with expected allocation + // should not be nill. < ====== TODO esotsal + if !reflect.DeepEqual(st.defaultCPUSet.Size(), testCase.expCSetAfterResizeSize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset size equal to %v but got %v", + testCase.description, testCase.expCSetAfterResizeSize, st.defaultCPUSet.Size()) + } + } else { + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterResize) { + t.Errorf("StaticPolicy Allocate() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterResize, st.defaultCPUSet) + } + } + + // remove + err := policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + err = policy.RemoveContainer(logger, st, string(pod.UID), testCase.containerName2) + if err != nil { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected no error but got %v", + testCase.description, err) + } + + if !reflect.DeepEqual(st.defaultCPUSet, testCase.expCSetAfterRemove) { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected default cpuset %v but got %v", + testCase.description, testCase.expCSetAfterRemove, st.defaultCPUSet) + } + if _, found := st.assignments[string(pod.UID)][testCase.containerName]; found { + t.Errorf("StaticPolicy RemoveContainer() error (%v) after pod resize. expected (pod %v, container %v) not be in assignments %v", + testCase.description, testCase.podUID, testCase.containerName, st.assignments) + } + }) + } +} func TestStaticPolicyRemove(t *testing.T) { testCases := []staticPolicyTest{ { @@ -814,8 +1527,8 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 2, 3), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(4, 5, 6, 7), @@ -827,9 +1540,9 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 2, 3), - "fakeContainer2": cpuset.New(4, 5, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, + "fakeContainer2": {Original: cpuset.New(4, 5, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(), @@ -841,9 +1554,9 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer1", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 3, 5), - "fakeContainer2": cpuset.New(2, 4), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 3, 5), Resized: cpuset.New()}, + "fakeContainer2": {Original: cpuset.New(2, 4), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(6, 7), @@ -855,8 +1568,8 @@ func TestStaticPolicyRemove(t *testing.T) { podUID: "fakePod", containerName: "fakeContainer2", stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer1": cpuset.New(1, 3, 5), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer1": {Original: cpuset.New(1, 3, 5), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(2, 4, 6, 7), @@ -975,7 +1688,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { continue } - cpuAlloc, err := policy.allocateCPUs(klog.Background(), st, tc.numRequested, tc.socketMask, cpuset.New()) + cpuAlloc, err := policy.allocateCPUs(logger, st, tc.numRequested, tc.socketMask, cpuset.New()) if err != nil { t.Errorf("StaticPolicy allocateCPUs() error (%v). expected CPUSet %v not error %v", tc.description, tc.expCSet, err) @@ -1125,8 +1838,8 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { numReservedCPUs: 2, reserved: cpuset.New(0, 1), stAssignments: state.ContainerCPUAssignments{ - "fakePod": map[string]cpuset.CPUSet{ - "fakeContainer100": cpuset.New(2, 3, 6, 7), + "fakePod": map[string]state.ContainerCPUAssignment{ + "fakeContainer100": {Original: cpuset.New(2, 3, 6, 7), Resized: cpuset.New()}, }, }, stDefaultCPUSet: cpuset.New(0, 1, 4, 5), @@ -1150,27 +1863,27 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { } container := &testCase.pod.Spec.Containers[0] - err = policy.Allocate(logger, st, testCase.pod, container) + err = policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("StaticPolicy Allocate() error (%v). expected add error: %v but got: %v", testCase.description, testCase.expErr, err) } if testCase.expCPUAlloc { - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !cset.Equals(testCase.expCSet) { + if !assignment.Original.Equals(testCase.expCSet) { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s but got %s", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } - if !cset.Intersection(st.defaultCPUSet).IsEmpty() { + if !assignment.Original.Intersection(st.defaultCPUSet).IsEmpty() { t.Errorf("StaticPolicy Allocate() error (%v). expected cpuset %s to be disoint from the shared cpuset %s", - testCase.description, cset, st.defaultCPUSet) + testCase.description, assignment.Original, st.defaultCPUSet) } } @@ -1920,7 +2633,7 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { for idx := range testCase.pod.Spec.Containers { container := &testCase.pod.Spec.Containers[idx] - err := policy.Allocate(logger, st, testCase.pod, container) + err := policy.Allocate(logger, st, testCase.pod, container, lifecycle.AddOperation) if err != nil { t.Fatalf("Allocate failed: pod=%q container=%q", testCase.pod.UID, container.Name) } @@ -1928,14 +2641,14 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) { if testCase.expCPUAlloc { container := &testCase.pod.Spec.Containers[0] - cset, found := st.assignments[string(testCase.pod.UID)][container.Name] + assignment, found := st.assignments[string(testCase.pod.UID)][container.Name] if !found { t.Errorf("StaticPolicy Allocate() error (%v). expected container %v to be present in assignments %v", testCase.description, container.Name, st.assignments) } - if !testCase.expCSet.Equals(cset) { + if !testCase.expCSet.Equals(assignment.Original) { t.Errorf("StaticPolicy Allocate() error (%v). expected CPUSet %v but got %v", - testCase.description, testCase.expCSet, cset) + testCase.description, testCase.expCSet, assignment.Original) } return } diff --git a/pkg/kubelet/cm/cpumanager/state/checkpoint.go b/pkg/kubelet/cm/cpumanager/state/checkpoint.go index c69d12998121f..fbad1fb0d8a64 100644 --- a/pkg/kubelet/cm/cpumanager/state/checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/checkpoint.go @@ -30,10 +30,26 @@ import ( var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV1{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV2{} +var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV3{} var _ checkpointmanager.Checkpoint = &CPUManagerCheckpoint{} -// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +// ContainerCPUs struct is used in a checkpoint in v3 format, +// to support In place update pod resources alongside Static CPU Manager policy +type ContainerCPUs struct { + Original string `json:"original"` + Resized string `json:"resized"` +} + +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v3 format type CPUManagerCheckpoint struct { + PolicyName string `json:"policyName"` + DefaultCPUSet string `json:"defaultCpuSet"` + Entries map[string]map[string]ContainerCPUs `json:"entries,omitempty"` + Checksum checksum.Checksum `json:"checksum"` +} + +// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format +type CPUManagerCheckpointV2 struct { PolicyName string `json:"policyName"` DefaultCPUSet string `json:"defaultCpuSet"` Entries map[string]map[string]string `json:"entries,omitempty"` @@ -48,13 +64,13 @@ type CPUManagerCheckpointV1 struct { Checksum checksum.Checksum `json:"checksum"` } -// CPUManagerCheckpointV2 struct is used to store cpu/pod assignments in a checkpoint in v2 format -type CPUManagerCheckpointV2 = CPUManagerCheckpoint +// CPUManagerCheckpointV3 struct is used to store cpu/pod assignments in a checkpoint in v3 format +type CPUManagerCheckpointV3 = CPUManagerCheckpoint // NewCPUManagerCheckpoint returns an instance of Checkpoint func NewCPUManagerCheckpoint() *CPUManagerCheckpoint { //nolint:staticcheck // unexported-type-in-api user-facing error message - return newCPUManagerCheckpointV2() + return newCPUManagerCheckpointV3() } func newCPUManagerCheckpointV1() *CPUManagerCheckpointV1 { @@ -69,6 +85,12 @@ func newCPUManagerCheckpointV2() *CPUManagerCheckpointV2 { } } +func newCPUManagerCheckpointV3() *CPUManagerCheckpointV3 { + return &CPUManagerCheckpointV3{ + Entries: make(map[string]map[string]ContainerCPUs), + } +} + // MarshalCheckpoint returns marshalled checkpoint in v1 format func (cp *CPUManagerCheckpointV1) MarshalCheckpoint() ([]byte, error) { // make sure checksum wasn't set before so it doesn't affect output checksum @@ -85,6 +107,14 @@ func (cp *CPUManagerCheckpointV2) MarshalCheckpoint() ([]byte, error) { return json.Marshal(*cp) } +// MarshalCheckpoint returns marshalled checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) MarshalCheckpoint() ([]byte, error) { + // make sure checksum wasn't set before so it doesn't affect output checksum + cp.Checksum = 0 + cp.Checksum = checksum.New(cp) + return json.Marshal(*cp) +} + // UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v1 format func (cp *CPUManagerCheckpointV1) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) @@ -95,6 +125,11 @@ func (cp *CPUManagerCheckpointV2) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) } +// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v3 format +func (cp *CPUManagerCheckpointV3) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, cp) +} + // VerifyChecksum verifies that current checksum of checkpoint is valid in v1 format func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { if cp.Checksum == 0 { @@ -109,7 +144,9 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { cp.Checksum = ck hash := fnv.New32a() - fmt.Fprintf(hash, "%v", object) + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } actualCS := checksum.Checksum(hash.Sum32()) if cp.Checksum != actualCS { return &errors.CorruptCheckpointError{ @@ -123,6 +160,33 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { // VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format func (cp *CPUManagerCheckpointV2) VerifyChecksum() error { + if cp.Checksum == 0 { + // accept empty checksum for compatibility with old file backend + return nil + } + ck := cp.Checksum + cp.Checksum = 0 + object := dump.ForHash(cp) + object = strings.Replace(object, "CPUManagerCheckpointV2", "CPUManagerCheckpoint", 1) + cp.Checksum = ck + + hash := fnv.New32a() + if _, err := fmt.Fprintf(hash, "%v", object); err != nil { + return err + } + actualCS := checksum.Checksum(hash.Sum32()) + if cp.Checksum != actualCS { + return &errors.CorruptCheckpointError{ + ActualCS: uint64(actualCS), + ExpectedCS: uint64(cp.Checksum), + } + } + + return nil +} + +// VerifyChecksum verifies that current checksum of checkpoint is valid in v3 format +func (cp *CPUManagerCheckpointV3) VerifyChecksum() error { if cp.Checksum == 0 { // accept empty checksum for compatibility with old file backend return nil diff --git a/pkg/kubelet/cm/cpumanager/state/state.go b/pkg/kubelet/cm/cpumanager/state/state.go index 352fddfb9cdad..513a85010a2b0 100644 --- a/pkg/kubelet/cm/cpumanager/state/state.go +++ b/pkg/kubelet/cm/cpumanager/state/state.go @@ -18,25 +18,29 @@ package state import ( "k8s.io/utils/cpuset" + "maps" ) +type ContainerCPUAssignment struct { + Original cpuset.CPUSet + Resized cpuset.CPUSet +} + // ContainerCPUAssignments type used in cpu manager state -type ContainerCPUAssignments map[string]map[string]cpuset.CPUSet +type ContainerCPUAssignments map[string]map[string]ContainerCPUAssignment // Clone returns a copy of ContainerCPUAssignments func (as ContainerCPUAssignments) Clone() ContainerCPUAssignments { ret := make(ContainerCPUAssignments, len(as)) for pod := range as { - ret[pod] = make(map[string]cpuset.CPUSet, len(as[pod])) - for container, cset := range as[pod] { - ret[pod][container] = cset - } + ret[pod] = maps.Clone(as[pod]) } return ret } // Reader interface used to read current cpu/pod assignment state type Reader interface { + GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) GetDefaultCPUSet() cpuset.CPUSet GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go index 9774bf3728717..5540f1007a26c 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go @@ -17,6 +17,7 @@ limitations under the License. package state import ( + "errors" "fmt" "path/filepath" "sync" @@ -24,7 +25,7 @@ import ( "github.com/go-logr/logr" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" + checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/utils/cpuset" ) @@ -68,26 +69,24 @@ func NewCheckpointState(logger logr.Logger, stateDir, checkpointName, policyName return stateCheckpoint, nil } -// migrateV1CheckpointToV2Checkpoint() converts checkpoints from the v1 format to the v2 format -func (sc *stateCheckpoint) migrateV1CheckpointToV2Checkpoint(src *CPUManagerCheckpointV1, dst *CPUManagerCheckpointV2) error { +// migrateV2CheckpointToV3Checkpoint() converts checkpoints from the v2 format to the v3 format +func (sc *stateCheckpoint) migrateV2CheckpointToV3Checkpoint(src *CPUManagerCheckpointV2, dst *CPUManagerCheckpointV3) error { if src.PolicyName != "" { dst.PolicyName = src.PolicyName } if src.DefaultCPUSet != "" { dst.DefaultCPUSet = src.DefaultCPUSet } - for containerID, cset := range src.Entries { - podUID, containerName, err := sc.initialContainers.GetContainerRef(containerID) - if err != nil { - return fmt.Errorf("containerID '%v' not found in initial containers list", containerID) - } - if dst.Entries == nil { - dst.Entries = make(map[string]map[string]string) - } - if _, exists := dst.Entries[podUID]; !exists { - dst.Entries[podUID] = make(map[string]string) + for podUID := range src.Entries { + for containerName, cpuString := range src.Entries[podUID] { + if dst.Entries == nil { + dst.Entries = make(map[string]map[string]ContainerCPUs) + } + if _, exists := dst.Entries[podUID]; !exists { + dst.Entries[podUID] = make(map[string]ContainerCPUs) + } + dst.Entries[podUID][containerName] = ContainerCPUs{Original: cpuString} } - dst.Entries[podUID][containerName] = cset } return nil } @@ -100,39 +99,52 @@ func (sc *stateCheckpoint) restoreState() error { checkpointV1 := newCPUManagerCheckpointV1() checkpointV2 := newCPUManagerCheckpointV2() + checkpointV3 := newCPUManagerCheckpointV3() if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV1); err != nil { - checkpointV1 = &CPUManagerCheckpointV1{} // reset it back to 0 if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV2); err != nil { - if err == errors.ErrCheckpointNotFound { - return sc.storeState() + if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV3); err != nil { + if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) { + return sc.storeState() + } + return err + } + } else { + if err = sc.migrateV2CheckpointToV3Checkpoint(checkpointV2, checkpointV3); err != nil { + return fmt.Errorf("error migrating v2 checkpoint state to v3 checkpoint state: %w", err) } - return err } + } else { + return fmt.Errorf("error migrating v1 checkpoint state to v3 checkpoint state is not supported") } - if err = sc.migrateV1CheckpointToV2Checkpoint(checkpointV1, checkpointV2); err != nil { - return fmt.Errorf("error migrating v1 checkpoint state to v2 checkpoint state: %s", err) - } - - if sc.policyName != checkpointV2.PolicyName { - return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV2.PolicyName) + if sc.policyName != checkpointV3.PolicyName { + return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV3.PolicyName) } var tmpDefaultCPUSet cpuset.CPUSet - if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV2.DefaultCPUSet); err != nil { - return fmt.Errorf("could not parse default cpu set %q: %v", checkpointV2.DefaultCPUSet, err) + if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV3.DefaultCPUSet); err != nil { + return fmt.Errorf("could not parse default cpu set %q: %w", checkpointV3.DefaultCPUSet, err) } - var tmpContainerCPUSet cpuset.CPUSet + var tmpOriginal cpuset.CPUSet + var tmpResized cpuset.CPUSet tmpAssignments := ContainerCPUAssignments{} - for pod := range checkpointV2.Entries { - tmpAssignments[pod] = make(map[string]cpuset.CPUSet, len(checkpointV2.Entries[pod])) - for container, cpuString := range checkpointV2.Entries[pod] { - if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil { - return fmt.Errorf("could not parse cpuset %q for container %q in pod %q: %v", cpuString, container, pod, err) + for pod := range checkpointV3.Entries { + tmpAssignments[pod] = make(map[string]ContainerCPUAssignment, len(checkpointV3.Entries[pod])) + for container, containerCPUs := range checkpointV3.Entries[pod] { + if tmpOriginal, err = cpuset.Parse(containerCPUs.Original); err != nil { + return fmt.Errorf("could not parse original cpuset %q for container %q in pod %q: %w", containerCPUs.Original, container, pod, err) } - tmpAssignments[pod][container] = tmpContainerCPUSet + if tmpResized, err = cpuset.Parse(containerCPUs.Resized); err != nil { + return fmt.Errorf("could not parse resized cpuset %q for container %q in pod %q: %w", containerCPUs.Resized, container, pod, err) + } + if !tmpOriginal.IsEmpty() && !tmpResized.IsEmpty() { + if !tmpResized.IsSubsetOf(tmpOriginal) { + return fmt.Errorf("original cpuset %q for container %q in pod %q is not subset of resized cpuset %q", containerCPUs.Original, container, pod, containerCPUs.Resized) + } + } + tmpAssignments[pod][container] = ContainerCPUAssignment{Original: tmpOriginal, Resized: tmpResized} } } @@ -153,9 +165,9 @@ func (sc *stateCheckpoint) storeState() error { assignments := sc.cache.GetCPUAssignments() for pod := range assignments { - checkpoint.Entries[pod] = make(map[string]string, len(assignments[pod])) - for container, cset := range assignments[pod] { - checkpoint.Entries[pod][container] = cset.String() + checkpoint.Entries[pod] = make(map[string]ContainerCPUs, len(assignments[pod])) + for container, assignment := range assignments[pod] { + checkpoint.Entries[pod][container] = ContainerCPUs{Original: assignment.Original.String(), Resized: assignment.Resized.String()} } } @@ -167,6 +179,14 @@ func (sc *stateCheckpoint) storeState() error { return nil } +// GetOriginalCPUSet returns current CPU set +func (sc *stateCheckpoint) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + sc.mux.RLock() + defer sc.mux.RUnlock() + + return sc.cache.GetOriginalCPUSet(podUID, containerName) +} + // GetCPUSet returns current CPU set func (sc *stateCheckpoint) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { sc.mux.RLock() diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go index 458419fe1e43f..dfb988ef72476 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go @@ -50,7 +50,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore default cpu set", + "Restore default cpu set from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "4-6", @@ -65,7 +65,7 @@ func TestCheckpointStateRestore(t *testing.T) { }, }, { - "Restore valid checkpoint", + "Restore valid checkpoint from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -82,16 +82,22 @@ func TestCheckpointStateRestore(t *testing.T) { "", &stateMemory{ assignments: ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), + "pod": map[string]ContainerCPUAssignment{ + "container1": { + Original: cpuset.New(4, 5, 6), + Resized: cpuset.New(), + }, + "container2": { + Original: cpuset.New(1, 2, 3), + Resized: cpuset.New(), + }, }, }, defaultCPUSet: cpuset.New(1, 2, 3), }, }, { - "Restore checkpoint with invalid checksum", + "Restore checkpoint with invalid checksum from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "4-6", @@ -112,7 +118,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore checkpoint with invalid policy name", + "Restore checkpoint with invalid policy name from checkpoint with v2 checksum", `{ "policyName": "other", "defaultCPUSet": "1-3", @@ -125,7 +131,7 @@ func TestCheckpointStateRestore(t *testing.T) { &stateMemory{}, }, { - "Restore checkpoint with unparsable default cpu set", + "Restore checkpoint with unparsable default cpu set from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1.3", @@ -137,8 +143,8 @@ func TestCheckpointStateRestore(t *testing.T) { `could not parse default cpu set "1.3": strconv.Atoi: parsing "1.3": invalid syntax`, &stateMemory{}, }, - { - "Restore checkpoint with unparsable assignment entry", + /*Sotiris to fix for v3{ + "Restore checkpoint with unparsable assignment entry from checkpoint with v2 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -154,7 +160,7 @@ func TestCheckpointStateRestore(t *testing.T) { containermap.ContainerMap{}, `could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`, &stateMemory{}, - }, + },*/ { "Restore checkpoint from checkpoint with v1 checksum", `{ @@ -164,13 +170,11 @@ func TestCheckpointStateRestore(t *testing.T) { }`, "none", containermap.ContainerMap{}, - "", - &stateMemory{ - defaultCPUSet: cpuset.New(1, 2, 3), - }, + "error migrating v1 checkpoint state to v3 checkpoint state is not supported", + &stateMemory{}, }, { - "Restore checkpoint with migration", + "Restore checkpoint with migration from checkpoint with v1 checksum", `{ "policyName": "none", "defaultCPUSet": "1-3", @@ -181,22 +185,9 @@ func TestCheckpointStateRestore(t *testing.T) { "checksum": 3680390589 }`, "none", - func() containermap.ContainerMap { - cm := containermap.NewContainerMap() - cm.Add("pod", "container1", "containerID1") - cm.Add("pod", "container2", "containerID2") - return cm - }(), - "", - &stateMemory{ - assignments: ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), - }, - }, - defaultCPUSet: cpuset.New(1, 2, 3), - }, + containermap.ContainerMap{}, + "error migrating v1 checkpoint state to v3 checkpoint state is not supported", + &stateMemory{}, }, } @@ -249,9 +240,12 @@ func TestCheckpointStateStore(t *testing.T) { { "Store assignments", &stateMemory{ - assignments: map[string]map[string]cpuset.CPUSet{ + assignments: map[string]map[string]ContainerCPUAssignment{ "pod": { - "container1": cpuset.New(1, 5, 8), + "container1": ContainerCPUAssignment{ + Original: cpuset.New(1, 5, 8), + Resized: cpuset.New(), + }, }, }, }, @@ -377,14 +371,17 @@ func TestCheckpointStateClear(t *testing.T) { testCases := []struct { description string defaultCPUset cpuset.CPUSet - assignments map[string]map[string]cpuset.CPUSet + assignments map[string]map[string]ContainerCPUAssignment }{ { "Valid state", cpuset.New(1, 5, 10), - map[string]map[string]cpuset.CPUSet{ + map[string]map[string]ContainerCPUAssignment{ "pod": { - "container1": cpuset.New(1, 4), + "container1": ContainerCPUAssignment{ + Original: cpuset.New(1, 4), + Resized: cpuset.New(), + }, }, }, }, diff --git a/pkg/kubelet/cm/cpumanager/state/state_mem.go b/pkg/kubelet/cm/cpumanager/state/state_mem.go index 73a77920a0720..6de4b97636b18 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_mem.go +++ b/pkg/kubelet/cm/cpumanager/state/state_mem.go @@ -46,12 +46,23 @@ func NewMemoryState(logger logr.Logger) State { } } +func (s *stateMemory) GetOriginalCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { + s.RLock() + defer s.RUnlock() + + entry, exists := s.assignments[podUID][containerName] + return entry.Original.Clone(), exists +} + func (s *stateMemory) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) { s.RLock() defer s.RUnlock() - res, ok := s.assignments[podUID][containerName] - return res.Clone(), ok + entry, exists := s.assignments[podUID][containerName] + if entry.Resized.IsEmpty() { + return entry.Original.Clone(), exists + } + return entry.Resized.Clone(), exists } func (s *stateMemory) GetDefaultCPUSet() cpuset.CPUSet { @@ -79,11 +90,18 @@ func (s *stateMemory) SetCPUSet(podUID string, containerName string, cset cpuset defer s.Unlock() if _, ok := s.assignments[podUID]; !ok { - s.assignments[podUID] = make(map[string]cpuset.CPUSet) + s.assignments[podUID] = make(map[string]ContainerCPUAssignment) + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", cset, "Resized cpuSet", cpuset.New()) + } else { + if entry, ok := s.assignments[podUID][containerName]; !ok { + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: cset, Resized: cpuset.New()} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", cset, "Resized cpuSet", cpuset.New()) + } else { + s.assignments[podUID][containerName] = ContainerCPUAssignment{Original: entry.Original, Resized: cset} + s.logger.Info("Updated CPUSet", "podUID", podUID, "containerName", containerName, "Original cpuSet", entry.Original, "Resized cpuSet", cset) + } } - - s.assignments[podUID][containerName] = cset - s.logger.Info("Updated desired CPUSet", "podUID", podUID, "containerName", containerName, "cpuSet", cset) } func (s *stateMemory) SetDefaultCPUSet(cset cpuset.CPUSet) { diff --git a/pkg/kubelet/cm/cpumanager/state/state_test.go b/pkg/kubelet/cm/cpumanager/state/state_test.go index efe9ba1c611d5..6ce60439008cb 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_test.go @@ -25,9 +25,9 @@ import ( func TestClone(t *testing.T) { expect := ContainerCPUAssignments{ - "pod": map[string]cpuset.CPUSet{ - "container1": cpuset.New(4, 5, 6), - "container2": cpuset.New(1, 2, 3), + "pod": map[string]ContainerCPUAssignment{ + "container1": {Original: cpuset.New(4, 5, 6), Resized: cpuset.New()}, + "container2": {Original: cpuset.New(1, 2, 3), Resized: cpuset.New()}, }, } actual := expect.Clone() diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index 166b86db0530d..755a75b7b3378 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/utils/cpuset" ) @@ -246,7 +247,7 @@ func TestGetTopologyHints(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - hints := m.GetTopologyHints(&tc.pod, &tc.container)[string(v1.ResourceCPU)] + hints := m.GetTopologyHints(&tc.pod, &tc.container, lifecycle.AddOperation)[string(v1.ResourceCPU)] if len(tc.expectedHints) == 0 && len(hints) == 0 { continue } @@ -297,7 +298,7 @@ func TestGetPodTopologyHints(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - podHints := m.GetPodTopologyHints(&tc.pod)[string(v1.ResourceCPU)] + podHints := m.GetPodTopologyHints(&tc.pod, lifecycle.AddOperation)[string(v1.ResourceCPU)] if len(tc.expectedHints) == 0 && len(podHints) == 0 { continue } @@ -479,7 +480,7 @@ func TestGetPodTopologyHintsWithPolicyOptions(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - podHints := m.GetPodTopologyHints(&testCase.pod)[string(v1.ResourceCPU)] + podHints := m.GetPodTopologyHints(&testCase.pod, lifecycle.AddOperation)[string(v1.ResourceCPU)] sort.SliceStable(podHints, func(i, j int) bool { return podHints[i].LessThan(podHints[j]) }) @@ -590,8 +591,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(0, 6), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(0, 6), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -611,8 +612,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(3, 9), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -632,8 +633,8 @@ func returnTestCases() []testCase { pod: *testPod4, container: *testContainer4, assignments: state.ContainerCPUAssignments{ - string(testPod4.UID): map[string]cpuset.CPUSet{ - testContainer4.Name: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer4.Name: {Original: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -649,8 +650,8 @@ func returnTestCases() []testCase { pod: *testPod1, container: *testContainer1, assignments: state.ContainerCPUAssignments{ - string(testPod1.UID): map[string]cpuset.CPUSet{ - testContainer1.Name: cpuset.New(0, 6, 3, 9), + string(testPod1.UID): map[string]state.ContainerCPUAssignment{ + testContainer1.Name: {Original: cpuset.New(0, 6, 3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), @@ -661,8 +662,8 @@ func returnTestCases() []testCase { pod: *testPod4, container: *testContainer4, assignments: state.ContainerCPUAssignments{ - string(testPod4.UID): map[string]cpuset.CPUSet{ - testContainer4.Name: cpuset.New(0, 6, 3, 9), + string(testPod4.UID): map[string]state.ContainerCPUAssignment{ + testContainer4.Name: {Original: cpuset.New(0, 6, 3, 9), Resized: cpuset.New()}, }, }, defaultCPUSet: cpuset.New(), diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 0019ae5d0d9a1..db9cc723aeb4b 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -363,7 +363,7 @@ func (m *ManagerImpl) Stop(logger klog.Logger) error { // Allocate is the call that you can use to allocate a set of devices // from the registered device plugins. -func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { // Use context.TODO() because we currently do not have a proper context to pass in. // Replace this with an appropriate context when refactoring this function to accept a context parameter. ctx := context.TODO() @@ -984,7 +984,7 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(ctx context.Context, pod *v1. } if needsReAllocate { logger.V(2).Info("Needs to re-allocate device plugin resources for pod", "pod", klog.KObj(pod), "containerName", container.Name) - if err := m.Allocate(pod, container); err != nil { + if err := m.Allocate(pod, container, lifecycle.AddOperation); err != nil { return nil, err } } diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go index 38add8678e7d0..bdf19a9935f52 100644 --- a/pkg/kubelet/cm/devicemanager/manager_test.go +++ b/pkg/kubelet/cm/devicemanager/manager_test.go @@ -1087,7 +1087,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) { pod := testCase.testPod activePods = append(activePods, pod) podsStub.updateActivePods(activePods) - err := testManager.Allocate(pod, &pod.Spec.Containers[0]) + err := testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("DevicePluginManager error (%v). expected error: %v but got: %v", testCase.description, testCase.expErr, err) @@ -1320,9 +1320,9 @@ func TestGetDeviceRunContainerOptions(t *testing.T) { activePods := []*v1.Pod{pod1, pod2} podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod1, &pod1.Spec.Containers[0]) + err = testManager.Allocate(pod1, &pod1.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) - err = testManager.Allocate(pod2, &pod2.Spec.Containers[0]) + err = testManager.Allocate(pod2, &pod2.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) // when pod is in activePods, GetDeviceRunContainerOptions should return @@ -1418,10 +1418,10 @@ func TestInitContainerDeviceAllocation(t *testing.T) { } podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInInitContainers}) for _, container := range podWithPluginResourcesInInitContainers.Spec.InitContainers { - err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container, lifecycle.AddOperation) } for _, container := range podWithPluginResourcesInInitContainers.Spec.Containers { - err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInInitContainers, &container, lifecycle.AddOperation) } as.NoError(err) podUID := string(podWithPluginResourcesInInitContainers.UID) @@ -1528,10 +1528,10 @@ func TestRestartableInitContainerDeviceAllocation(t *testing.T) { } podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInRestartableInitContainers}) for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers { - err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container, lifecycle.AddOperation) } for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.Containers { - err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container) + err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container, lifecycle.AddOperation) } as.NoError(err) podUID := string(podWithPluginResourcesInRestartableInitContainers.UID) @@ -1661,7 +1661,7 @@ func TestDevicePreStartContainer(t *testing.T) { activePods := []*v1.Pod{} activePods = append(activePods, pod) podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod, &pod.Spec.Containers[0]) + err = testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) runContainerOpts, err := testManager.GetDeviceRunContainerOptions(tCtx, pod, &pod.Spec.Containers[0]) as.NoError(err) @@ -1689,7 +1689,7 @@ func TestDevicePreStartContainer(t *testing.T) { v1.ResourceName(res1.resourceName): *resource.NewQuantity(int64(0), resource.DecimalSI)}) activePods = append(activePods, pod2) podsStub.updateActivePods(activePods) - err = testManager.Allocate(pod2, &pod2.Spec.Containers[0]) + err = testManager.Allocate(pod2, &pod2.Spec.Containers[0], lifecycle.AddOperation) as.NoError(err) _, err = testManager.GetDeviceRunContainerOptions(tCtx, pod2, &pod2.Spec.Containers[0]) as.NoError(err) @@ -1842,7 +1842,7 @@ func TestGetTopologyHintsWithUpdates(t *testing.T) { count: 10, devices: devs, testfunc: func(manager *wrappedManagerImpl) { - manager.GetTopologyHints(testPod, &testPod.Spec.Containers[0]) + manager.GetTopologyHints(testPod, &testPod.Spec.Containers[0], lifecycle.AddOperation) }, }, { @@ -1850,7 +1850,7 @@ func TestGetTopologyHintsWithUpdates(t *testing.T) { count: 10, devices: devs, testfunc: func(manager *wrappedManagerImpl) { - manager.GetPodTopologyHints(testPod) + manager.GetPodTopologyHints(testPod, lifecycle.AddOperation) }, }, } @@ -2132,7 +2132,7 @@ func TestAdmitPodWithDRAResources(t *testing.T) { sourcesReady: &sourcesReadyStub{}, } - err := testManager.Allocate(pod, &pod.Spec.Containers[0]) + err := testManager.Allocate(pod, &pod.Spec.Containers[0], lifecycle.AddOperation) test.checkError(t, err) }) } diff --git a/pkg/kubelet/cm/devicemanager/topology_hints.go b/pkg/kubelet/cm/devicemanager/topology_hints.go index 8846122a3d769..3f2aca102a090 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints.go @@ -25,12 +25,13 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // GetTopologyHints implements the TopologyManager HintProvider Interface which // ensures the Device Manager is consulted when Topology Aware Hints for each // container are created. -func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use klog.TODO() because we currently do not have a proper logger to pass in. // Replace this with an appropriate logger when refactoring this function to accept a logger parameter. logger := klog.TODO() @@ -85,7 +86,7 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map // GetPodTopologyHints implements the topologymanager.HintProvider Interface which // ensures the Device Manager is consulted when Topology Aware Hints for Pod are created. -func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use klog.TODO() because we currently do not have a proper logger to pass in. // Replace this with an appropriate logger when refactoring this function to accept a logger parameter. logger := klog.TODO() diff --git a/pkg/kubelet/cm/devicemanager/topology_hints_test.go b/pkg/kubelet/cm/devicemanager/topology_hints_test.go index 53ea645a3af68..567503c19de76 100644 --- a/pkg/kubelet/cm/devicemanager/topology_hints_test.go +++ b/pkg/kubelet/cm/devicemanager/topology_hints_test.go @@ -29,6 +29,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -93,7 +94,7 @@ func TestGetTopologyHints(t *testing.T) { } } - hints := m.GetTopologyHints(tc.pod, &tc.pod.Spec.Containers[0]) + hints := m.GetTopologyHints(tc.pod, &tc.pod.Spec.Containers[0], lifecycle.AddOperation) for r := range tc.expectedHints { sort.SliceStable(hints[r], func(i, j int) bool { @@ -960,7 +961,7 @@ func TestGetPodTopologyHints(t *testing.T) { } } - hints := m.GetPodTopologyHints(tc.pod) + hints := m.GetPodTopologyHints(tc.pod, lifecycle.AddOperation) for r := range tc.expectedHints { sort.SliceStable(hints[r], func(i, j int) bool { diff --git a/pkg/kubelet/cm/devicemanager/types.go b/pkg/kubelet/cm/devicemanager/types.go index b377443c434c2..cb2e11fd26c44 100644 --- a/pkg/kubelet/cm/devicemanager/types.go +++ b/pkg/kubelet/cm/devicemanager/types.go @@ -44,7 +44,7 @@ type Manager interface { // owning device plugin to allow setup procedures to take place, and for // the device plugin to provide runtime settings to use the device // (environment variables, mount points and device files). - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // UpdatePluginResources updates node resources based on devices already // allocated to pods. The node object is provided for the device manager to @@ -83,11 +83,11 @@ type Manager interface { // TopologyManager HintProvider provider indicates the Device Manager implements the Topology Manager Interface // and is consulted to make Topology aware resource alignments - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // TopologyManager HintProvider provider indicates the Device Manager implements the Topology Manager Interface // and is consulted to make Topology aware resource alignments per Pod - GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // UpdateAllocatedDevices frees any Devices that are bound to terminated pods. UpdateAllocatedDevices() diff --git a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go index e589c78f573d3..f0df61673c409 100644 --- a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" ) @@ -44,7 +45,7 @@ func (m *fakeManager) Policy(logger klog.Logger) Policy { return NewPolicyNone(logger) } -func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() logger.Info("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) return nil @@ -64,13 +65,13 @@ func (m *fakeManager) RemoveContainer(logger klog.Logger, containerID string) er return nil } -func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get Topology Hints", "pod", klog.KObj(pod), "containerName", container.Name) return map[string][]topologymanager.TopologyHint{} } -func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger := klog.TODO() logger.Info("Get Pod Topology Hints", "pod", klog.KObj(pod)) return map[string][]topologymanager.TopologyHint{} diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 585ebff101390..658d337692bf8 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/status" ) @@ -65,7 +66,7 @@ type Manager interface { // Allocate is called to pre-allocate memory resources during Pod admission. // This must be called at some point prior to the AddContainer() call for a container, e.g. at pod admission time. - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer is called after Kubelet decides to kill or delete a // container. After this call, any memory allocated to the container is freed. @@ -77,12 +78,12 @@ type Manager interface { // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(*v1.Pod, *v1.Container, lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(*v1.Pod, lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetMemoryNUMANodes provides NUMA nodes that are used to allocate the container memory GetMemoryNUMANodes(logger klog.Logger, pod *v1.Pod, container *v1.Container) sets.Set[int] @@ -261,7 +262,7 @@ func (m *manager) GetMemoryNUMANodes(logger klog.Logger, pod *v1.Pod, container } // Allocate is called to pre-allocate memory resources during Pod admission. -func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *manager) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { logger := klog.TODO() m.removeStaleState(logger) @@ -269,7 +270,7 @@ func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error { defer m.Unlock() // Call down into the policy to assign this container memory if required. - if err := m.policy.Allocate(logger, m.state, pod, container); err != nil { + if err := m.policy.Allocate(logger, m.state, pod, container, operation); err != nil { logger.Error(err, "Allocate error", "pod", klog.KObj(pod), "containerName", container.Name) return err } @@ -301,22 +302,22 @@ func (m *manager) State() state.Reader { } // GetPodTopologyHints returns the topology hints for the topology manager -func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (m *manager) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Use context.TODO() because we currently do not have a proper context to pass in. // This should be replaced with an appropriate context when refactoring this function to accept a context parameter. ctx := context.TODO() // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(klog.FromContext(ctx)) // Delegate to active policy - return m.policy.GetPodTopologyHints(klog.TODO(), m.state, pod) + return m.policy.GetPodTopologyHints(klog.TODO(), m.state, pod, operation) } // GetTopologyHints returns the topology hints for the topology manager -func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState(klog.TODO()) // Delegate to active policy - return m.policy.GetTopologyHints(klog.TODO(), m.state, pod, container) + return m.policy.GetTopologyHints(klog.TODO(), m.state, pod, container, operation) } // TODO: move the method to the upper level, to re-use it under the CPU and memory managers diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index 6844710fecd98..b868f52537b53 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -38,6 +38,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -99,18 +100,18 @@ func (p *mockPolicy) Start(klog.Logger, state.State) error { return p.err } -func (p *mockPolicy) Allocate(klog.Logger, state.State, *v1.Pod, *v1.Container) error { +func (p *mockPolicy) Allocate(klog.Logger, state.State, *v1.Pod, *v1.Container, lifecycle.Operation) error { return p.err } func (p *mockPolicy) RemoveContainer(klog.Logger, state.State, string, string) { } -func (p *mockPolicy) GetTopologyHints(klog.Logger, state.State, *v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetTopologyHints(klog.Logger, state.State, *v1.Pod, *v1.Container, lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } -func (p *mockPolicy) GetPodTopologyHints(klog.Logger, state.State, *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *mockPolicy) GetPodTopologyHints(klog.Logger, state.State, *v1.Pod, lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } @@ -1414,7 +1415,7 @@ func TestAddContainer(t *testing.T) { } pod := testCase.podAllocate container := &pod.Spec.Containers[0] - err := mgr.Allocate(pod, container) + err := mgr.Allocate(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedAllocateError) { t.Errorf("Memory Manager Allocate() error (%v), expected error: %v, but got: %v", testCase.description, testCase.expectedAllocateError, err) @@ -2173,7 +2174,7 @@ func TestGetTopologyHints(t *testing.T) { pod := getPod("fakePod1", "fakeContainer1", requirementsGuaranteed) container := &pod.Spec.Containers[0] - hints := mgr.GetTopologyHints(pod, container) + hints := mgr.GetTopologyHints(pod, container, lifecycle.AddOperation) if !reflect.DeepEqual(hints, testCase.expectedHints) { t.Errorf("Hints were not generated correctly. Hints generated: %+v, hints expected: %+v", hints, testCase.expectedHints) @@ -2351,7 +2352,7 @@ func TestAllocateAndAddPodWithInitContainers(t *testing.T) { // Allocates memory for init containers for i := range testCase.podAllocate.Spec.InitContainers { - err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.InitContainers[i]) + err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -2359,7 +2360,7 @@ func TestAllocateAndAddPodWithInitContainers(t *testing.T) { // Allocates memory for apps containers for i := range testCase.podAllocate.Spec.Containers { - err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.Containers[i]) + err := mgr.Allocate(testCase.podAllocate, &testCase.podAllocate.Spec.Containers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } diff --git a/pkg/kubelet/cm/memorymanager/policy.go b/pkg/kubelet/cm/memorymanager/policy.go index 3c17ec93c21fa..fa23ca42691e0 100644 --- a/pkg/kubelet/cm/memorymanager/policy.go +++ b/pkg/kubelet/cm/memorymanager/policy.go @@ -21,6 +21,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // Type defines the policy type @@ -31,17 +32,17 @@ type Policy interface { Name() string Start(logger klog.Logger, s state.State) error // Allocate call is idempotent - Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) error + Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error // RemoveContainer call is idempotent RemoveContainer(logger klog.Logger, s state.State, podUID string, containerName string) // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. - GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint + GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint // GetAllocatableMemory returns the amount of allocatable memory for each NUMA node GetAllocatableMemory(s state.State) []state.Block } diff --git a/pkg/kubelet/cm/memorymanager/policy_best_effort.go b/pkg/kubelet/cm/memorymanager/policy_best_effort.go index 53f5746d48d87..3e60a70df3d2b 100644 --- a/pkg/kubelet/cm/memorymanager/policy_best_effort.go +++ b/pkg/kubelet/cm/memorymanager/policy_best_effort.go @@ -24,6 +24,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) // On Windows we want to use the same logic as the StaticPolicy to compute the memory topology hints @@ -60,20 +61,20 @@ func (p *bestEffortPolicy) Start(logger logr.Logger, s state.State) error { return p.static.Start(logger, s) } -func (p *bestEffortPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { - return p.static.Allocate(logger, s, pod, container) +func (p *bestEffortPolicy) Allocate(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { + return p.static.Allocate(logger, s, pod, container, operation) } func (p *bestEffortPolicy) RemoveContainer(logger logr.Logger, s state.State, podUID string, containerName string) { p.static.RemoveContainer(logger, s, podUID, containerName) } -func (p *bestEffortPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { - return p.static.GetPodTopologyHints(logger, s, pod) +func (p *bestEffortPolicy) GetPodTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + return p.static.GetPodTopologyHints(logger, s, pod, operation) } -func (p *bestEffortPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { - return p.static.GetTopologyHints(logger, s, pod, container) +func (p *bestEffortPolicy) GetTopologyHints(logger logr.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { + return p.static.GetTopologyHints(logger, s, pod, container, operation) } func (p *bestEffortPolicy) GetAllocatableMemory(s state.State) []state.Block { diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go index ceb2d236d1faf..f8b06a0ef9401 100644 --- a/pkg/kubelet/cm/memorymanager/policy_none.go +++ b/pkg/kubelet/cm/memorymanager/policy_none.go @@ -21,6 +21,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) const policyTypeNone policyType = "None" @@ -46,7 +47,7 @@ func (p *none) Start(logger klog.Logger, s state.State) error { } // Allocate call is idempotent -func (p *none) Allocate(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *none) Allocate(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { return nil } @@ -57,14 +58,14 @@ func (p *none) RemoveContainer(_ klog.Logger, s state.State, podUID string, cont // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *none) GetTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *none) GetTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } // GetPodTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *none) GetPodTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *none) GetPodTopologyHints(_ klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { return nil } diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index eb5c3b5f67c32..2bdcee8006662 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/metrics" ) @@ -95,7 +96,7 @@ func (p *staticPolicy) Start(logger klog.Logger, s state.State) error { } // Allocate call is idempotent -func (p *staticPolicy) Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { +func (p *staticPolicy) Allocate(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (rerr error) { // allocate the memory only for guaranteed pods logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "containerName", container.Name) qos := v1qos.GetPodQOS(pod) @@ -401,7 +402,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) { return reqRsrcs, nil } -func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod)) if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { @@ -436,7 +437,7 @@ func (p *staticPolicy) GetPodTopologyHints(logger klog.Logger, s state.State, po // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. -func (p *staticPolicy) GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { +func (p *staticPolicy) GetTopologyHints(logger klog.Logger, s state.State, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]topologymanager.TopologyHint { logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod)) if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index 5f51082f87b3d..702a3ada25436 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -32,6 +32,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -2089,7 +2090,7 @@ func TestStaticPolicyAllocate(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0], lifecycle.AddOperation) if (err == nil) != (testCase.expectedError == nil) || (err != nil && testCase.expectedError != nil && err.Error() != testCase.expectedError.Error()) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -2814,14 +2815,14 @@ func TestStaticPolicyAllocateWithInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.InitContainers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } } for i := range testCase.pod.Spec.Containers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -3148,7 +3149,7 @@ func TestStaticPolicyAllocateWithRestartableInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.InitContainers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.InitContainers[i], lifecycle.AddOperation) if !reflect.DeepEqual(err, testCase.expectedError) { t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) } @@ -3159,7 +3160,7 @@ func TestStaticPolicyAllocateWithRestartableInitContainers(t *testing.T) { } for i := range testCase.pod.Spec.Containers { - err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i]) + err = p.Allocate(logger, s, testCase.pod, &testCase.pod.Spec.Containers[i], lifecycle.AddOperation) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -3827,7 +3828,7 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - topologyHints := p.GetTopologyHints(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0]) + topologyHints := p.GetTopologyHints(logger, s, testCase.pod, &testCase.pod.Spec.Containers[0], lifecycle.AddOperation) if !reflect.DeepEqual(topologyHints, testCase.expectedTopologyHints) { t.Fatalf("The actual topology hints: '%+v' are different from the expected one: '%+v'", topologyHints, testCase.expectedTopologyHints) } @@ -3860,7 +3861,7 @@ func TestStaticPolicyGetPodTopologyHints(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } - topologyHints := p.GetPodTopologyHints(logger, s, testCase.pod) + topologyHints := p.GetPodTopologyHints(logger, s, testCase.pod, lifecycle.AddOperation) if !reflect.DeepEqual(topologyHints, testCase.expectedTopologyHints) { t.Fatalf("The actual topology hints: '%+v' are different from the expected one: '%+v'", topologyHints, testCase.expectedTopologyHints) } diff --git a/pkg/kubelet/cm/topologymanager/policy_test.go b/pkg/kubelet/cm/topologymanager/policy_test.go index fb94454b94703..f7dce8c1f5ef9 100644 --- a/pkg/kubelet/cm/topologymanager/policy_test.go +++ b/pkg/kubelet/cm/topologymanager/policy_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -1279,7 +1280,7 @@ func testPolicyMerge(policy Policy, tcases []policyMergeTestCase, t *testing.T) for _, tc := range tcases { var providersHints []map[string][]TopologyHint for _, provider := range tc.hp { - hints := provider.GetTopologyHints(&v1.Pod{}, &v1.Container{}) + hints := provider.GetTopologyHints(&v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) providersHints = append(providersHints, hints) } diff --git a/pkg/kubelet/cm/topologymanager/scope.go b/pkg/kubelet/cm/topologymanager/scope.go index ff34253df953c..e4c8388294a72 100644 --- a/pkg/kubelet/cm/topologymanager/scope.go +++ b/pkg/kubelet/cm/topologymanager/scope.go @@ -42,7 +42,7 @@ type podTopologyHints map[string]map[string]TopologyHint type Scope interface { Name() string GetPolicy() Policy - Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult + Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult // AddHintProvider adds a hint provider to manager to indicate the hint provider // wants to be consoluted with when making topology hints AddHintProvider(h HintProvider) @@ -140,9 +140,9 @@ func (s *scope) RemoveContainer(containerID string) error { return nil } -func (s *scope) admitPolicyNone(pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *scope) admitPolicyNone(pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { return admission.GetPodAdmitResult(err) } @@ -152,9 +152,9 @@ func (s *scope) admitPolicyNone(pod *v1.Pod) lifecycle.PodAdmitResult { // It would be better to implement this function in topologymanager instead of scope // but topologymanager do not track providers anymore -func (s *scope) allocateAlignedResources(pod *v1.Pod, container *v1.Container) error { +func (s *scope) allocateAlignedResources(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { for _, provider := range s.hintProviders { - err := provider.Allocate(pod, container) + err := provider.Allocate(pod, container, operation) if err != nil { return err } diff --git a/pkg/kubelet/cm/topologymanager/scope_container.go b/pkg/kubelet/cm/topologymanager/scope_container.go index d8a68ec451558..db2e4ed54c056 100644 --- a/pkg/kubelet/cm/topologymanager/scope_container.go +++ b/pkg/kubelet/cm/topologymanager/scope_container.go @@ -46,12 +46,12 @@ func NewContainerScope(policy Policy) Scope { } } -func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { logger := klog.FromContext(ctx) for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { - bestHint, admit := s.calculateAffinity(logger, pod, &container) - logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + bestHint, admit := s.calculateAffinity(logger, pod, &container, operation) + logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) if !admit { if IsAlignmentGuaranteed(s.policy) { @@ -60,10 +60,10 @@ func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAd metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(&TopologyAffinityError{}) } - logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) s.setTopologyHints(string(pod.UID), container.Name, bestHint) - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(err) @@ -77,21 +77,21 @@ func (s *containerScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAd return admission.GetPodAdmitResult(nil) } -func (s *containerScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, container *v1.Container) []map[string][]TopologyHint { +func (s *containerScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) []map[string][]TopologyHint { var providersHints []map[string][]TopologyHint for _, provider := range s.hintProviders { // Get the TopologyHints for a Container from a provider. - hints := provider.GetTopologyHints(pod, container) + hints := provider.GetTopologyHints(pod, container, operation) providersHints = append(providersHints, hints) - logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) } return providersHints } -func (s *containerScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, container *v1.Container) (TopologyHint, bool) { - providersHints := s.accumulateProvidersHints(logger, pod, container) +func (s *containerScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) (TopologyHint, bool) { + providersHints := s.accumulateProvidersHints(logger, pod, container, operation) bestHint, admit := s.policy.Merge(logger, providersHints) - logger.Info("ContainerTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) + logger.Info("ContainerTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name, "operation", operation) return bestHint, admit } diff --git a/pkg/kubelet/cm/topologymanager/scope_container_test.go b/pkg/kubelet/cm/topologymanager/scope_container_test.go index bb597169ef619..f44cf3b679938 100644 --- a/pkg/kubelet/cm/topologymanager/scope_container_test.go +++ b/pkg/kubelet/cm/topologymanager/scope_container_test.go @@ -21,6 +21,7 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -133,7 +134,7 @@ func TestContainerCalculateAffinity(t *testing.T) { }, } - ctnScope.calculateAffinity(logger, &v1.Pod{}, &v1.Container{}) + ctnScope.calculateAffinity(logger, &v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) actual := ctnScope.policy.(*mockPolicy).ph if !reflect.DeepEqual(tc.expected, actual) { t.Errorf("Test Case: %s", tc.name) @@ -265,7 +266,7 @@ func TestContainerAccumulateProvidersHints(t *testing.T) { hintProviders: tc.hp, }, } - actual := ctnScope.accumulateProvidersHints(logger, &v1.Pod{}, &v1.Container{}) + actual := ctnScope.accumulateProvidersHints(logger, &v1.Pod{}, &v1.Container{}, lifecycle.AddOperation) if !reflect.DeepEqual(actual, tc.expected) { t.Errorf("Test Case %s: Expected NUMANodeAffinity in result to be %v, got %v", tc.name, tc.expected, actual) } diff --git a/pkg/kubelet/cm/topologymanager/scope_none.go b/pkg/kubelet/cm/topologymanager/scope_none.go index 44f6c32158f1d..730ecd32dc4ef 100644 --- a/pkg/kubelet/cm/topologymanager/scope_none.go +++ b/pkg/kubelet/cm/topologymanager/scope_none.go @@ -43,6 +43,6 @@ func NewNoneScope() Scope { } } -func (s *noneScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { - return s.admitPolicyNone(pod) +func (s *noneScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { + return s.admitPolicyNone(pod, operation) } diff --git a/pkg/kubelet/cm/topologymanager/scope_pod.go b/pkg/kubelet/cm/topologymanager/scope_pod.go index 8498ccbd89a04..2fb583c389c19 100644 --- a/pkg/kubelet/cm/topologymanager/scope_pod.go +++ b/pkg/kubelet/cm/topologymanager/scope_pod.go @@ -46,10 +46,10 @@ func NewPodScope(policy Policy) Scope { } } -func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitResult { +func (s *podScope) Admit(ctx context.Context, pod *v1.Pod, operation lifecycle.Operation) lifecycle.PodAdmitResult { logger := klog.FromContext(ctx) - bestHint, admit := s.calculateAffinity(logger, pod) + bestHint, admit := s.calculateAffinity(logger, pod, operation) logger.Info("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod)) if !admit { if IsAlignmentGuaranteed(s.policy) { @@ -64,7 +64,7 @@ func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitRes logger.Info("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) s.setTopologyHints(string(pod.UID), container.Name, bestHint) - err := s.allocateAlignedResources(pod, &container) + err := s.allocateAlignedResources(pod, &container, operation) if err != nil { metrics.TopologyManagerAdmissionErrorsTotal.Inc() return admission.GetPodAdmitResult(err) @@ -78,21 +78,21 @@ func (s *podScope) Admit(ctx context.Context, pod *v1.Pod) lifecycle.PodAdmitRes return admission.GetPodAdmitResult(nil) } -func (s *podScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod) []map[string][]TopologyHint { +func (s *podScope) accumulateProvidersHints(logger klog.Logger, pod *v1.Pod, operation lifecycle.Operation) []map[string][]TopologyHint { var providersHints []map[string][]TopologyHint for _, provider := range s.hintProviders { // Get the TopologyHints for a Pod from a provider. - hints := provider.GetPodTopologyHints(pod) + hints := provider.GetPodTopologyHints(pod, operation) providersHints = append(providersHints, hints) - logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod)) + logger.Info("TopologyHints", "hints", hints, "pod", klog.KObj(pod), "operation", operation) } return providersHints } -func (s *podScope) calculateAffinity(logger klog.Logger, pod *v1.Pod) (TopologyHint, bool) { - providersHints := s.accumulateProvidersHints(logger, pod) +func (s *podScope) calculateAffinity(logger klog.Logger, pod *v1.Pod, operation lifecycle.Operation) (TopologyHint, bool) { + providersHints := s.accumulateProvidersHints(logger, pod, operation) bestHint, admit := s.policy.Merge(logger, providersHints) - logger.Info("PodTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod)) + logger.Info("PodTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "operation", operation) return bestHint, admit } diff --git a/pkg/kubelet/cm/topologymanager/scope_pod_test.go b/pkg/kubelet/cm/topologymanager/scope_pod_test.go index c8d347f552f34..6408b26b731e9 100644 --- a/pkg/kubelet/cm/topologymanager/scope_pod_test.go +++ b/pkg/kubelet/cm/topologymanager/scope_pod_test.go @@ -21,6 +21,7 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/test/utils/ktesting" ) @@ -133,7 +134,7 @@ func TestPodCalculateAffinity(t *testing.T) { }, } - podScope.calculateAffinity(logger, &v1.Pod{}) + podScope.calculateAffinity(logger, &v1.Pod{}, lifecycle.AddOperation) actual := podScope.policy.(*mockPolicy).ph if !reflect.DeepEqual(tc.expected, actual) { t.Errorf("Test Case: %s", tc.name) @@ -265,7 +266,7 @@ func TestPodAccumulateProvidersHints(t *testing.T) { hintProviders: tc.hp, }, } - actual := pScope.accumulateProvidersHints(logger, &v1.Pod{}) + actual := pScope.accumulateProvidersHints(logger, &v1.Pod{}, lifecycle.AddOperation) if !reflect.DeepEqual(actual, tc.expected) { t.Errorf("Test Case %s: Expected NUMANodeAffinity in result to be %v, got %v", tc.name, tc.expected, actual) } diff --git a/pkg/kubelet/cm/topologymanager/topology_manager.go b/pkg/kubelet/cm/topologymanager/topology_manager.go index 458efe178d9ef..463bd01066d8d 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager.go @@ -85,14 +85,14 @@ type HintProvider interface { // this function for each hint provider, and merges the hints to produce // a consensus "best" hint. The hint providers may subsequently query the // topology manager to influence actual resource assignment. - GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint + GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]TopologyHint // GetPodTopologyHints returns a map of resource names to a list of possible // concrete resource allocations per Pod in terms of NUMA locality hints. - GetPodTopologyHints(pod *v1.Pod) map[string][]TopologyHint + GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]TopologyHint // Allocate triggers resource allocation to occur on the HintProvider after // all hints have been gathered and the aggregated Hint is available via a // call to Store.GetAffinity(). - Allocate(pod *v1.Pod, container *v1.Container) error + Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error } // Store interface is to allow Hint Providers to retrieve pod affinity @@ -235,7 +235,7 @@ func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR metrics.TopologyManagerAdmissionRequestsTotal.Inc() startTime := time.Now() - podAdmitResult := m.scope.Admit(ctx, attrs.Pod) + podAdmitResult := m.scope.Admit(ctx, attrs.Pod, attrs.Operation) metrics.TopologyManagerAdmissionDuration.Observe(float64(time.Since(startTime).Milliseconds())) logger.V(4).Info("Pod Admit Result", "Message", podAdmitResult.Message, "pod", klog.KObj(attrs.Pod)) diff --git a/pkg/kubelet/cm/topologymanager/topology_manager_test.go b/pkg/kubelet/cm/topologymanager/topology_manager_test.go index 64cea9f7d3fc7..244d3af3ce83c 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager_test.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager_test.go @@ -210,15 +210,15 @@ type mockHintProvider struct { //allocateError error } -func (m *mockHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint { +func (m *mockHintProvider) GetTopologyHints(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) map[string][]TopologyHint { return m.th } -func (m *mockHintProvider) GetPodTopologyHints(pod *v1.Pod) map[string][]TopologyHint { +func (m *mockHintProvider) GetPodTopologyHints(pod *v1.Pod, operation lifecycle.Operation) map[string][]TopologyHint { return m.th } -func (m *mockHintProvider) Allocate(pod *v1.Pod, container *v1.Container) error { +func (m *mockHintProvider) Allocate(pod *v1.Pod, container *v1.Container, operation lifecycle.Operation) error { //return allocateError return nil } diff --git a/test/e2e/common/node/framework/podresize/resize.go b/test/e2e/common/node/framework/podresize/resize.go index 0dc9ce2572ee0..2f5903c3416df 100644 --- a/test/e2e/common/node/framework/podresize/resize.go +++ b/test/e2e/common/node/framework/podresize/resize.go @@ -25,6 +25,9 @@ import ( "strings" "time" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" @@ -35,23 +38,31 @@ import ( "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" + "k8s.io/utils/cpuset" ) const ( - MinContainerRuntimeVersion string = "1.6.9" + CgroupCPUPeriod string = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" + CgroupCPUShares string = "/sys/fs/cgroup/cpu/cpu.shares" + CgroupCPUQuota string = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" + CgroupMemLimit string = "/sys/fs/cgroup/memory/memory.limit_in_bytes" + Cgroupv2MemLimit string = "/sys/fs/cgroup/memory.max" + Cgroupv2MemRequest string = "/sys/fs/cgroup/memory.min" + Cgroupv2CPULimit string = "/sys/fs/cgroup/cpu.max" + Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" + CPUPeriod string = "100000" ) type ResizableContainerInfo struct { - Name string - Resources *cgroups.ContainerResources - CPUPolicy *v1.ResourceResizeRestartPolicy - MemPolicy *v1.ResourceResizeRestartPolicy - RestartCount int32 - RestartPolicy v1.ContainerRestartPolicy - InitCtr bool + Name string + Resources *cgroups.ContainerResources + CPUPolicy *v1.ResourceResizeRestartPolicy + MemPolicy *v1.ResourceResizeRestartPolicy + RestartCount int32 + RestartPolicy v1.ContainerRestartPolicy + InitCtr bool + CPUsAllowedListValue string + CPUsAllowedList string } func getTestResizePolicy(tcInfo ResizableContainerInfo) (resizePol []v1.ContainerResizePolicy) { @@ -458,6 +469,39 @@ func ExpectPodResized(ctx context.Context, f *framework.Framework, resizedPod *v } } +func ExpectPodResizePending(ctx context.Context, f *framework.Framework, resizePendingPod *v1.Pod, expectedContainers []ResizableContainerInfo) { + ginkgo.GinkgoHelper() + + // Verify Pod Containers Cgroup Values + var errs []error + if cgroupErrs := VerifyPodContainersCgroupValues(ctx, f, resizePendingPod, expectedContainers); cgroupErrs != nil { + errs = append(errs, fmt.Errorf("container cgroup values don't match expected: %w", formatErrors(cgroupErrs))) + } + if resourceErrs := VerifyPodStatusResources(resizePendingPod, expectedContainers); resourceErrs != nil { + errs = append(errs, fmt.Errorf("container status resources don't match expected: %w", formatErrors(resourceErrs))) + } + if restartErrs := verifyPodRestarts(ctx, f, resizePendingPod, expectedContainers); restartErrs != nil { + errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) + } + + // Verify Pod Resize conditions are empty. + podResizePendingFound := false + for _, condition := range resizePendingPod.Status.Conditions { + if condition.Type == v1.PodResizePending { + podResizePendingFound = true + } + } + if !podResizePendingFound { + errs = append(errs, fmt.Errorf("resize condition type %s not found in pod status", v1.PodResizePending)) + } + + if len(errs) > 0 { + resizePendingPod.ManagedFields = nil // Suppress managed fields in error output. + framework.ExpectNoError(formatErrors(utilerrors.NewAggregate(errs)), + "Verifying pod resources resize state. Pod: %s", framework.PrettyPrintJSON(resizePendingPod)) + } +} + func MakeResizePatch(originalContainers, desiredContainers []ResizableContainerInfo, originPodResources, desiredPodResources *v1.ResourceRequirements) []byte { original, err := json.Marshal(MakePodWithResizableContainers("", "", "", originalContainers, originPodResources)) framework.ExpectNoError(err) @@ -505,3 +549,40 @@ func formatErrors(err error) error { } return fmt.Errorf("[\n%s\n]", strings.Join(errStrings, ",\n")) } + +func VerifyPodContainersCPUsAllowedListValue(f *framework.Framework, pod *v1.Pod, wantCtrs []ResizableContainerInfo) error { + ginkgo.GinkgoHelper() + verifyCPUsAllowedListValue := func(cName, expectedCPUsAllowedListValue string, expectedCPUsAllowedList string) error { + mycmd := "grep Cpus_allowed_list /proc/self/status | cut -f2" + calValue, _, err := e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", mycmd) + framework.Logf("Namespace %s Pod %s Container %s - looking for Cpus allowed list value %s in /proc/self/status", + pod.Namespace, pod.Name, cName, calValue) + if err != nil { + return fmt.Errorf("failed to find expected value '%s' in container '%s' Cpus allowed list '/proc/self/status'", cName, expectedCPUsAllowedListValue) + } + c, err := cpuset.Parse(calValue) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for container %s in pod %s", cName, pod.Name) + cpuTotalValue := strconv.Itoa(c.Size()) + if cpuTotalValue != expectedCPUsAllowedListValue { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%s' not equal to expected '%s'", cName, calValue, cpuTotalValue, expectedCPUsAllowedListValue) + } + if expectedCPUsAllowedList != "" { + cExpected, err := cpuset.Parse(expectedCPUsAllowedList) + framework.ExpectNoError(err, "failed parsing Cpus allowed list for cexpectedCPUset") + if !c.Equals(cExpected) { + return fmt.Errorf("container '%s' cgroup value '%s' results to total CPUs '%v' not equal to expected '%v'", cName, calValue, c, cExpected) + } + } + return nil + } + for _, ci := range wantCtrs { + if ci.CPUsAllowedListValue == "" { + continue + } + err := verifyCPUsAllowedListValue(ci.Name, ci.CPUsAllowedListValue, ci.CPUsAllowedList) + if err != nil { + return err + } + } + return nil +} diff --git a/test/e2e/feature/feature.go b/test/e2e/feature/feature.go index b0bf9feaf7dce..357a8fdfd4111 100644 --- a/test/e2e/feature/feature.go +++ b/test/e2e/feature/feature.go @@ -202,6 +202,16 @@ var ( // ImageVolume is used for testing the image volume source feature (https://kep.k8s.io/4639). ImageVolume = framework.WithFeature(framework.ValidFeatures.Add("ImageVolume")) + // Owner: sig-node + // Marks a test for InPlacePodVerticalScaling feature that requires + // InPlacePodVerticalScaling feature gate to be enabled. + InPlacePodVerticalScaling = framework.WithFeature(framework.ValidFeatures.Add("InPlacePodVerticalScaling")) + + // Owner: sig-node + // Marks a test for InPlacePodVerticalScalingExclusiveCPUs feature that requires + // InPlacePodVerticalScalingExclusiveCPUs feature gate to be enabled. + InPlacePodVerticalScalingExclusiveCPUs = framework.WithFeature(framework.ValidFeatures.Add("InPlacePodVerticalScalingExclusiveCPUs")) + // Owner: sig-network // Marks tests that require a conforming implementation of // Ingress.networking.k8s.io to be present. diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index 7ca4c52867fd9..56cdea30f40dd 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -35,17 +35,22 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + helpers "k8s.io/component-helpers/resource" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/features" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/test/e2e/common/node/framework/cgroups" + "k8s.io/kubernetes/test/e2e/common/node/framework/podresize" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/utils/cpuset" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" ) @@ -2101,7 +2106,3577 @@ var _ = SIGDescribe("CPU Manager Incompatibility Pod Level Resources", ginkgo.Or }) }) +var _ = SIGDescribe("CPU Manager with InPlacePodVerticalScalingExclusiveCPUs disabled", + ginkgo.Ordered, + ginkgo.ContinueOnFailure, + framework.WithSerial(), + feature.CPUManager, + feature.InPlacePodVerticalScaling, + feature.InPlacePodVerticalScalingExclusiveCPUs, + framework.WithFeatureGate(features.InPlacePodVerticalScaling), + framework.WithFeatureGate(features.InPlacePodVerticalScalingExclusiveCPUs), + func() { + + type containerCPUInfo struct { + Name string + cpuCount int + } + + f := framework.NewDefaultFramework("cpu-manager-pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + // original kubeletconfig before the context start, to be restored + var oldCfg *kubeletconfig.KubeletConfiguration + var reservedCPUs cpuset.CPUSet + var onlineCPUs cpuset.CPUSet + var smtLevel int + var uncoreGroupSize int + // tracks all the pods created by a It() block. Best would be a namespace per It block + // TODO: move to a namespace per It block? + var podMap map[string]*v1.Pod + + // closure just and only to not carry around awkwardly `f` and `onlineCPUs` only for logging purposes + var skipIfAllocatableCPUsLessThan func(node *v1.Node, cpuReq int) + + ginkgo.BeforeAll(func(ctx context.Context) { + var err error + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + + onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime + framework.ExpectNoError(err) + framework.Logf("Online CPUs: %s", onlineCPUs) + + smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime + framework.Logf("SMT level: %d", smtLevel) + + uncoreGroupSize = getUncoreCPUGroupSize() + framework.Logf("Uncore Group Size: %d", uncoreGroupSize) + + e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode() + framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled) + + e2enodeCgroupDriver = oldCfg.CgroupDriver + framework.Logf("cgroup driver: %s", e2enodeCgroupDriver) + + runtime, _, err := getCRIClient() + framework.ExpectNoError(err, "Failed to get CRI client") + + version, err := runtime.Version(context.Background(), "") + framework.ExpectNoError(err, "Failed to get runtime version") + + e2enodeRuntimeName = version.GetRuntimeName() + framework.Logf("runtime: %s", e2enodeRuntimeName) + }) + + ginkgo.AfterAll(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) + + ginkgo.BeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + podMap = make(map[string]*v1.Pod) + }) + + ginkgo.JustBeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + + // use a closure to minimize the arguments, to make the usage more straightforward + skipIfAllocatableCPUsLessThan = func(node *v1.Node, val int) { + ginkgo.GinkgoHelper() + cpuReq := int64(val + reservedCPUs.Size()) // reserved CPUs are not usable, need to account them + // the framework is initialized using an injected BeforeEach node, so the + // earliest we can do is to initialize the other objects here + nodeCPUDetails := cpuDetailsFromNode(node) + + msg := fmt.Sprintf("%v full CPUs (detected=%v requested=%v reserved=%v online=%v smt=%v)", cpuReq, nodeCPUDetails.Allocatable, val, reservedCPUs.Size(), onlineCPUs.Size(), smtLevel) + ginkgo.By("Checking if allocatable: " + msg) + if nodeCPUDetails.Allocatable < cpuReq { + e2eskipper.Skipf("Skipping CPU Manager test: not allocatable %s", msg) + } + } + }) + + ginkgo.AfterEach(func(ctx context.Context) { + deletePodsAsync(ctx, f, podMap) + }) + + ginkgo.When("resizing a Guaranteed QoS single container pod with integer CPU requests", ginkgo.Label("guaranteed single container pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + expectedCpuInfo []containerCPUInfo, + wantError string, + ) { + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCpuInfo[0].cpuCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: false, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", originalCpuInfo[0].cpuCount)) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } + }, + ginkgo.Entry("neither should increase the CPU request/limit nor decrease the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("neither should increase the CPU request/limit nor increase the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not increase the exclusively CPUs, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not decrease the allocated exclusively CPUs below promised cpuset", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "1000m", CPULim: "1000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ginkgo.Entry("should not increase the allocated exclusively CPUs beyond available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000000m", CPULim: "2000000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible Resize is infeasible for Guaranteed Pods alongside CPU Manager", + ), + ) + }) + }, +) + +var _ = SIGDescribe("CPU Manager with InPlacePodVerticalScalingExclusiveCPUs enabled", + ginkgo.Ordered, + ginkgo.ContinueOnFailure, + framework.WithSerial(), + feature.CPUManager, + feature.InPlacePodVerticalScaling, + feature.InPlacePodVerticalScalingExclusiveCPUs, + framework.WithFeatureGate(features.InPlacePodVerticalScaling), + framework.WithFeatureGate(features.InPlacePodVerticalScalingExclusiveCPUs), + func() { + + type containerCPUInfo struct { + Name string + cpuCount int + } + + f := framework.NewDefaultFramework("cpu-manager-pod-resize-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + // original kubeletconfig before the context start, to be restored + var oldCfg *kubeletconfig.KubeletConfiguration + var reservedCPUs cpuset.CPUSet + var onlineCPUs cpuset.CPUSet + var smtLevel int + var uncoreGroupSize int + // tracks all the pods created by a It() block. Best would be a namespace per It block + // TODO: move to a namespace per It block? + var podMap map[string]*v1.Pod + + // closure just and only to not carry around awkwardly `f` and `onlineCPUs` only for logging purposes + var skipIfAllocatableCPUsLessThan func(node *v1.Node, cpuReq int) + + ginkgo.BeforeAll(func(ctx context.Context) { + var err error + oldCfg, err = getCurrentKubeletConfig(ctx) + framework.ExpectNoError(err) + + onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime + framework.ExpectNoError(err) + framework.Logf("Online CPUs: %s", onlineCPUs) + + smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime + framework.Logf("SMT level: %d", smtLevel) + + uncoreGroupSize = getUncoreCPUGroupSize() + framework.Logf("Uncore Group Size: %d", uncoreGroupSize) + + e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode() + framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled) + + e2enodeCgroupDriver = oldCfg.CgroupDriver + framework.Logf("cgroup driver: %s", e2enodeCgroupDriver) + + runtime, _, err := getCRIClient() + framework.ExpectNoError(err, "Failed to get CRI client") + + version, err := runtime.Version(context.Background(), "") + framework.ExpectNoError(err, "Failed to get runtime version") + + e2enodeRuntimeName = version.GetRuntimeName() + framework.Logf("runtime: %s", e2enodeRuntimeName) + }) + + ginkgo.AfterAll(func(ctx context.Context) { + updateKubeletConfig(ctx, f, oldCfg, true) + }) + + ginkgo.BeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + podMap = make(map[string]*v1.Pod) + }) + + ginkgo.JustBeforeEach(func(ctx context.Context) { + // note intentionally NOT set reservedCPUs - this must be initialized on a test-by-test basis + + // use a closure to minimize the arguments, to make the usage more straightforward + skipIfAllocatableCPUsLessThan = func(node *v1.Node, val int) { + ginkgo.GinkgoHelper() + cpuReq := int64(val + reservedCPUs.Size()) // reserved CPUs are not usable, need to account them + // the framework is initialized using an injected BeforeEach node, so the + // earliest we can do is to initialize the other objects here + nodeCPUDetails := cpuDetailsFromNode(node) + + msg := fmt.Sprintf("%v full CPUs (detected=%v requested=%v reserved=%v online=%v smt=%v)", cpuReq, nodeCPUDetails.Allocatable, val, reservedCPUs.Size(), onlineCPUs.Size(), smtLevel) + ginkgo.By("Checking if allocatable: " + msg) + if nodeCPUDetails.Allocatable < cpuReq { + e2eskipper.Skipf("Skipping CPU Manager test: not allocatable %s", msg) + } + } + }) + + ginkgo.AfterEach(func(ctx context.Context) { + deletePodsAsync(ctx, f, podMap) + }) + + ginkgo.When("resizing a Burstable single container Pod", ginkgo.Label("burstable single container pod resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("non-gu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should increase the CPU request/limit & the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "400m", CPULim: "2000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "400m", CPULim: "2000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request/limit and the memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "3000m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "1000m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "non-gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "500m", CPULim: "1000m", MemReq: "200Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Guaranteed single container Pod without integer CPU requests", ginkgo.Label("guaranteed single container pod resize"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should increase CPU & memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease CPU & memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "300m", CPULim: "300m", MemReq: "500Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "250Mi", MemLim: "250Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease CPU request/limit, increase memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "50m", CPULim: "50m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Burstable single container Pod", ginkgo.Label("burstable single container pod resize"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + wantError string, + ) { + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("bu-container-1", onlineCPUs.Size())) + } + }, + ginkgo.Entry("should decrease the memory request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "600Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "150m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU limit only", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request and increase the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "500m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request and the decrease CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "250Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request/limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request and increase the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "100Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request and decrease the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the CPU request and increase the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request and decrease the memory limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "400m", MemReq: "200Mi", MemLim: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request and increase the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "200m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "100m", CPULim: "300m", MemReq: "100Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should increase the memory request and decrease the CPU limit", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "400m", MemReq: "200Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", CPULim: "300m", MemReq: "300Mi", MemLim: "400Mi"}, + }, + }, + "", + ), + ginkgo.Entry("should decrease the memory request", + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "500Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "bu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200m", MemReq: "400Mi"}, + }, + }, + "", + ), + ) + }) + + ginkgo.When("resizing a Guaranteed Pod with a single container and integer CPU requests", ginkgo.Label("single container guaranteed pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainers []podresize.ResizableContainerInfo, + expectedContainers []podresize.ResizableContainerInfo, + expectedCpuInfo []containerCPUInfo, + wantError string, + ) { + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCpuInfo[0].cpuCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + })) + + podClient := e2epod.NewPodClient(f) + nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) + framework.ExpectNoError(err, "failed to get running nodes") + gomega.Expect(nodes.Items).ShouldNot(gomega.BeEmpty()) + framework.Logf("Found %d schedulable nodes", len(nodes.Items)) + + ginkgo.By("Find node CPU resources available for allocation!") + node := nodes.Items[0] + nodeAllocatableCPU, nodeAvailableCPU, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPU.MilliValue(), nodeAvailableCPU.MilliValue()) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + e2epod.SetNodeAffinity(&testPod1.Spec, node.Name) + + ginkgo.By("creating pod") + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations and policy are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", originalCpuInfo[0].cpuCount)) + + nodeAllocatableCPUAfterPodCreate, nodeAvailableCPUAfterPodCreate, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPUAfterPodCreate.MilliValue(), nodeAvailableCPUAfterPodCreate.MilliValue()) + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainers, nil, nil) + + if wantError == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + + nodeAllocatableCPUAfterPodResize, nodeAvailableCPUAfterPodResize, err := e2enode.GetNodeAllocatableAndAvailableQuantities(ctx, f.ClientSet, &node, v1.ResourceCPU) + framework.ExpectNoError(err, "failed to get CPU resources available for allocation") + framework.Logf("Node '%s': NodeAllocatable MilliCPUs = %dm. MilliCPUs currently available to allocate = %dm.", + node.Name, nodeAllocatableCPUAfterPodResize.MilliValue(), nodeAvailableCPUAfterPodResize.MilliValue()) + + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainers) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainers) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainers) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantError)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfo[0].cpuCount)) + } + }, + ginkgo.Entry("should increase the CPU request/limit, decrease memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should increase the CPU request/limit, increase memory request/limit, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "400Mi", MemLim: "400Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should increase exclusively CPUs, within available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + }, + "", + ), + ginkgo.Entry("should not decrease allocated exclusively CPUs, below promised cpuset", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "1000m", CPULim: "1000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should not increase allocated exclusively CPUs, beyond available capacity", + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "200000m", CPULim: "200000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "300Mi", MemLim: "300Mi"}, + }, + }, + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + }) + + ginkgo.When("topologyManagerPolicy is set to none, resizing a Guaranteed multiple containers Pod with integer CPU requests", ginkgo.Label("guaranteed multiple container pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "none", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase CPU (gu-container-1) request and limit, afterwards decrease CPU (gu-container-1) request and limit within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + }) + + ginkgo.When("topologyManagerPolicOption is set to best-effort, resizing a Guaranteed multiple containers Pod with integer CPU request", ginkgo.Label("guaranteed pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + reservedCPUs = cpuset.New(0) + }) + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "best-effort", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + }) + + ginkgo.When("topologyManagerPolicy option is set to restricted, resizing a Guaranteed multiple containers Pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + if smtLevel < 1 { + e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption) + } + reservedCPUs = cpuset.New(0) + }) + if smtLevel >= minSMTLevel { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "restricted", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + "", + ), + ) + } + if smtLevel == 1 { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "restricted", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to decrease (gu-container-1) CPU request/limit, within available capacity because of TopologyAffinityError", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "10000m", CPULim: "10000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 10, + }, + }, + // Want error after first patch + "Infeasible.*", + ), + ) + } + }) + + ginkgo.When("topology manager policy option is set to single-numa-node, resizing a Guaranteed multiple container pod, with integer CPU request", ginkgo.Label("guaranteed multiple containers pod with integer CPU requests resize", "exclusive-cpus"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + if smtLevel < 1 { + e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption) + } + reservedCPUs = cpuset.New(0) + }) + if smtLevel >= minSMTLevel { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "single-numa-node", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + } + if smtLevel == 1 { + ginkgo.DescribeTable("", + func(ctx context.Context, + originalContainers []podresize.ResizableContainerInfo, + originalCpuInfo []containerCPUInfo, + desiredContainersFirstPatch []podresize.ResizableContainerInfo, + expectedContainersFirstPatch []podresize.ResizableContainerInfo, + expectedCpuInfoFirstPatch []containerCPUInfo, + wantErrorFirstPatch string, + desiredContainersSecondPatch []podresize.ResizableContainerInfo, + expectedContainersSecondPatch []podresize.ResizableContainerInfo, + expectedCpuInfoSecondPatch []containerCPUInfo, + wantErrorSecondPatch string, + ) { + + expectedCPUCount := 0 + for ctx := range expectedCpuInfoFirstPatch { + expectedCPUCount += expectedCpuInfoFirstPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + expectedCPUCount = 0 + for ctx := range expectedCpuInfoSecondPatch { + expectedCPUCount += expectedCpuInfoSecondPatch[ctx].cpuCount + } + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), expectedCPUCount) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check + enableInPlacePodVerticalScalingExclusiveCPUs: true, + topologyManagerPolicyName: "single-numa-node", + topologyManagerScopeName: "container", + topologyManagerPolicyOptions: map[string]string{ + "max-allowable-numa-nodes": "8", + "prefer-closest-numa-nodes": "true", + }, + })) + + tStamp := strconv.Itoa(time.Now().Nanosecond()) + testPod1 := podresize.MakePodWithResizableContainers(f.Namespace.Name, "testpod1", tStamp, originalContainers, nil) + testPod1 = e2epod.MustMixinRestrictedPodSecurity(testPod1) + + ginkgo.By("creating pod with multiple containers") + podClient := e2epod.NewPodClient(f) + newPods := podClient.CreateBatch(ctx, []*v1.Pod{testPod1}) + + ginkgo.By("verifying original pod resources, allocations are as expected") + podresize.VerifyPodResources(newPods[0], originalContainers, nil) + + ginkgo.By("verifying original pod cpusets are as expected") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(originalCpuInfo[cdx].Name, originalCpuInfo[cdx].cpuCount)) + } + + ginkgo.By("patching pod for resize") + patchString := podresize.MakeResizePatch(originalContainers, desiredContainersFirstPatch, nil, nil) + + if wantErrorFirstPatch == "" { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + expected := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersFirstPatch) + ginkgo.By("verifying pod resources are as expected post patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for resize to be actuated") + resizedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after resize") + for cdx := range originalCpuInfo { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoFirstPatch[cdx].Name, expectedCpuInfoFirstPatch[cdx].cpuCount)) + } + + ginkgo.By("patching again pod for resize") + secondPatchString := podresize.MakeResizePatch(expected, desiredContainersSecondPatch, nil, nil) + + if wantErrorSecondPatch == "" { + + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + expected = podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainersSecondPatch) + ginkgo.By("verifying pod resources are as expected post second patch, pre-actuation") + podresize.VerifyPodResources(patchedPod, expected, nil) + + ginkgo.By("waiting for second patch resize to be actuated") + resizedPod = podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expected) + podresize.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By("verifying pod resources after second resize") + podresize.VerifyPodResources(resizedPod, expected, nil) + + ginkgo.By("verifying pod cpusets after second resize") + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } else { + patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(secondPatchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch again pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post second patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersSecondPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod for second patch") + + ginkgo.By("waiting for testing pod resize to be actuated for second patch") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersSecondPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending for second patch") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod for second patch") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersSecondPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation for second patch") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason for second patch") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorSecondPatch)) + + for cdx := range expectedCpuInfoSecondPatch { + gomega.Expect(newPods[0]).To(HaveContainerCPUsCount(expectedCpuInfoSecondPatch[cdx].Name, expectedCpuInfoSecondPatch[cdx].cpuCount)) + } + } + } else { + patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, + newPods[0].Name, apimachinerytypes.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(pErr, "failed to patch pod for resize") + + ginkgo.By("verifying testing pod resources are as expected post patch, pre-actuation") + expectedPreActuation := podresize.UpdateExpectedContainerRestarts(ctx, patchedPod, desiredContainersFirstPatch) + podresize.VerifyPodResources(patchedPod, expectedPreActuation, nil) + + resizePendingPod, err := framework.GetObject(podClient.Get, patchedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get resize pending pod") + + ginkgo.By("waiting for testing pod resize to be actuated") + expectedPostActuation := podresize.UpdateExpectedContainerRestarts(ctx, resizePendingPod, expectedContainersFirstPatch) + actuatedPod := podresize.WaitForPodResizeActuation(ctx, f, podClient, newPods[0], expectedPostActuation) + + ginkgo.By("waiting for testing pod resize status to be pending") + WaitForPodResizePending(ctx, f, actuatedPod) + + actuatedPod, err = framework.GetObject(podClient.Get, actuatedPod.Name, metav1.GetOptions{})(ctx) + framework.ExpectNoError(err, "failed to get actuated pod") + + expectedPostActuation = podresize.UpdateExpectedContainerRestarts(ctx, actuatedPod, expectedContainersFirstPatch) + ginkgo.By("verifying testing pod condition type as expected post patch, post-actuation") + podresize.ExpectPodResizePending(ctx, f, actuatedPod, expectedPostActuation) + + ginkgo.By("ensuring the testing pod is failed for the expected reason") + gomega.Expect(actuatedPod).To(HaveStatusConditionsMatchingRegex(wantErrorFirstPatch)) + + // we cannot nor we should predict which CPUs the container gets + ginkgo.By("verifying pod cpusets after resize") + gomega.Expect(actuatedPod).To(HaveContainerCPUsCount("gu-container-1", expectedCpuInfoFirstPatch[0].cpuCount)) + } + }, + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit and afterwards decrease (gu-container-1) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards restore (gu-container-1) CPU request/limit and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 4, + }, + }, + "", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to reduce (gu-container-1) CPU request/limit, below promised and increase (gu-container-2) CPU request/limit, within available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "3000m", CPULim: "3000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 3, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "prohibitedCPUAllocation.*", + ), + ginkgo.Entry("should first increase (gu-container-1) CPU request/limit, afterwards fail to restore (gu-container-1) CPU request/limit and to increase (gu-container-2) CPU request/limit above available capacity", + // Initial + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount before first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 2, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Desired first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after first patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after first patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + // Want error after first patch + "", + // Desired second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "40000m", CPULim: "40000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected after second patch + []podresize.ResizableContainerInfo{ + { + Name: "gu-container-1", + Resources: &cgroups.ContainerResources{CPUReq: "4000m", CPULim: "4000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + { + Name: "gu-container-2", + Resources: &cgroups.ContainerResources{CPUReq: "2000m", CPULim: "2000m", MemReq: "200Mi", MemLim: "200Mi"}, + }, + }, + // Expected cpuCount after second patch + []containerCPUInfo{ + { + Name: "gu-container-1", + cpuCount: 4, + }, + { + Name: "gu-container-2", + cpuCount: 2, + }, + }, + "Infeasible.*Node.*didn't.*have.*enough.*capacity.*", + ), + ) + } + }) + }, +) + // Matching helpers +func WaitForPodResizePending(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as pending", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + for _, condition := range pod.Status.Conditions { + + if condition.Type == v1.PodResizePending { + return true, nil + } + } + return false, nil + })) +} + +func WaitForPodResizeDeferred(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as deferred", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeDeferred(pod), nil + })) +} + +func WaitForPodResizeInfeasible(ctx context.Context, f *framework.Framework, testPod *v1.Pod) { + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "display pod resize status as infeasible", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeInfeasible(pod), nil + })) +} + +func HaveStatusConditionsMatchingRegex(expr string) types.GomegaMatcher { + return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { + re, err := regexp.Compile(expr) + if err != nil { + return false, err + } + for _, condition := range actual.Status.Conditions { + if re.MatchString(fmt.Sprintf("%v", condition)) { + return true, nil + } + } + return false, nil + }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} conditions {{.Actual.Status.Conditions}} does not match regexp {{.Data}}", expr) +} func HaveStatusReasonMatchingRegex(expr string) types.GomegaMatcher { return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) { @@ -2854,13 +6429,17 @@ func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribut } type cpuManagerKubeletArguments struct { - policyName string - enableCPUManagerOptions bool - disableCPUQuotaWithExclusiveCPUs bool - enablePodLevelResources bool - customCPUCFSQuotaPeriod time.Duration - reservedSystemCPUs cpuset.CPUSet - options map[string]string + policyName string + enableCPUManagerOptions bool + disableCPUQuotaWithExclusiveCPUs bool + enablePodLevelResources bool + customCPUCFSQuotaPeriod time.Duration + reservedSystemCPUs cpuset.CPUSet + options map[string]string + enableInPlacePodVerticalScalingExclusiveCPUs bool + topologyManagerPolicyName string + topologyManagerScopeName string + topologyManagerPolicyOptions map[string]string } func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { @@ -2873,6 +6452,7 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions newCfg.FeatureGates["DisableCPUQuotaWithExclusiveCPUs"] = kubeletArguments.disableCPUQuotaWithExclusiveCPUs newCfg.FeatureGates["PodLevelResources"] = kubeletArguments.enablePodLevelResources + newCfg.FeatureGates["InPlacePodVerticalScalingExclusiveCPUs"] = kubeletArguments.enableInPlacePodVerticalScalingExclusiveCPUs if kubeletArguments.customCPUCFSQuotaPeriod != 0 { newCfg.FeatureGates["CustomCPUCFSQuotaPeriod"] = true @@ -2884,6 +6464,13 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku newCfg.CPUManagerPolicy = kubeletArguments.policyName newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} + newCfg.TopologyManagerPolicy = kubeletArguments.topologyManagerPolicyName + newCfg.TopologyManagerScope = kubeletArguments.topologyManagerScopeName + + if kubeletArguments.topologyManagerPolicyOptions != nil { + newCfg.TopologyManagerPolicyOptions = kubeletArguments.topologyManagerPolicyOptions + } + if kubeletArguments.options != nil { newCfg.CPUManagerPolicyOptions = kubeletArguments.options } diff --git a/test/e2e_node/util_machineinfo_unsupported.go b/test/e2e_node/util_machineinfo_unsupported.go index 7863d27846392..1dd7ca1e9cd80 100644 --- a/test/e2e_node/util_machineinfo_unsupported.go +++ b/test/e2e_node/util_machineinfo_unsupported.go @@ -53,3 +53,7 @@ func getCoreSiblingList(cpuRes int64) string { func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) { return nil, errors.New("not implemented") } + +func getSMTLevel() int { + return 1 +}