From 2a78f0d575713bf00e5f6a226ef53741cab834b6 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 12:53:55 +0200
Subject: [PATCH 01/25] Add Kai Scheduler VPA objects

---
 pkg/apis/kai/v1/global.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/apis/kai/v1/global.go b/pkg/apis/kai/v1/global.go
index c355d84c3..407028f76 100644
--- a/pkg/apis/kai/v1/global.go
+++ b/pkg/apis/kai/v1/global.go
@@ -13,6 +13,10 @@ import (
 
 // GlobalConfig defines the global configuration of the system
 type GlobalConfig struct {
+	// VPA defines the default Vertical Pod Autoscaler configuration for all services
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
+
 	// Openshift configures the operator to install on Openshift
 	// +kubebuilder:validation:Optional
 	Openshift *bool `json:"openshift,omitempty"`
@@ -103,6 +107,11 @@ func (g *GlobalConfig) SetDefaultWhereNeeded() {
 	}
 
 	g.RequireDefaultPodAntiAffinityTerm = common.SetDefault(g.RequireDefaultPodAntiAffinityTerm, ptr.To(false))
+
+	if g.VPA == nil {
+		g.VPA = &common.VPASpec{}
+	}
+	g.VPA.SetDefaultsWhereNeeded()
 }
 
 func (g *GlobalConfig) GetSecurityContext() *v1.SecurityContext {

From 83845b152f51db4c047f3c0666ffc8cda74e88ae Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 12:54:06 +0200
Subject: [PATCH 02/25] Add VPA Spec definition

---
 pkg/apis/kai/v1/common/vpa.go | 37 +++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 pkg/apis/kai/v1/common/vpa.go

diff --git a/pkg/apis/kai/v1/common/vpa.go b/pkg/apis/kai/v1/common/vpa.go
new file mode 100644
index 000000000..e4ba3ee9a
--- /dev/null
+++ b/pkg/apis/kai/v1/common/vpa.go
@@ -0,0 +1,37 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+// +kubebuilder:object:generate:=true
+package common
+
+import (
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/utils/ptr"
+)
+
+// VPASpec defines the Vertical Pod Autoscaler (VPA) configuration of a service
+type VPASpec struct {
+	// Enabled specifies if VPA should be enabled; an unset value is treated as disabled
+	// +kubebuilder:validation:Optional
+	Enabled *bool `json:"enabled,omitempty"`
+
+	// UpdatePolicy controls when and how VPA applies changes to pod resources; SetDefaultsWhereNeeded fills in the InPlaceOrRecreate update mode when unset
+	// +kubebuilder:validation:Optional
+	UpdatePolicy *vpav1.PodUpdatePolicy `json:"updatePolicy,omitempty"`
+
+	// ResourcePolicy controls how VPA computes recommended resources for containers; no default is applied when unset
+	// +kubebuilder:validation:Optional
+	ResourcePolicy *vpav1.PodResourcePolicy `json:"resourcePolicy,omitempty"`
+}
+
+// SetDefaultsWhereNeeded fills in the fields that were left unset:
+//   - Enabled becomes false.
+//   - UpdatePolicy becomes a policy whose UpdateMode is InPlaceOrRecreate.
+//
+// ResourcePolicy is not modified.
+func (v *VPASpec) SetDefaultsWhereNeeded() {
+	v.Enabled = SetDefault(v.Enabled, ptr.To(false))
+	v.UpdatePolicy = SetDefault(v.UpdatePolicy, &vpav1.PodUpdatePolicy{
+		UpdateMode: ptr.To(vpav1.UpdateModeInPlaceOrRecreate),
+	})
+}

From 7755f13a383704028a968db0b2f6294acb28bc72 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 12:59:21 +0200
Subject: [PATCH 03/25] Add VPA config to the scheduler's components

---
 pkg/apis/kai/v1/admission/admission.go             | 10 +++++++++-
 pkg/apis/kai/v1/binder/binder.go                   | 10 +++++++++-
 pkg/apis/kai/v1/config_types.go                    | 14 +++++++-------
 .../v1/node_scale_adjuster/node_scale_adjuster.go  | 10 +++++++++-
 .../pod_group_controller/pod_group_controller.go   | 10 +++++++++-
 pkg/apis/kai/v1/pod_grouper/pod_grouper.go         | 10 +++++++++-
 .../kai/v1/queue_controller/queue_controller.go    | 10 +++++++++-
 pkg/apis/kai/v1/scheduler/scheduler.go             | 10 +++++++++-
 8 files changed, 70 insertions(+), 14 deletions(-)

diff --git a/pkg/apis/kai/v1/admission/admission.go b/pkg/apis/kai/v1/admission/admission.go
index 61f67a393..0cea4f5fc 100644
--- a/pkg/apis/kai/v1/admission/admission.go
+++ b/pkg/apis/kai/v1/admission/admission.go
@@ -48,9 +48,13 @@ type Admission struct {
 	// set to empty string to disable
 	// +kubebuilder:validation:Optional
 	GPUPodRuntimeClassName *string `json:"gpuPodRuntimeClassName,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the admission service
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
-func (b *Admission) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (b *Admission) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	b.Service = common.SetDefault(b.Service, &common.Service{})
 	b.Service.SetDefaultsWhereNeeded(imageName)
 
@@ -68,6 +72,10 @@ func (b *Admission) SetDefaultsWhereNeeded(replicaCount *int32) {
 	b.MutatingWebhookConfigurationName = common.SetDefault(b.MutatingWebhookConfigurationName, ptr.To(defaultMutatingWebhookName))
 
 	b.GPUPodRuntimeClassName = common.SetDefault(b.GPUPodRuntimeClassName, ptr.To(constants.DefaultRuntimeClassName))
+
+	if b.VPA == nil {
+		b.VPA = globalVPA
+	}
 }
 
 // Webhook defines configuration for the admission webhook
diff --git a/pkg/apis/kai/v1/binder/binder.go b/pkg/apis/kai/v1/binder/binder.go
index 5763c87ab..4839cbaef 100644
--- a/pkg/apis/kai/v1/binder/binder.go
+++ b/pkg/apis/kai/v1/binder/binder.go
@@ -47,9 +47,13 @@ type Binder struct {
 	// leave empty if unsure to let the operator auto detect using ClusterPolicy (nvidia gpu-operator only)
 	// +kubebuilder:validation:Optional
 	CDIEnabled *bool `json:"cdiEnabled,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the binder
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
-func (b *Binder) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (b *Binder) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	b.Service = common.SetDefault(b.Service, &common.Service{})
 	b.Service.Resources = common.SetDefault(b.Service.Resources, &common.Resources{})
 	if b.Service.Resources.Requests == nil {
@@ -81,6 +85,10 @@ func (b *Binder) SetDefaultsWhereNeeded(replicaCount *int32) {
 
 	b.ProbePort = common.SetDefault(b.ProbePort, ptr.To(8081))
 	b.MetricsPort = common.SetDefault(b.MetricsPort, ptr.To(8080))
+
+	if b.VPA == nil {
+		b.VPA = globalVPA
+	}
 }
 
 type ResourceReservation struct {
diff --git a/pkg/apis/kai/v1/config_types.go b/pkg/apis/kai/v1/config_types.go
index ac5cd35a1..ce35e2411 100644
--- a/pkg/apis/kai/v1/config_types.go
+++ b/pkg/apis/kai/v1/config_types.go
@@ -97,25 +97,25 @@ func (c *ConfigSpec) SetDefaultsWhereNeeded() {
 	c.Global.SetDefaultWhereNeeded()
 
 	c.QueueController = common.SetDefault(c.QueueController, &queue_controller.QueueController{})
-	c.QueueController.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.QueueController.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.Binder = common.SetDefault(c.Binder, &binder.Binder{})
-	c.Binder.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.Binder.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.PodGrouper = common.SetDefault(c.PodGrouper, &pod_grouper.PodGrouper{})
-	c.PodGrouper.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.PodGrouper.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.Scheduler = common.SetDefault(c.Scheduler, &scheduler.Scheduler{})
-	c.Scheduler.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.Scheduler.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.PodGroupController = common.SetDefault(c.PodGroupController, &pod_group_controller.PodGroupController{})
-	c.PodGroupController.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.PodGroupController.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.Admission = common.SetDefault(c.Admission, &admission.Admission{})
-	c.Admission.SetDefaultsWhereNeeded(c.Global.ReplicaCount)
+	c.Admission.SetDefaultsWhereNeeded(c.Global.ReplicaCount, c.Global.VPA)
 
 	c.NodeScaleAdjuster = common.SetDefault(c.NodeScaleAdjuster, &node_scale_adjuster.NodeScaleAdjuster{})
-	c.NodeScaleAdjuster.SetDefaultsWhereNeeded()
+	c.NodeScaleAdjuster.SetDefaultsWhereNeeded(c.Global.VPA)
 
 	c.Prometheus = common.SetDefault(c.Prometheus, &prometheus.Prometheus{})
 	c.Prometheus.SetDefaultsWhereNeeded()
diff --git a/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster.go b/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster.go
index a8acc61fe..e83d8f357 100644
--- a/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster.go
+++ b/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster.go
@@ -22,6 +22,10 @@ type NodeScaleAdjuster struct {
 	// Args specifies the CLI arguments for node-scale-adjuster
 	// +kubebuilder:validation:Optional
 	Args *Args `json:"args,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the node-scale-adjuster
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
 // Args specifies the CLI arguments for node-scale-adjuster
@@ -54,10 +58,14 @@ func (args *Args) SetDefaultsWhereNeeded() {
 }
 
 // SetDefaultsWhereNeeded sets default for unset fields
-func (nsa *NodeScaleAdjuster) SetDefaultsWhereNeeded() {
+func (nsa *NodeScaleAdjuster) SetDefaultsWhereNeeded(globalVPA *common.VPASpec) {
 	nsa.Service = common.SetDefault(nsa.Service, &common.Service{})
 	nsa.Service.SetDefaultsWhereNeeded(imageName)
 
 	nsa.Args = common.SetDefault(nsa.Args, &Args{})
 	nsa.Args.SetDefaultsWhereNeeded()
+
+	if nsa.VPA == nil {
+		nsa.VPA = globalVPA
+	}
 }
diff --git a/pkg/apis/kai/v1/pod_group_controller/pod_group_controller.go b/pkg/apis/kai/v1/pod_group_controller/pod_group_controller.go
index ef77f3623..f60418f67 100644
--- a/pkg/apis/kai/v1/pod_group_controller/pod_group_controller.go
+++ b/pkg/apis/kai/v1/pod_group_controller/pod_group_controller.go
@@ -35,9 +35,13 @@ type PodGroupController struct {
 	// Replicas specifies the number podgroup controller replicas
 	// +kubebuilder:validation:Optional
 	Replicas *int32 `json:"replicas,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the pod group controller
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
-func (pg *PodGroupController) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (pg *PodGroupController) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	pg.Service = common.SetDefault(pg.Service, &common.Service{})
 	pg.Service.SetDefaultsWhereNeeded(imageName)
 
@@ -61,6 +65,10 @@ func (pg *PodGroupController) SetDefaultsWhereNeeded(replicaCount *int32) {
 
 	pg.Webhooks = common.SetDefault(pg.Webhooks, &PodGroupControllerWebhooks{})
 	pg.Webhooks.SetDefaultsWhereNeeded()
+
+	if pg.VPA == nil {
+		pg.VPA = globalVPA
+	}
 }
 
 type Service struct {
diff --git a/pkg/apis/kai/v1/pod_grouper/pod_grouper.go b/pkg/apis/kai/v1/pod_grouper/pod_grouper.go
index 33d7c0a6b..a47dbfcab 100644
--- a/pkg/apis/kai/v1/pod_grouper/pod_grouper.go
+++ b/pkg/apis/kai/v1/pod_grouper/pod_grouper.go
@@ -33,6 +33,10 @@ type PodGrouper struct {
 	// Replicas specifies the number of replicas of the pod-grouper controller
 	// +kubebuilder:validation:Optional
 	Replicas *int32 `json:"replicas,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the pod-grouper
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
 // Args defines command line arguments for the pod-grouper
@@ -50,7 +54,7 @@ type Args struct {
 	DefaultPrioritiesConfigMapNamespace *string `json:"defaultPrioritiesConfigMapNamespace,omitempty"`
 }
 
-func (pg *PodGrouper) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (pg *PodGrouper) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	pg.Service = common.SetDefault(pg.Service, &common.Service{})
 	pg.Service.SetDefaultsWhereNeeded(imageName)
 
@@ -70,4 +74,8 @@ func (pg *PodGrouper) SetDefaultsWhereNeeded(replicaCount *int32) {
 	pg.Args = common.SetDefault(pg.Args, &Args{})
 	pg.Replicas = common.SetDefault(pg.Replicas, ptr.To(ptr.Deref(replicaCount, 1)))
 	pg.K8sClientConfig = common.SetDefault(pg.K8sClientConfig, &common.K8sClientConfig{})
+
+	if pg.VPA == nil {
+		pg.VPA = globalVPA
+	}
 }
diff --git a/pkg/apis/kai/v1/queue_controller/queue_controller.go b/pkg/apis/kai/v1/queue_controller/queue_controller.go
index 62e974023..9ac0deabd 100644
--- a/pkg/apis/kai/v1/queue_controller/queue_controller.go
+++ b/pkg/apis/kai/v1/queue_controller/queue_controller.go
@@ -42,9 +42,13 @@ type QueueController struct {
 	// QueueLabelToDefaultMetricValue maps queue label keys to default metric values when the label is absent
 	// +kubebuilder:validation:Optional
 	QueueLabelToDefaultMetricValue *string `json:"queueLabelToDefaultMetricValue,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the queue controller
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
-func (q *QueueController) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (q *QueueController) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	q.Service = common.SetDefault(q.Service, &common.Service{})
 	q.Service.SetDefaultsWhereNeeded(imageName)
 
@@ -68,6 +72,10 @@ func (q *QueueController) SetDefaultsWhereNeeded(replicaCount *int32) {
 
 	q.Webhooks = common.SetDefault(q.Webhooks, &QueueControllerWebhooks{})
 	q.Webhooks.SetDefaultsWhereNeeded()
+
+	if q.VPA == nil {
+		q.VPA = globalVPA
+	}
 }
 
 type Service struct {
diff --git a/pkg/apis/kai/v1/scheduler/scheduler.go b/pkg/apis/kai/v1/scheduler/scheduler.go
index edb862e32..e7d8beae0 100644
--- a/pkg/apis/kai/v1/scheduler/scheduler.go
+++ b/pkg/apis/kai/v1/scheduler/scheduler.go
@@ -30,9 +30,13 @@ type Scheduler struct {
 	// Replicas specifies the number of replicas of the scheduler service
 	// +kubebuilder:validation:Optional
 	Replicas *int32 `json:"replicas,omitempty"`
+
+	// VPA specifies Vertical Pod Autoscaler configuration for the scheduler
+	// +kubebuilder:validation:Optional
+	VPA *common.VPASpec `json:"vpa,omitempty"`
 }
 
-func (s *Scheduler) SetDefaultsWhereNeeded(replicaCount *int32) {
+func (s *Scheduler) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *common.VPASpec) {
 	s.Service = common.SetDefault(s.Service, &common.Service{})
 
 	s.Service.Resources = common.SetDefault(s.Service.Resources, &common.Resources{})
@@ -63,6 +67,10 @@ func (s *Scheduler) SetDefaultsWhereNeeded(replicaCount *int32) {
 	s.SchedulerService.SetDefaultsWhereNeeded()
 
 	s.Replicas = common.SetDefault(s.Replicas, ptr.To(ptr.Deref(replicaCount, 1)))
+
+	if s.VPA == nil {
+		s.VPA = globalVPA
+	}
 }
 
 // Service defines configuration for the scheduler service

From 884bfbe921e3409cad12ed6bc5d7b703ad95f8ff Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 12:59:51 +0200
Subject: [PATCH 04/25] Add construction of the VPA object

---
 pkg/operator/operands/common/vpa.go | 59 +++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 pkg/operator/operands/common/vpa.go

diff --git a/pkg/operator/operands/common/vpa.go b/pkg/operator/operands/common/vpa.go
new file mode 100644
index 000000000..ffa4db08b
--- /dev/null
+++ b/pkg/operator/operands/common/vpa.go
@@ -0,0 +1,59 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package common
+
+import (
+	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv1 "k8s.io/api/autoscaling/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	kaicommon "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/common"
+)
+
+// BuildVPA returns a VerticalPodAutoscaler named targetName in namespace whose
+// TargetRef is the apps/v1 workload of kind targetKind with that same name. It
+// returns nil when vpaSpec is nil or Enabled is unset or false. The policies are
+// deep-copied so the returned object never shares pointers with vpaSpec.
+func BuildVPA(vpaSpec *kaicommon.VPASpec, targetName, namespace, targetKind string) client.Object {
+	if vpaSpec == nil || vpaSpec.Enabled == nil || !*vpaSpec.Enabled {
+		return nil
+	}
+
+	return &vpav1.VerticalPodAutoscaler{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "autoscaling.k8s.io/v1",
+			Kind:       "VerticalPodAutoscaler",
+		},
+		ObjectMeta: metav1.ObjectMeta{Name: targetName, Namespace: namespace},
+		Spec: vpav1.VerticalPodAutoscalerSpec{
+			TargetRef: &autoscalingv1.CrossVersionObjectReference{
+				APIVersion: "apps/v1",
+				Kind:       targetKind,
+				Name:       targetName,
+			},
+			// DeepCopy is nil-safe, so unset policies stay nil.
+			UpdatePolicy:   vpaSpec.UpdatePolicy.DeepCopy(),
+			ResourcePolicy: vpaSpec.ResourcePolicy.DeepCopy(),
+		},
+	}
+}
+
+// BuildVPAFromObjects walks objects in order and builds a VPA for the first Deployment or
+// DaemonSet it meets, in the given namespace. It returns nil if VPA is off or none is found.
+func BuildVPAFromObjects(vpaSpec *kaicommon.VPASpec, objects []client.Object, namespace string) client.Object {
+	if enabled := vpaSpec != nil && vpaSpec.Enabled != nil && *vpaSpec.Enabled; !enabled {
+		return nil
+	}
+	for _, candidate := range objects {
+		if deployment, ok := candidate.(*appsv1.Deployment); ok {
+			return BuildVPA(vpaSpec, deployment.Name, namespace, "Deployment")
+		}
+		if daemonSet, ok := candidate.(*appsv1.DaemonSet); ok {
+			return BuildVPA(vpaSpec, daemonSet.Name, namespace, "DaemonSet")
+		}
+	}
+	return nil
+}

From 0889d229c46e84dc9467a2c18d91f1a05f63652d Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 13:06:52 +0200
Subject: [PATCH 05/25] Build the VPA object for the scheduler's components

---
 cmd/operator/app/app.go                       |  2 +
 pkg/operator/controller/config_controller.go  |  1 +
 pkg/operator/operands/admission/admission.go  |  4 ++
 pkg/operator/operands/binder/binder.go        |  4 ++
 .../operands/known_types/known_types.go       |  1 +
 .../known_types/verticalpodautoscalers.go     | 58 +++++++++++++++++++
 .../node_scale_adjuster.go                    |  4 ++
 .../pod_group_controller.go                   |  4 ++
 .../operands/pod_grouper/pod_grouper.go       |  4 ++
 .../queue_controller/queue_controller.go      |  4 ++
 pkg/operator/operands/scheduler/scheduler.go  |  4 ++
 11 files changed, 90 insertions(+)
 create mode 100644 pkg/operator/operands/known_types/verticalpodautoscalers.go

diff --git a/cmd/operator/app/app.go b/cmd/operator/app/app.go
index 3717d3863..332fce33b 100644
--- a/cmd/operator/app/app.go
+++ b/cmd/operator/app/app.go
@@ -8,6 +8,7 @@ import (
 
 	nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 
 	"github.com/NVIDIA/KAI-scheduler/cmd/operator/config"
 	kaiv1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1"
@@ -45,6 +46,7 @@ func init() {
 	utilruntime.Must(kaiv1alpha1.AddToScheme(scheme))
 	utilruntime.Must(nvidiav1.AddToScheme(scheme))
 	utilruntime.Must(monitoringv1.AddToScheme(scheme))
+	utilruntime.Must(vpav1.AddToScheme(scheme))
 	// +kubebuilder:scaffold:scheme
 }
 
diff --git a/pkg/operator/controller/config_controller.go b/pkg/operator/controller/config_controller.go
index 9dce04dc1..ab0ac22bb 100644
--- a/pkg/operator/controller/config_controller.go
+++ b/pkg/operator/controller/config_controller.go
@@ -83,6 +83,7 @@ func (r *ConfigReconciler) SetOperands(ops []operands.Operand) {
 // +kubebuilder:rbac:groups="nvidia.com",resources=clusterpolicies,verbs=get;list;watch
 // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheuses;servicemonitors,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups="scheduling.run.ai",resources=queues,verbs=get;list;watch
+// +kubebuilder:rbac:groups="autoscaling.k8s.io",resources=verticalpodautoscalers,verbs=get;list;watch;create;update;patch;delete
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
diff --git a/pkg/operator/operands/admission/admission.go b/pkg/operator/operands/admission/admission.go
index c3694d6af..c38229683 100644
--- a/pkg/operator/operands/admission/admission.go
+++ b/pkg/operator/operands/admission/admission.go
@@ -62,6 +62,10 @@ func (a *Admission) DesiredState(
 		objects = append(objects, newResources...)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.Admission.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	a.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/binder/binder.go b/pkg/operator/operands/binder/binder.go
index 0d9898dd0..2c28ef156 100644
--- a/pkg/operator/operands/binder/binder.go
+++ b/pkg/operator/operands/binder/binder.go
@@ -47,6 +47,10 @@ func (b *Binder) DesiredState(
 		objects = append(objects, newResources...)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.Binder.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	b.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/known_types/known_types.go b/pkg/operator/operands/known_types/known_types.go
index 9e69c180f..8a04444b7 100644
--- a/pkg/operator/operands/known_types/known_types.go
+++ b/pkg/operator/operands/known_types/known_types.go
@@ -47,6 +47,7 @@ func init() {
 	registerValidatingWebhookConfigurations()
 	registerCustomResourceDefinitions()
 	registerPrometheus()
+	registerVerticalPodAutoscalers()
 }
 
 func SetupKAIConfigOwned(fn *Collectable) {
diff --git a/pkg/operator/operands/known_types/verticalpodautoscalers.go b/pkg/operator/operands/known_types/verticalpodautoscalers.go
new file mode 100644
index 000000000..3a6363b49
--- /dev/null
+++ b/pkg/operator/operands/known_types/verticalpodautoscalers.go
@@ -0,0 +1,58 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package known_types
+
+import (
+	"context"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+)
+
+// vpaIndexer indexes a VPA by its controller owner, skipping owners rejected by checkOwnerType.
+func vpaIndexer(object client.Object) []string {
+	controller := metav1.GetControllerOf(object.(*vpav1.VerticalPodAutoscaler))
+	if checkOwnerType(controller) {
+		return []string{getOwnerKey(controller)}
+	}
+	return nil
+}
+
+// registerVerticalPodAutoscalers passes one VPA Collectable to both SetupKAIConfigOwned and SetupSchedulingShardOwned.
+func registerVerticalPodAutoscalers() {
+	vpaCollectable := &Collectable{Collect: getCurrentVPAState}
+	vpaCollectable.InitWithManager = func(ctx context.Context, mgr manager.Manager) error {
+		return mgr.GetFieldIndexer().IndexField(ctx, &vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
+	}
+	vpaCollectable.InitWithBuilder = func(b *builder.Builder) *builder.Builder {
+		return b.Owns(&vpav1.VerticalPodAutoscaler{})
+	}
+	vpaCollectable.InitWithFakeClientBuilder = func(fakeBuilder *fake.ClientBuilder) {
+		fakeBuilder.WithIndex(&vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
+	}
+
+	SetupKAIConfigOwned(vpaCollectable)
+	SetupSchedulingShardOwned(vpaCollectable)
+}
+
+// getCurrentVPAState lists the VPAs indexed under the reconciler's owner key and
+// returns them keyed by GetKey(GVK, namespace, name).
+func getCurrentVPAState(ctx context.Context, runtimeClient client.Client, reconciler client.Object) (map[string]client.Object, error) {
+	vpas := &vpav1.VerticalPodAutoscalerList{}
+	ownerFilter := client.MatchingFields{CollectableOwnerKey: getReconcilerKey(reconciler)}
+	if err := runtimeClient.List(ctx, vpas, ownerFilter); err != nil {
+		return nil, err
+	}
+	result := make(map[string]client.Object, len(vpas.Items))
+	for i := range vpas.Items {
+		// Point at the slice element itself rather than a per-iteration copy.
+		vpa := &vpas.Items[i]
+		result[GetKey(vpa.GroupVersionKind(), vpa.Namespace, vpa.Name)] = vpa
+	}
+	return result, nil
+}
diff --git a/pkg/operator/operands/node_scale_adjuster/node_scale_adjuster.go b/pkg/operator/operands/node_scale_adjuster/node_scale_adjuster.go
index 9726ceb50..f22c7c475 100644
--- a/pkg/operator/operands/node_scale_adjuster/node_scale_adjuster.go
+++ b/pkg/operator/operands/node_scale_adjuster/node_scale_adjuster.go
@@ -42,6 +42,10 @@ func (nsa *NodeScaleAdjuster) DesiredState(
 		objects = append(objects, obj)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.NodeScaleAdjuster.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	nsa.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/pod_group_controller/pod_group_controller.go b/pkg/operator/operands/pod_group_controller/pod_group_controller.go
index 1f24e066c..8f79b8d12 100644
--- a/pkg/operator/operands/pod_group_controller/pod_group_controller.go
+++ b/pkg/operator/operands/pod_group_controller/pod_group_controller.go
@@ -62,6 +62,10 @@ func (p *PodGroupController) DesiredState(
 		objects = append(objects, obj...)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.PodGroupController.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	p.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/pod_grouper/pod_grouper.go b/pkg/operator/operands/pod_grouper/pod_grouper.go
index 5c677fa59..b1f9902c6 100644
--- a/pkg/operator/operands/pod_grouper/pod_grouper.go
+++ b/pkg/operator/operands/pod_grouper/pod_grouper.go
@@ -45,6 +45,10 @@ func (p *PodGrouper) DesiredState(
 		objects = append(objects, obj)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.PodGrouper.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	p.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/queue_controller/queue_controller.go b/pkg/operator/operands/queue_controller/queue_controller.go
index 93753262d..35fd3782c 100644
--- a/pkg/operator/operands/queue_controller/queue_controller.go
+++ b/pkg/operator/operands/queue_controller/queue_controller.go
@@ -63,6 +63,10 @@ func (q *QueueController) DesiredState(
 		objects = append(objects, obj...)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.QueueController.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	q.lastDesiredState = objects
 	return objects, nil
 }
diff --git a/pkg/operator/operands/scheduler/scheduler.go b/pkg/operator/operands/scheduler/scheduler.go
index 2f916f903..8edbfd8b0 100644
--- a/pkg/operator/operands/scheduler/scheduler.go
+++ b/pkg/operator/operands/scheduler/scheduler.go
@@ -68,6 +68,10 @@ func (s *SchedulerForShard) DesiredState(
 		objects = append(objects, object)
 	}
 
+	if vpa := common.BuildVPAFromObjects(kaiConfig.Spec.Scheduler.VPA, objects, kaiConfig.Spec.Namespace); vpa != nil {
+		objects = append(objects, vpa)
+	}
+
 	s.lastDesiredState = objects
 
 	return s.lastDesiredState, nil

From 9846906f1e7181ae5d31c6b03880494e3d152895 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 13:14:57 +0200
Subject: [PATCH 06/25] Add VPA package to dependencies

---
 go.mod | 19 ++++++++++---------
 go.sum | 38 ++++++++++++++++++++------------------
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/go.mod b/go.mod
index 6eb595069..838e4248f 100644
--- a/go.mod
+++ b/go.mod
@@ -46,6 +46,7 @@ require (
 	k8s.io/apiextensions-apiserver v0.34.3
 	k8s.io/apimachinery v0.34.3
 	k8s.io/apiserver v0.34.3
+	k8s.io/autoscaler/vertical-pod-autoscaler v1.5.1
 	k8s.io/cli-runtime v0.34.1
 	k8s.io/client-go v0.34.3
 	k8s.io/cluster-bootstrap v0.34.1
@@ -98,7 +99,7 @@ require (
 	github.com/cyphar/filepath-securejoin v0.6.0 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/distribution/reference v0.6.0 // indirect
-	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
+	github.com/emicklei/go-restful/v3 v3.13.0 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/fsnotify/fsnotify v1.9.0 // indirect
@@ -106,7 +107,7 @@ require (
 	github.com/gabriel-vasile/mimetype v1.4.7 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/go-logr/zapr v1.3.0 // indirect
-	github.com/go-openapi/jsonpointer v0.21.1 // indirect
+	github.com/go-openapi/jsonpointer v0.21.2 // indirect
 	github.com/go-openapi/jsonreference v0.21.0 // indirect
 	github.com/go-openapi/swag v0.23.1 // indirect
 	github.com/go-playground/locales v0.14.1 // indirect
@@ -148,7 +149,7 @@ require (
 	github.com/pelletier/go-toml/v2 v2.2.3 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/prometheus/client_model v0.6.2 // indirect
-	github.com/prometheus/procfs v0.16.1 // indirect
+	github.com/prometheus/procfs v0.17.0 // indirect
 	github.com/robfig/cron/v3 v3.0.1 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
 	github.com/spf13/cobra v1.10.1 // indirect
@@ -159,12 +160,12 @@ require (
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
-	go.opentelemetry.io/otel v1.35.0 // indirect
+	go.opentelemetry.io/otel v1.37.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect
-	go.opentelemetry.io/otel/metric v1.35.0 // indirect
+	go.opentelemetry.io/otel/metric v1.37.0 // indirect
 	go.opentelemetry.io/otel/sdk v1.35.0 // indirect
-	go.opentelemetry.io/otel/trace v1.35.0 // indirect
+	go.opentelemetry.io/otel/trace v1.37.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.5.0 // indirect
 	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.yaml.in/yaml/v2 v2.4.3 // indirect
@@ -177,19 +178,19 @@ require (
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/term v0.37.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
-	golang.org/x/time v0.11.0 // indirect
+	golang.org/x/time v0.12.0 // indirect
 	golang.org/x/tools v0.38.0 // indirect
 	google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect
 	google.golang.org/protobuf v1.36.8 // indirect
-	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	k8s.io/cloud-provider v0.34.1 // indirect
 	k8s.io/controller-manager v0.34.1 // indirect
 	k8s.io/cri-api v0.34.1 // indirect
 	k8s.io/csi-translation-lib v0.34.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
+	k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
 	k8s.io/kubelet v0.34.1 // indirect
 	knative.dev/networking v0.0.0-20250117155906-67d1c274ba6a // indirect
 	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
diff --git a/go.sum b/go.sum
index f0df2a691..b4c6c785d 100644
--- a/go.sum
+++ b/go.sum
@@ -71,8 +71,8 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr
 github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
-github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes=
+github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -107,8 +107,8 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
 github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
-github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic=
-github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk=
+github.com/go-openapi/jsonpointer v0.21.2 h1:AqQaNADVwq/VnkCmQg6ogE+M3FOsKTytwges0JdwVuA=
+github.com/go-openapi/jsonpointer v0.21.2/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk=
 github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
 github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
 github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
@@ -257,8 +257,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
 github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
 github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
 github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
-github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
-github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
+github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
+github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
 github.com/prometheus/statsd_exporter v0.22.7 h1:7Pji/i2GuhK6Lu7DHrtTkFmNBCudCPT1pX2CziuyQR0=
 github.com/prometheus/statsd_exporter v0.22.7/go.mod h1:N/TevpjkIh9ccs6nuzY3jQn9dFqnUakOjnEuMPJJJnI=
 github.com/ray-project/kuberay/ray-operator v1.4.2 h1:A4tGzbIky8sInAUxZBdBb+rrpZ7fbqoxdsOtm559Zqg=
@@ -314,20 +314,20 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.6
 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
-go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
-go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
+go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
+go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE=
-go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
-go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
+go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
+go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
 go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
 go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
 go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
 go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
-go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
-go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
+go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
+go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
 go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
 go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
 go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
@@ -403,8 +403,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
 golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
-golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
-golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
@@ -446,8 +446,8 @@ google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXn
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
-gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo=
+gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
 gopkg.in/h2non/gock.v1 v1.1.2 h1:jBbHXgGBK/AoPVfJh5x4r/WxIrElvbLel8TCZkkZJoY=
 gopkg.in/h2non/gock.v1 v1.1.2/go.mod h1:n7UGz/ckNChHiK05rDoiC4MYSunEC/lyaUm2WWaDva0=
 gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
@@ -470,6 +470,8 @@ k8s.io/apimachinery v0.34.3 h1:/TB+SFEiQvN9HPldtlWOTp0hWbJ+fjU+wkxysf/aQnE=
 k8s.io/apimachinery v0.34.3/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
 k8s.io/apiserver v0.34.3 h1:uGH1qpDvSiYG4HVFqc6A3L4CKiX+aBWDrrsxHYK0Bdo=
 k8s.io/apiserver v0.34.3/go.mod h1:QPnnahMO5C2m3lm6fPW3+JmyQbvHZQ8uudAu/493P2w=
+k8s.io/autoscaler/vertical-pod-autoscaler v1.5.1 h1:LlVtM3IKqIVHz1ZXC3ahe/mAtDWb7Eob0tyTzqFULqg=
+k8s.io/autoscaler/vertical-pod-autoscaler v1.5.1/go.mod h1:znhUnV0Yn+CkZu3TZ2HVqd8GFRMkPj/CXszX1gdBjTU=
 k8s.io/cli-runtime v0.34.1 h1:btlgAgTrYd4sk8vJTRG6zVtqBKt9ZMDeQZo2PIzbL7M=
 k8s.io/cli-runtime v0.34.1/go.mod h1:aVA65c+f0MZiMUPbseU/M9l1Wo2byeaGwUuQEQVVveE=
 k8s.io/client-go v0.34.3 h1:wtYtpzy/OPNYf7WyNBTj3iUA0XaBHVqhv4Iv3tbrF5A=
@@ -502,8 +504,8 @@ k8s.io/kube-aggregator v0.34.1 h1:WNLV0dVNoFKmuyvdWLd92iDSyD/TSTjqwaPj0U9XAEU=
 k8s.io/kube-aggregator v0.34.1/go.mod h1:RU8j+5ERfp0h+gIvWtxRPfsa5nK7rboDm8RST8BJfYQ=
 k8s.io/kube-controller-manager v0.34.1 h1:hrPRR4toT+xABAxzGpnldTL1RocYXyVhx6A5Einb9wU=
 k8s.io/kube-controller-manager v0.34.1/go.mod h1:+7jKjj5i7NLGM6zPHbdMh7qHaWFOBsF/oeUDdS70DSg=
-k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
-k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
+k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE=
+k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
 k8s.io/kube-proxy v0.34.1 h1:cIriNCJY5XmRhXCCyQiazyqi47lbwcBQf0H76fVOpkw=
 k8s.io/kube-proxy v0.34.1/go.mod h1:syed9c5+gUVFMo6p24SnlTHzsp+BMd4ACcTw2dbArw0=
 k8s.io/kube-scheduler v0.34.1 h1:S5td6VZwC3lCqERXclerDXhJ26zYc6JroY0s03+PqJ8=

From 8101c2f94cfd2f09534848e5d881244683709264 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 13:22:45 +0200
Subject: [PATCH 07/25] Run make generate

---
 .../kai/v1/admission/zz_generated.deepcopy.go |  5 +++
 .../kai/v1/binder/zz_generated.deepcopy.go    |  5 +++
 .../kai/v1/common/zz_generated.deepcopy.go    | 31 +++++++++++++++++++
 .../zz_generated.deepcopy.go                  |  5 +++
 .../zz_generated.deepcopy.go                  |  5 +++
 .../v1/pod_grouper/zz_generated.deepcopy.go   |  5 +++
 .../queue_controller/zz_generated.deepcopy.go |  5 +++
 .../kai/v1/scheduler/zz_generated.deepcopy.go |  5 +++
 pkg/apis/kai/v1/zz_generated.deepcopy.go      |  6 ++++
 9 files changed, 72 insertions(+)

diff --git a/pkg/apis/kai/v1/admission/zz_generated.deepcopy.go b/pkg/apis/kai/v1/admission/zz_generated.deepcopy.go
index 1a8dc1d96..0fdb5f3b0 100644
--- a/pkg/apis/kai/v1/admission/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/admission/zz_generated.deepcopy.go
@@ -56,6 +56,11 @@ func (in *Admission) DeepCopyInto(out *Admission) {
 		*out = new(string)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Admission.
diff --git a/pkg/apis/kai/v1/binder/zz_generated.deepcopy.go b/pkg/apis/kai/v1/binder/zz_generated.deepcopy.go
index 966241a10..645752049 100644
--- a/pkg/apis/kai/v1/binder/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/binder/zz_generated.deepcopy.go
@@ -56,6 +56,11 @@ func (in *Binder) DeepCopyInto(out *Binder) {
 		*out = new(bool)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Binder.
diff --git a/pkg/apis/kai/v1/common/zz_generated.deepcopy.go b/pkg/apis/kai/v1/common/zz_generated.deepcopy.go
index 2f75d2df5..3e0b6143c 100644
--- a/pkg/apis/kai/v1/common/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/common/zz_generated.deepcopy.go
@@ -11,6 +11,7 @@ package common
 
 import (
 	"k8s.io/api/core/v1"
+	autoscaling_k8s_iov1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 )
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
@@ -146,3 +147,33 @@ func (in *Service) DeepCopy() *Service {
 	in.DeepCopyInto(out)
 	return out
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VPASpec) DeepCopyInto(out *VPASpec) {
+	*out = *in
+	if in.Enabled != nil {
+		in, out := &in.Enabled, &out.Enabled
+		*out = new(bool)
+		**out = **in
+	}
+	if in.UpdatePolicy != nil {
+		in, out := &in.UpdatePolicy, &out.UpdatePolicy
+		*out = new(autoscaling_k8s_iov1.PodUpdatePolicy)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.ResourcePolicy != nil {
+		in, out := &in.ResourcePolicy, &out.ResourcePolicy
+		*out = new(autoscaling_k8s_iov1.PodResourcePolicy)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VPASpec.
+func (in *VPASpec) DeepCopy() *VPASpec {
+	if in == nil {
+		return nil
+	}
+	out := new(VPASpec)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/pkg/apis/kai/v1/node_scale_adjuster/zz_generated.deepcopy.go b/pkg/apis/kai/v1/node_scale_adjuster/zz_generated.deepcopy.go
index b07990aa4..f31ad55ae 100644
--- a/pkg/apis/kai/v1/node_scale_adjuster/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/node_scale_adjuster/zz_generated.deepcopy.go
@@ -61,6 +61,11 @@ func (in *NodeScaleAdjuster) DeepCopyInto(out *NodeScaleAdjuster) {
 		*out = new(Args)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeScaleAdjuster.
diff --git a/pkg/apis/kai/v1/pod_group_controller/zz_generated.deepcopy.go b/pkg/apis/kai/v1/pod_group_controller/zz_generated.deepcopy.go
index dceee04c2..95205e292 100644
--- a/pkg/apis/kai/v1/pod_group_controller/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/pod_group_controller/zz_generated.deepcopy.go
@@ -41,6 +41,11 @@ func (in *PodGroupController) DeepCopyInto(out *PodGroupController) {
 		*out = new(int32)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroupController.
diff --git a/pkg/apis/kai/v1/pod_grouper/zz_generated.deepcopy.go b/pkg/apis/kai/v1/pod_grouper/zz_generated.deepcopy.go
index 8f9c83509..30800b7f2 100644
--- a/pkg/apis/kai/v1/pod_grouper/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/pod_grouper/zz_generated.deepcopy.go
@@ -71,6 +71,11 @@ func (in *PodGrouper) DeepCopyInto(out *PodGrouper) {
 		*out = new(int32)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGrouper.
diff --git a/pkg/apis/kai/v1/queue_controller/zz_generated.deepcopy.go b/pkg/apis/kai/v1/queue_controller/zz_generated.deepcopy.go
index e133778f6..737e9971b 100644
--- a/pkg/apis/kai/v1/queue_controller/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/queue_controller/zz_generated.deepcopy.go
@@ -81,6 +81,11 @@ func (in *QueueController) DeepCopyInto(out *QueueController) {
 		*out = new(string)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QueueController.
diff --git a/pkg/apis/kai/v1/scheduler/zz_generated.deepcopy.go b/pkg/apis/kai/v1/scheduler/zz_generated.deepcopy.go
index 90e2dee4b..854101b10 100644
--- a/pkg/apis/kai/v1/scheduler/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/scheduler/zz_generated.deepcopy.go
@@ -37,6 +37,11 @@ func (in *Scheduler) DeepCopyInto(out *Scheduler) {
 		*out = new(int32)
 		**out = **in
 	}
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Scheduler.
diff --git a/pkg/apis/kai/v1/zz_generated.deepcopy.go b/pkg/apis/kai/v1/zz_generated.deepcopy.go
index 45b78c48c..519b01ef9 100644
--- a/pkg/apis/kai/v1/zz_generated.deepcopy.go
+++ b/pkg/apis/kai/v1/zz_generated.deepcopy.go
@@ -12,6 +12,7 @@ package v1
 import (
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/admission"
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/binder"
+	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/common"
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/node_scale_adjuster"
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/pod_group_controller"
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/pod_grouper"
@@ -192,6 +193,11 @@ func (in *ConfigStatus) DeepCopy() *ConfigStatus {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *GlobalConfig) DeepCopyInto(out *GlobalConfig) {
 	*out = *in
+	if in.VPA != nil {
+		in, out := &in.VPA, &out.VPA
+		*out = new(common.VPASpec)
+		(*in).DeepCopyInto(*out)
+	}
 	if in.Openshift != nil {
 		in, out := &in.Openshift, &out.Openshift
 		*out = new(bool)

From 47bc5050616c2cb018450468659024180518e045 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 13:30:27 +0200
Subject: [PATCH 08/25] Update tests to account for VPA

---
 pkg/apis/kai/v1/admission/admission_test.go            |  4 ++--
 pkg/apis/kai/v1/binder/binder_test.go                  | 10 +++++-----
 .../v1/node_scale_adjuster/node_scale_adjuster_test.go |  2 +-
 .../pod_group_controller/pod_group_controller_test.go  |  4 ++--
 pkg/apis/kai/v1/pod_grouper/pod_grouper_test.go        |  4 ++--
 .../kai/v1/queue_controller/queue_controller_test.go   |  4 ++--
 pkg/apis/kai/v1/scheduler/scheduler_test.go            | 10 +++++-----
 pkg/operator/operands/deployable/deployable_test.go    |  2 ++
 8 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/pkg/apis/kai/v1/admission/admission_test.go b/pkg/apis/kai/v1/admission/admission_test.go
index e8dc15cf0..8c5aa85d9 100644
--- a/pkg/apis/kai/v1/admission/admission_test.go
+++ b/pkg/apis/kai/v1/admission/admission_test.go
@@ -22,7 +22,7 @@ var _ = Describe("Admission", func() {
 		Admission := &Admission{}
 		var replicaCount int32
 		replicaCount = 1
-		Admission.SetDefaultsWhereNeeded(&replicaCount)
+		Admission.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*Admission.Service.Enabled).To(Equal(true))
 		Expect(*Admission.Service.Image.Name).To(Equal("admission"))
 		Expect(*Admission.Replicas).To(Equal(int32(1)))
@@ -32,7 +32,7 @@ var _ = Describe("Admission", func() {
 		Admission := &Admission{}
 		var replicaCount int32
 		replicaCount = 3
-		Admission.SetDefaultsWhereNeeded(&replicaCount)
+		Admission.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*Admission.Replicas).To(Equal(int32(3)))
 	})
 })
diff --git a/pkg/apis/kai/v1/binder/binder_test.go b/pkg/apis/kai/v1/binder/binder_test.go
index 8a18b65c4..b43d0fa60 100644
--- a/pkg/apis/kai/v1/binder/binder_test.go
+++ b/pkg/apis/kai/v1/binder/binder_test.go
@@ -23,7 +23,7 @@ func TestBinder(t *testing.T) {
 var _ = Describe("Binder", func() {
 	It("Set Defaults", func(ctx context.Context) {
 		binder := &Binder{}
-		binder.SetDefaultsWhereNeeded(nil)
+		binder.SetDefaultsWhereNeeded(nil, nil)
 		Expect(*binder.Service.Enabled).To(Equal(true))
 		Expect(*binder.Service.Image.Name).To(Equal("binder"))
 		Expect(binder.Service.Resources.Requests[v1.ResourceCPU]).To(Equal(resource.MustParse("50m")))
@@ -35,14 +35,14 @@ var _ = Describe("Binder", func() {
 		binder := &Binder{}
 		var replicaCount int32
 		replicaCount = 3
-		binder.SetDefaultsWhereNeeded(&replicaCount)
+		binder.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*binder.Replicas).To(Equal(int32(3)))
 	})
 
 	Context("ResourceReservation PodResources configuration", func() {
 		It("should not set default PodResources when not configured", func(ctx context.Context) {
 			binder := &Binder{}
-			binder.SetDefaultsWhereNeeded(nil)
+			binder.SetDefaultsWhereNeeded(nil, nil)
 
 			// PodResources should be nil when not configured
 			Expect(binder.ResourceReservation.PodResources).To(BeNil())
@@ -64,7 +64,7 @@ var _ = Describe("Binder", func() {
 					PodResources: podResources,
 				},
 			}
-			binder.SetDefaultsWhereNeeded(nil)
+			binder.SetDefaultsWhereNeeded(nil, nil)
 
 			// Configured values should be preserved
 			Expect(binder.ResourceReservation.PodResources).NotTo(BeNil())
@@ -88,7 +88,7 @@ var _ = Describe("Binder", func() {
 					PodResources: podResources,
 				},
 			}
-			binder.SetDefaultsWhereNeeded(nil)
+			binder.SetDefaultsWhereNeeded(nil, nil)
 
 			// Only CPU should be set
 			Expect(binder.ResourceReservation.PodResources).NotTo(BeNil())
diff --git a/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster_test.go b/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster_test.go
index 1f08bc6f0..317da60fd 100644
--- a/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster_test.go
+++ b/pkg/apis/kai/v1/node_scale_adjuster/node_scale_adjuster_test.go
@@ -19,7 +19,7 @@ func TestNodeScaleAdjuster(t *testing.T) {
 var _ = Describe("NodeScaleAdjuster", func() {
 	It("Set Defaults", func(ctx context.Context) {
 		adjuster := &NodeScaleAdjuster{}
-		adjuster.SetDefaultsWhereNeeded()
+		adjuster.SetDefaultsWhereNeeded(nil)
 		Expect(*adjuster.Service.Enabled).To(Equal(true))
 		Expect(*adjuster.Service.Image.Name).To(Equal(imageName))
 	})
diff --git a/pkg/apis/kai/v1/pod_group_controller/pod_group_controller_test.go b/pkg/apis/kai/v1/pod_group_controller/pod_group_controller_test.go
index 48b1c37dc..24e9c2fad 100644
--- a/pkg/apis/kai/v1/pod_group_controller/pod_group_controller_test.go
+++ b/pkg/apis/kai/v1/pod_group_controller/pod_group_controller_test.go
@@ -19,7 +19,7 @@ func TestPodGroupController(t *testing.T) {
 var _ = Describe("PodGroupController", func() {
 	It("Set Defaults", func(ctx context.Context) {
 		podGroupController := &PodGroupController{}
-		podGroupController.SetDefaultsWhereNeeded(nil)
+		podGroupController.SetDefaultsWhereNeeded(nil, nil)
 		Expect(*podGroupController.Service.Enabled).To(Equal(true))
 		Expect(*podGroupController.Service.Image.Name).To(Equal(imageName))
 	})
@@ -27,7 +27,7 @@ var _ = Describe("PodGroupController", func() {
 		podGroupController := &PodGroupController{}
 		var replicaCount int32
 		replicaCount = 3
-		podGroupController.SetDefaultsWhereNeeded(&replicaCount)
+		podGroupController.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*podGroupController.Replicas).To(Equal(int32(3)))
 	})
 })
diff --git a/pkg/apis/kai/v1/pod_grouper/pod_grouper_test.go b/pkg/apis/kai/v1/pod_grouper/pod_grouper_test.go
index 7471d03e9..1d50ae9dd 100644
--- a/pkg/apis/kai/v1/pod_grouper/pod_grouper_test.go
+++ b/pkg/apis/kai/v1/pod_grouper/pod_grouper_test.go
@@ -21,7 +21,7 @@ var _ = Describe("PodGrouper", func() {
 		podGrouper := &PodGrouper{}
 		var replicaCount int32
 		replicaCount = 1
-		podGrouper.SetDefaultsWhereNeeded(&replicaCount)
+		podGrouper.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*podGrouper.Service.Enabled).To(Equal(true))
 		Expect(*podGrouper.Service.Image.Name).To(Equal("podgrouper"))
 		Expect(*podGrouper.Replicas).To(Equal(int32(1)))
@@ -30,7 +30,7 @@ var _ = Describe("PodGrouper", func() {
 		podGrouper := &PodGrouper{}
 		var replicaCount int32
 		replicaCount = 3
-		podGrouper.SetDefaultsWhereNeeded(&replicaCount)
+		podGrouper.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*podGrouper.Replicas).To(Equal(int32(3)))
 	})
 })
diff --git a/pkg/apis/kai/v1/queue_controller/queue_controller_test.go b/pkg/apis/kai/v1/queue_controller/queue_controller_test.go
index dc0f2dbcc..1912ac629 100644
--- a/pkg/apis/kai/v1/queue_controller/queue_controller_test.go
+++ b/pkg/apis/kai/v1/queue_controller/queue_controller_test.go
@@ -21,7 +21,7 @@ var _ = Describe("QueueController", func() {
 		queueController := &QueueController{}
 		var replicaCount int32
 		replicaCount = 1
-		queueController.SetDefaultsWhereNeeded(&replicaCount)
+		queueController.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*queueController.Service.Enabled).To(Equal(true))
 		Expect(*queueController.Service.Image.Name).To(Equal("queuecontroller"))
 		Expect(*queueController.Replicas).To(Equal(int32(1)))
@@ -30,7 +30,7 @@ var _ = Describe("QueueController", func() {
 		queueController := &QueueController{}
 		var replicaCount int32
 		replicaCount = 3
-		queueController.SetDefaultsWhereNeeded(&replicaCount)
+		queueController.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*queueController.Replicas).To(Equal(int32(3)))
 	})
 })
diff --git a/pkg/apis/kai/v1/scheduler/scheduler_test.go b/pkg/apis/kai/v1/scheduler/scheduler_test.go
index 1efb33c29..d744b61e9 100644
--- a/pkg/apis/kai/v1/scheduler/scheduler_test.go
+++ b/pkg/apis/kai/v1/scheduler/scheduler_test.go
@@ -23,7 +23,7 @@ var _ = Describe("Scheduler", func() {
 	It("Set Defaults when Service is nil", func(ctx context.Context) {
 		scheduler := &Scheduler{}
 		var replicaCount int32 = 1
-		scheduler.SetDefaultsWhereNeeded(&replicaCount)
+		scheduler.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(scheduler.Service).NotTo(BeNil())
 		Expect(*scheduler.Service.Enabled).To(Equal(true))
 		Expect(*scheduler.Service.Image.Name).To(Equal("scheduler"))
@@ -36,14 +36,14 @@ var _ = Describe("Scheduler", func() {
 	It("Set Defaults with GOGC unset", func(ctx context.Context) {
 		scheduler := &Scheduler{}
 		var replicaCount int32 = 2
-		scheduler.SetDefaultsWhereNeeded(&replicaCount)
+		scheduler.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*scheduler.GOGC).To(Equal(400))
 	})
 
 	It("Set Defaults with SchedulerService unset", func(ctx context.Context) {
 		scheduler := &Scheduler{}
 		var replicaCount int32 = 3
-		scheduler.SetDefaultsWhereNeeded(&replicaCount)
+		scheduler.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(scheduler.SchedulerService).NotTo(BeNil())
 		Expect(*scheduler.SchedulerService.Type).To(Equal(v1.ServiceTypeClusterIP))
 		Expect(*scheduler.SchedulerService.Port).To(Equal(8080))
@@ -53,14 +53,14 @@ var _ = Describe("Scheduler", func() {
 	It("Replicas set to replicaCount value", func(ctx context.Context) {
 		scheduler := &Scheduler{}
 		var replicaCount int32 = 4
-		scheduler.SetDefaultsWhereNeeded(&replicaCount)
+		scheduler.SetDefaultsWhereNeeded(&replicaCount, nil)
 		Expect(*scheduler.Replicas).To(Equal(int32(4)))
 	})
 
 	It("Replicas default to 1 when replicaCount is nil", func(ctx context.Context) {
 		scheduler := &Scheduler{}
 		var replicaCount *int32
-		scheduler.SetDefaultsWhereNeeded(replicaCount)
+		scheduler.SetDefaultsWhereNeeded(replicaCount, nil)
 		Expect(*scheduler.Replicas).To(Equal(int32(1)))
 	})
 })
diff --git a/pkg/operator/operands/deployable/deployable_test.go b/pkg/operator/operands/deployable/deployable_test.go
index b48589ae8..0ac33543d 100644
--- a/pkg/operator/operands/deployable/deployable_test.go
+++ b/pkg/operator/operands/deployable/deployable_test.go
@@ -17,6 +17,7 @@ import (
 	"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/known_types"
 
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
@@ -53,6 +54,7 @@ var _ = Describe("Deployable", func() {
 		Expect(kaiv1.AddToScheme(testScheme)).To(Succeed())
 		Expect(apiextensionsv1.AddToScheme(testScheme)).To(Succeed())
 		Expect(monitoringv1.AddToScheme(testScheme)).To(Succeed())
+		Expect(vpav1.AddToScheme(testScheme)).To(Succeed())
 
 		fakeClientBuilder = fake.NewClientBuilder().
 			WithScheme(testScheme).

From 97bd33f79a4643eb06aed4e6697c96025cb00668 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 15:14:06 +0200
Subject: [PATCH 09/25] Add option to set up VPA as part of the e2e script

---
 hack/setup-e2e-cluster.sh | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/hack/setup-e2e-cluster.sh b/hack/setup-e2e-cluster.sh
index 5332f5aa2..4d46cc2ae 100755
--- a/hack/setup-e2e-cluster.sh
+++ b/hack/setup-e2e-cluster.sh
@@ -18,6 +18,7 @@ KIND_CONFIG=${REPO_ROOT}/hack/e2e-kind-config.yaml
 # Parse named parameters
 TEST_THIRD_PARTY_INTEGRATIONS=${TEST_THIRD_PARTY_INTEGRATIONS:-"false"}
 LOCAL_IMAGES_BUILD=${LOCAL_IMAGES_BUILD:-"false"}
+INSTALL_VPA=${INSTALL_VPA:-"false"}
 
 while [[ $# -gt 0 ]]; do
   case $1 in
@@ -29,10 +30,15 @@ while [[ $# -gt 0 ]]; do
       LOCAL_IMAGES_BUILD="true"
       shift
       ;;
+    --install-vpa)
+      INSTALL_VPA="true"
+      shift
+      ;;
     -h|--help)
-      echo "Usage: $0 [--test-third-party-integrations] [--local-images-build]"
+      echo "Usage: $0 [--test-third-party-integrations] [--local-images-build] [--install-vpa]"
       echo "  --test-third-party-integrations: Install third party operators for compatibility testing"
       echo "  --local-images-build: Build and use local images instead of pulling from registry"
+      echo "  --install-vpa: Install Vertical Pod Autoscaler and metrics-server"
       exit 0
       ;;
     *)
@@ -67,6 +73,23 @@ helm install prometheus prometheus-community/kube-prometheus-stack --namespace m
     --set "prometheus.enabled=false" \
     --wait
 
+# Install VPA and its prerequisites; the autoscaler repo is shallow-cloned at the pinned release tag
+if [ "$INSTALL_VPA" = "true" ]; then
+    echo "Installing metrics-server (required by VPA recommender)..."
+    kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
+    # kind uses self-signed kubelet certs, so metrics-server needs --kubelet-insecure-tls
+    kubectl patch deployment metrics-server -n kube-system --type=json \
+        -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
+    kubectl wait --for=condition=available --timeout=120s deployment/metrics-server -n kube-system
+
+    echo "Installing Vertical Pod Autoscaler..."
+    VPA_TMPDIR=$(mktemp -d)
+    git clone --depth 1 --branch vertical-pod-autoscaler-1.5.1 https://github.com/kubernetes/autoscaler.git "$VPA_TMPDIR/autoscaler"
+    (cd "$VPA_TMPDIR/autoscaler/vertical-pod-autoscaler" && ./hack/vpa-up.sh)
+    rm -rf "$VPA_TMPDIR"
+    echo "VPA installation complete."
+fi
+
 # Install third party operators to check the compatibility with the kai-scheduler
 if [ "$TEST_THIRD_PARTY_INTEGRATIONS" = "true" ]; then
     ${REPO_ROOT}/hack/third_party_integrations/deploy_ray.sh

From 2e41494e8192133e8325945c66b27f85ade2a395 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 15:54:41 +0200
Subject: [PATCH 10/25] Add kai scheduler CRDs updates

---
 .../crds/kai.scheduler_configs.yaml           | 1058 ++++++++++++++++-
 .../crds/kai.scheduler_schedulingshards.yaml  |    2 +-
 .../crds/kai.scheduler_topologies.yaml        |   17 +-
 .../crds/scheduling.run.ai_bindrequests.yaml  |    2 +-
 .../crds/scheduling.run.ai_podgroups.yaml     |    2 +-
 .../crds/scheduling.run.ai_queues.yaml        |    2 +-
 6 files changed, 1062 insertions(+), 21 deletions(-)

diff --git a/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml b/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
index 5f0bb574c..8d4450006 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: configs.kai.scheduler
 spec:
   group: kai.scheduler
@@ -1094,6 +1094,138 @@ spec:
                     description: ValidatingWebhookConfigurationName is the name of
                       the ValidatingWebhookConfiguration for the admission service
                     type: string
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the admission service
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                   webhook:
                     description: Webhook defines configuration for the admission service
                     properties:
@@ -2265,6 +2397,138 @@ spec:
                     description: VolumeBindingTimeoutSeconds specifies the timeout
                       for volume binding in seconds
                     type: integer
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the binder
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                 type: object
               global:
                 description: Global defined global configuration of the system
@@ -3512,6 +3776,138 @@ spec:
                           type: string
                       type: object
                     type: array
+                  vpa:
+                    description: VPA defines the default Vertical Pod Autoscaler configuration
+                      for all services
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                 type: object
               namespace:
                 description: Namespace is the namespace to create the operands in
@@ -4573,6 +4969,138 @@ spec:
                             type: object
                         type: object
                     type: object
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the node-scale-adjuster
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                 type: object
               podGroupController:
                 description: PodGroupController specifies configuration for the pod-group-controller
@@ -5623,6 +6151,138 @@ spec:
                             type: object
                         type: object
                     type: object
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the pod group controller
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                   webhooks:
                     description: Webhooks describes the configuration of the podgroup
                       controller webhooks
@@ -6697,6 +7357,138 @@ spec:
                             type: object
                         type: object
                     type: object
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the pod-grouper
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                 type: object
               prometheus:
                 description: Prometheus specifies configuration for Prometheus monitoring
@@ -7855,6 +8647,138 @@ spec:
                             type: object
                         type: object
                     type: object
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the queue controller
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                   webhooks:
                     description: Webhooks describes the configuration of the queue
                       controller webhooks
@@ -8915,6 +9839,138 @@ spec:
                             type: object
                         type: object
                     type: object
+                  vpa:
+                    description: VPA specifies Vertical Pod Autoscaler configuration
+                      for the scheduler
+                    properties:
+                      enabled:
+                        description: Enabled specifies if VPA should be enabled
+                        type: boolean
+                      resourcePolicy:
+                        description: ResourcePolicy controls how VPA computes recommended
+                          resources for containers
+                        properties:
+                          containerPolicies:
+                            description: Per-container resource policies.
+                            items:
+                              description: |-
+                                ContainerResourcePolicy controls how autoscaler computes the recommended
+                                resources for a specific container.
+                              properties:
+                                containerName:
+                                  description: |-
+                                    Name of the container or DefaultContainerResourcePolicy, in which
+                                    case the policy is used by the containers that don't have their own
+                                    policy specified.
+                                  type: string
+                                controlledResources:
+                                  description: |-
+                                    Specifies the type of recommendations that will be computed
+                                    (and possibly applied) by VPA.
+                                    If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                                controlledValues:
+                                  description: |-
+                                    Specifies which resource values should be controlled.
+                                    The default is "RequestsAndLimits".
+                                  enum:
+                                  - RequestsAndLimits
+                                  - RequestsOnly
+                                  type: string
+                                maxAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the maximum amount of resources that will be recommended
+                                    for the container. The default is no maximum.
+                                  type: object
+                                minAllowed:
+                                  additionalProperties:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
+                                  description: |-
+                                    Specifies the minimal amount of resources that will be recommended
+                                    for the container. The default is no minimum.
+                                  type: object
+                                mode:
+                                  description: Whether autoscaler is enabled for the
+                                    container. The default is "Auto".
+                                  enum:
+                                  - Auto
+                                  - "Off"
+                                  type: string
+                              type: object
+                            type: array
+                        type: object
+                      updatePolicy:
+                        description: UpdatePolicy controls when and how VPA applies
+                          changes to pod resources
+                        properties:
+                          evictionRequirements:
+                            description: |-
+                              EvictionRequirements is a list of EvictionRequirements that need to
+                              evaluate to true in order for a Pod to be evicted. If more than one
+                              EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                            items:
+                              description: |-
+                                EvictionRequirement defines a single condition which needs to be true in
+                                order to evict a Pod
+                              properties:
+                                changeRequirement:
+                                  description: EvictionChangeRequirement refers to
+                                    the relationship between the new target recommendation
+                                    for a Pod and its current requests, what kind
+                                    of change is necessary for the Pod to be evicted
+                                  enum:
+                                  - TargetHigherThanRequests
+                                  - TargetLowerThanRequests
+                                  type: string
+                                resources:
+                                  description: |-
+                                    Resources is a list of one or more resources that the condition applies
+                                    to. If more than one resource is given, the EvictionRequirement is fulfilled
+                                    if at least one resource meets `changeRequirement`.
+                                  items:
+                                    description: ResourceName is the name identifying
+                                      various resources in a ResourceList.
+                                    type: string
+                                  type: array
+                              required:
+                              - changeRequirement
+                              - resources
+                              type: object
+                            type: array
+                          minReplicas:
+                            description: |-
+                              Minimal number of replicas which need to be alive for Updater to attempt
+                              pod eviction (pending other checks like PDB). Only positive values are
+                              allowed. Overrides global '--min-replicas' flag.
+                            format: int32
+                            type: integer
+                          updateMode:
+                            description: |-
+                              Controls when autoscaler applies changes to the pod resources.
+                              The default is 'Auto'.
+                            enum:
+                            - "Off"
+                            - Initial
+                            - Recreate
+                            - InPlaceOrRecreate
+                            - Auto
+                            type: string
+                        type: object
+                    type: object
                 type: object
             type: object
           status:
diff --git a/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml b/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
index d034b7cef..130ccad3d 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: schedulingshards.kai.scheduler
 spec:
   group: kai.scheduler
diff --git a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
index 993ea19eb..8d46a3d6b 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
@@ -1,18 +1,3 @@
-# Copyright The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
 # Copyright 2025 NVIDIA CORPORATION
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -24,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: topologies.kai.scheduler
 spec:
   group: kai.scheduler
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
index 53be0a220..91e301296 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: bindrequests.scheduling.run.ai
 spec:
   group: scheduling.run.ai
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
index 58bbaacef..81df93a3d 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: podgroups.scheduling.run.ai
 spec:
   group: scheduling.run.ai
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
index 2b74d682b..18ac93868 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
+    controller-gen.kubebuilder.io/version: v0.17.3
   name: queues.scheduling.run.ai
 spec:
   group: scheduling.run.ai

From df799db3b01648db5c78cdd496664992d78456e9 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 16:08:50 +0200
Subject: [PATCH 11/25] Add VPA Field Inherit to allow detection of changes in
 existing VPA objects

---
 pkg/operator/controller/config_controller.go  |  3 +++
 .../controller/schedulingshard_controller.go  |  3 +++
 .../known_types/verticalpodautoscalers.go     | 25 +++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/pkg/operator/controller/config_controller.go b/pkg/operator/controller/config_controller.go
index ab0ac22bb..d0319320c 100644
--- a/pkg/operator/controller/config_controller.go
+++ b/pkg/operator/controller/config_controller.go
@@ -23,6 +23,7 @@ import (
 	admissionv1 "k8s.io/api/admissionregistration/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -148,6 +149,8 @@ func (r *ConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		known_types.ValidatingWebhookConfigurationFieldInherit)
 	r.deployable.RegisterFieldsInheritFromClusterObjects(&admissionv1.MutatingWebhookConfiguration{},
 		known_types.MutatingWebhookConfigurationFieldInherit)
+	r.deployable.RegisterFieldsInheritFromClusterObjects(&vpav1.VerticalPodAutoscaler{},
+		known_types.VPAFieldInherit)
 	r.StatusReconciler = status_reconciler.New(r.Client, r.deployable)
 
 	builder := ctrl.NewControllerManagedBy(mgr).
diff --git a/pkg/operator/controller/schedulingshard_controller.go b/pkg/operator/controller/schedulingshard_controller.go
index 59b9ddec3..e18dd02fb 100644
--- a/pkg/operator/controller/schedulingshard_controller.go
+++ b/pkg/operator/controller/schedulingshard_controller.go
@@ -23,6 +23,7 @@ import (
 	"golang.org/x/exp/slices"
 	admissionv1 "k8s.io/api/admissionregistration/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -91,6 +92,8 @@ func (r *SchedulingShardReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 		known_types.ValidatingWebhookConfigurationFieldInherit)
 	r.deployablePerShard[shard.Name].RegisterFieldsInheritFromClusterObjects(&admissionv1.MutatingWebhookConfiguration{},
 		known_types.MutatingWebhookConfigurationFieldInherit)
+	r.deployablePerShard[shard.Name].RegisterFieldsInheritFromClusterObjects(&vpav1.VerticalPodAutoscaler{},
+		known_types.VPAFieldInherit)
 	r.statusReconcilers[shard.Name] = status_reconciler.New(r.Client, r.deployablePerShard[shard.Name])
 
 	deployable := r.deployablePerShard[shard.Name]
diff --git a/pkg/operator/operands/known_types/verticalpodautoscalers.go b/pkg/operator/operands/known_types/verticalpodautoscalers.go
index 3a6363b49..aec5e5c33 100644
--- a/pkg/operator/operands/known_types/verticalpodautoscalers.go
+++ b/pkg/operator/operands/known_types/verticalpodautoscalers.go
@@ -40,6 +40,31 @@ func registerVerticalPodAutoscalers() {
 	SetupSchedulingShardOwned(collectable)
 }
 
+// VPAFieldInherit copies metadata (and, for VPA objects, status) from the current cluster object
+// into the desired object so reflect.DeepEqual won't trigger false updates; no-op if current is nil.
+func VPAFieldInherit(current, desired client.Object) {
+	if current == nil {
+		return // nothing to inherit from
+	}
+	desired.SetResourceVersion(current.GetResourceVersion())
+	desired.SetUID(current.GetUID())
+	desired.SetCreationTimestamp(current.GetCreationTimestamp())
+	desired.SetGeneration(current.GetGeneration())
+	desired.SetOwnerReferences(current.GetOwnerReferences())
+	desired.SetManagedFields(current.GetManagedFields())
+	desired.SetAnnotations(mergeAnnotations(desired.GetAnnotations(), current.GetAnnotations())) // NOTE(review): precedence on key conflicts is decided by mergeAnnotations, defined elsewhere - confirm
+
+	currentVPA, ok := current.(*vpav1.VerticalPodAutoscaler)
+	if !ok {
+		return // current is not a VPA: metadata was already copied above, status is left untouched
+	}
+	desiredVPA, ok := desired.(*vpav1.VerticalPodAutoscaler)
+	if !ok {
+		return // desired is not a VPA: there is no status field to fill in
+	}
+	desiredVPA.Status = currentVPA.Status // carry over the status read from the cluster object
+}
+
 func getCurrentVPAState(ctx context.Context, runtimeClient client.Client, reconciler client.Object) (map[string]client.Object, error) {
 	result := map[string]client.Object{}
 	vpas := &vpav1.VerticalPodAutoscalerList{}

From 1c966b5074f734aa32c247ca40493fe66c8cc9b6 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 17:13:15 +0200
Subject: [PATCH 12/25] Run make

---
 .../crds/kai.scheduler_topologies.yaml        | 15 +++++++++
 .../templates/rbac/admission.yaml             | 18 +++++------
 .../kai-scheduler/templates/rbac/binder.yaml  | 24 +++++++-------
 .../templates/rbac/nodescaleadjuster.yaml     | 31 ++++++------------
 .../templates/rbac/operator.yaml              | 12 +++++++
 .../templates/rbac/podgroupcontroller.yaml    | 22 ++++++-------
 .../templates/rbac/podgrouper.yaml            | 25 +++++++--------
 .../templates/rbac/queuecontroller.yaml       |  8 ++---
 .../templates/rbac/scheduler.yaml             | 32 ++++++-------------
 9 files changed, 95 insertions(+), 92 deletions(-)

diff --git a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
index 8d46a3d6b..364db292c 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
@@ -1,3 +1,18 @@
+# Copyright The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 # Copyright 2025 NVIDIA CORPORATION
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/deployments/kai-scheduler/templates/rbac/admission.yaml b/deployments/kai-scheduler/templates/rbac/admission.yaml
index 88d0a08b1..72122bba3 100644
--- a/deployments/kai-scheduler/templates/rbac/admission.yaml
+++ b/deployments/kai-scheduler/templates/rbac/admission.yaml
@@ -11,9 +11,9 @@ metadata:
   name: kai-admission
 rules:
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - configmaps
   verbs:
   - create
   - delete
@@ -25,20 +25,20 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - configmaps
+  - events
   verbs:
   - create
-  - delete
-  - get
-  - list
   - patch
   - update
-  - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - events
+  - leases
   verbs:
   - create
+  - delete
+  - get
+  - list
   - patch
   - update
+  - watch
diff --git a/deployments/kai-scheduler/templates/rbac/binder.yaml b/deployments/kai-scheduler/templates/rbac/binder.yaml
index 4a4968f82..b4550f970 100644
--- a/deployments/kai-scheduler/templates/rbac/binder.yaml
+++ b/deployments/kai-scheduler/templates/rbac/binder.yaml
@@ -10,18 +10,6 @@ kind: ClusterRole
 metadata:
   name: kai-binder
 rules:
-- apiGroups:
-  - coordination.k8s.io
-  resources:
-  - leases
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
 - apiGroups:
   - ""
   resources:
@@ -75,6 +63,18 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - resource.k8s.io
   resources:
diff --git a/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml b/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
index af2000e3d..515676fe9 100644
--- a/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
+++ b/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
@@ -13,6 +13,7 @@ rules:
 - apiGroups:
   - ""
   resources:
+  - configmaps
   - pods
   verbs:
   - create
@@ -23,43 +24,31 @@ rules:
   - update
   - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - events
+  - pods/finalizers
   verbs:
   - create
-  - delete
-  - get
-  - list
   - patch
   - update
-  - watch
 - apiGroups:
   - ""
   resources:
-  - configmaps
+  - pods/status
   verbs:
-  - create
-  - delete
   - get
-  - list
   - patch
   - update
-  - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - events
-  - pods/finalizers
+  - leases
   verbs:
   - create
-  - patch
-  - update
-- apiGroups:
-  - ""
-  resources:
-  - pods/status
-  verbs:
+  - delete
   - get
+  - list
   - patch
   - update
+  - watch
diff --git a/deployments/kai-scheduler/templates/rbac/operator.yaml b/deployments/kai-scheduler/templates/rbac/operator.yaml
index 59b0fcb59..95e2ce8c6 100644
--- a/deployments/kai-scheduler/templates/rbac/operator.yaml
+++ b/deployments/kai-scheduler/templates/rbac/operator.yaml
@@ -92,6 +92,18 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - autoscaling.k8s.io
+  resources:
+  - verticalpodautoscalers
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - coordination.k8s.io
   resources:
diff --git a/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml b/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
index 9b091f5a5..4af54ebe2 100644
--- a/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
+++ b/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
@@ -13,29 +13,29 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - nodes
-  - pods
-  - pods/status
+  - configmaps
   verbs:
+  - create
+  - delete
   - get
   - list
+  - patch
+  - update
   - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - nodes
+  - pods
+  - pods/status
   verbs:
-  - create
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - configmaps
+  - leases
   verbs:
   - create
   - delete
diff --git a/deployments/kai-scheduler/templates/rbac/podgrouper.yaml b/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
index 1818196d2..f7e9f823f 100644
--- a/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
+++ b/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
@@ -14,10 +14,13 @@ rules:
   - ""
   resources:
   - configmaps
-  - namespaces
   verbs:
+  - create
+  - delete
   - get
   - list
+  - patch
+  - update
   - watch
 - apiGroups:
   - ""
@@ -31,6 +34,14 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - ""
   resources:
@@ -121,18 +132,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
 - apiGroups:
   - egx.nvidia.io
   resources:
diff --git a/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml b/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
index e7fe08fc6..a911cd9a0 100644
--- a/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
+++ b/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
@@ -11,9 +11,9 @@ metadata:
   name: queuecontroller
 rules:
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - configmaps
   verbs:
   - create
   - delete
@@ -23,9 +23,9 @@ rules:
   - update
   - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - configmaps
+  - leases
   verbs:
   - create
   - delete
diff --git a/deployments/kai-scheduler/templates/rbac/scheduler.yaml b/deployments/kai-scheduler/templates/rbac/scheduler.yaml
index 67f94054f..43bdbe69b 100644
--- a/deployments/kai-scheduler/templates/rbac/scheduler.yaml
+++ b/deployments/kai-scheduler/templates/rbac/scheduler.yaml
@@ -14,17 +14,6 @@ rules:
   - ""
   resources:
   - configmaps
-  - namespaces
-  - nodes
-  - persistentvolumeclaims
-  - persistentvolumes
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - ""
-  resources:
   - events
   - pods/status
   verbs:
@@ -38,29 +27,29 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - pods
+  - namespaces
+  - nodes
+  - persistentvolumeclaims
+  - persistentvolumes
   verbs:
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
   - ""
   resources:
-  - pods/finalizers
+  - pods
   verbs:
-  - create
   - delete
   - get
   - list
   - patch
   - update
+  - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - pods/finalizers
   verbs:
   - create
   - delete
@@ -68,11 +57,10 @@ rules:
   - list
   - patch
   - update
-  - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - configmaps
+  - leases
   verbs:
   - create
   - delete

From b8b141011295b410a00af3c4a793bb838f10b3f7 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 18:51:30 +0200
Subject: [PATCH 13/25] Support a case where VPA isn't installed

---
 .../known_types/verticalpodautoscalers.go     | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/pkg/operator/operands/known_types/verticalpodautoscalers.go b/pkg/operator/operands/known_types/verticalpodautoscalers.go
index aec5e5c33..8e4f3bbda 100644
--- a/pkg/operator/operands/known_types/verticalpodautoscalers.go
+++ b/pkg/operator/operands/known_types/verticalpodautoscalers.go
@@ -6,11 +6,13 @@ package known_types
 import (
 	"context"
 
+	"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common"
 	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 )
 
@@ -24,13 +26,23 @@ func vpaIndexer(object client.Object) []string {
 }
 
 func registerVerticalPodAutoscalers() {
+	var vpaAvailable bool
 	collectable := &Collectable{
 		Collect: getCurrentVPAState,
 		InitWithManager: func(ctx context.Context, mgr manager.Manager) error {
-			return mgr.GetFieldIndexer().IndexField(ctx, &vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
+			err := mgr.GetFieldIndexer().IndexField(ctx, &vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
+			if err != nil {
+				log.FromContext(ctx).Info("VPA CRD not available, skipping field indexer registration")
+				return nil
+			}
+			vpaAvailable = true
+			return nil
 		},
-		InitWithBuilder: func(builder *builder.Builder) *builder.Builder {
-			return builder.Owns(&vpav1.VerticalPodAutoscaler{})
+		InitWithBuilder: func(b *builder.Builder) *builder.Builder {
+			if !vpaAvailable {
+				return b
+			}
+			return b.Owns(&vpav1.VerticalPodAutoscaler{})
 		},
 		InitWithFakeClientBuilder: func(fakeClientBuilder *fake.ClientBuilder) {
 			fakeClientBuilder.WithIndex(&vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
@@ -67,10 +79,19 @@ func VPAFieldInherit(current, desired client.Object) {
 
 func getCurrentVPAState(ctx context.Context, runtimeClient client.Client, reconciler client.Object) (map[string]client.Object, error) {
 	result := map[string]client.Object{}
+
+	hasVPACRD, err := common.CheckCRDsAvailable(ctx, runtimeClient, "verticalpodautoscalers.autoscaling.k8s.io")
+	if err != nil {
+		return nil, err
+	}
+	if !hasVPACRD {
+		return result, nil
+	}
+
 	vpas := &vpav1.VerticalPodAutoscalerList{}
 	reconcilerKey := getReconcilerKey(reconciler)
 
-	err := runtimeClient.List(ctx, vpas, client.MatchingFields{CollectableOwnerKey: reconcilerKey})
+	err = runtimeClient.List(ctx, vpas, client.MatchingFields{CollectableOwnerKey: reconcilerKey})
 	if err != nil {
 		return nil, err
 	}

From 6d0cc1c38a3414ff225cff11762de66eb22d799a Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Mon, 2 Mar 2026 18:57:23 +0200
Subject: [PATCH 14/25] Fix import order

---
 pkg/operator/operands/known_types/verticalpodautoscalers.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pkg/operator/operands/known_types/verticalpodautoscalers.go b/pkg/operator/operands/known_types/verticalpodautoscalers.go
index 8e4f3bbda..c36347a2b 100644
--- a/pkg/operator/operands/known_types/verticalpodautoscalers.go
+++ b/pkg/operator/operands/known_types/verticalpodautoscalers.go
@@ -6,14 +6,15 @@ package known_types
 import (
 	"context"
 
-	"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common"
-	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
+
+	"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common"
 )
 
 func vpaIndexer(object client.Object) []string {

From a731841a2413ada74e27ab5a704812a33e4c3534 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 10:39:31 +0200
Subject: [PATCH 15/25] Edit yaml

---
 .../kai-scheduler/crds/kai.scheduler_schedulingshards.yaml      | 2 +-
 deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml    | 2 +-
 deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml b/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
index 130ccad3d..d034b7cef 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: schedulingshards.kai.scheduler
 spec:
   group: kai.scheduler
diff --git a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
index 364db292c..993ea19eb 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_topologies.yaml
@@ -24,7 +24,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: topologies.kai.scheduler
 spec:
   group: kai.scheduler
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
index 81df93a3d..58bbaacef 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_podgroups.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: podgroups.scheduling.run.ai
 spec:
   group: scheduling.run.ai

From 6126f7b3e8d8d5f512269eae7b3a56caf05d23ab Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 12:15:24 +0200
Subject: [PATCH 16/25] Run fmt fix

---
 pkg/operator/operands/deployable/deployable_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/operator/operands/deployable/deployable_test.go b/pkg/operator/operands/deployable/deployable_test.go
index 0ac33543d..2136849a2 100644
--- a/pkg/operator/operands/deployable/deployable_test.go
+++ b/pkg/operator/operands/deployable/deployable_test.go
@@ -17,11 +17,11 @@ import (
 	"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/known_types"
 
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
-	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/utils/ptr"
 	"sigs.k8s.io/controller-runtime/pkg/client"

From d8603ca0a94fb760b20d7f63c09e7cd620475227 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 12:17:09 +0200
Subject: [PATCH 17/25] Edit versions

---
 deployments/kai-scheduler/crds/kai.scheduler_configs.yaml       | 2 +-
 .../kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml      | 2 +-
 deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml b/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
index 8d4450006..808483ed9 100644
--- a/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
+++ b/deployments/kai-scheduler/crds/kai.scheduler_configs.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: configs.kai.scheduler
 spec:
   group: kai.scheduler
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
index 91e301296..53be0a220 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_bindrequests.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: bindrequests.scheduling.run.ai
 spec:
   group: scheduling.run.ai
diff --git a/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml b/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
index 18ac93868..2b74d682b 100644
--- a/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
+++ b/deployments/kai-scheduler/crds/scheduling.run.ai_queues.yaml
@@ -9,7 +9,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.17.3
+    controller-gen.kubebuilder.io/version: v0.16.1
   name: queues.scheduling.run.ai
 spec:
   group: scheduling.run.ai

From 02c64fc81342ecc5787e39d32ac7aa9166010495 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 17:34:49 +0200
Subject: [PATCH 18/25] Run make validate

---
 .../templates/rbac/admission.yaml             | 18 +++++------
 .../kai-scheduler/templates/rbac/binder.yaml  | 24 +++++++-------
 .../templates/rbac/nodescaleadjuster.yaml     | 31 ++++++++++++------
 .../templates/rbac/podgroupcontroller.yaml    | 22 ++++++-------
 .../templates/rbac/podgrouper.yaml            | 25 ++++++++-------
 .../templates/rbac/queuecontroller.yaml       |  8 ++---
 .../templates/rbac/scheduler.yaml             | 32 +++++++++++++------
 7 files changed, 92 insertions(+), 68 deletions(-)

diff --git a/deployments/kai-scheduler/templates/rbac/admission.yaml b/deployments/kai-scheduler/templates/rbac/admission.yaml
index 72122bba3..88d0a08b1 100644
--- a/deployments/kai-scheduler/templates/rbac/admission.yaml
+++ b/deployments/kai-scheduler/templates/rbac/admission.yaml
@@ -11,9 +11,9 @@ metadata:
   name: kai-admission
 rules:
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - configmaps
+  - leases
   verbs:
   - create
   - delete
@@ -25,20 +25,20 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - events
+  - configmaps
   verbs:
   - create
+  - delete
+  - get
+  - list
   - patch
   - update
+  - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - events
   verbs:
   - create
-  - delete
-  - get
-  - list
   - patch
   - update
-  - watch
diff --git a/deployments/kai-scheduler/templates/rbac/binder.yaml b/deployments/kai-scheduler/templates/rbac/binder.yaml
index b4550f970..4a4968f82 100644
--- a/deployments/kai-scheduler/templates/rbac/binder.yaml
+++ b/deployments/kai-scheduler/templates/rbac/binder.yaml
@@ -10,6 +10,18 @@ kind: ClusterRole
 metadata:
   name: kai-binder
 rules:
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - ""
   resources:
@@ -63,18 +75,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - coordination.k8s.io
-  resources:
-  - leases
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
 - apiGroups:
   - resource.k8s.io
   resources:
diff --git a/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml b/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
index 515676fe9..af2000e3d 100644
--- a/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
+++ b/deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
@@ -13,7 +13,6 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - configmaps
   - pods
   verbs:
   - create
@@ -24,31 +23,43 @@ rules:
   - update
   - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - events
-  - pods/finalizers
+  - leases
   verbs:
   - create
+  - delete
+  - get
+  - list
   - patch
   - update
+  - watch
 - apiGroups:
   - ""
   resources:
-  - pods/status
+  - configmaps
   verbs:
+  - create
+  - delete
   - get
+  - list
   - patch
   - update
+  - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - events
+  - pods/finalizers
   verbs:
   - create
-  - delete
+  - patch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - pods/status
+  verbs:
   - get
-  - list
   - patch
   - update
-  - watch
diff --git a/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml b/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
index 4af54ebe2..9b091f5a5 100644
--- a/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
+++ b/deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
@@ -13,29 +13,29 @@ rules:
 - apiGroups:
   - ""
   resources:
-  - configmaps
+  - nodes
+  - pods
+  - pods/status
   verbs:
-  - create
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - nodes
-  - pods
-  - pods/status
+  - leases
   verbs:
+  - create
+  - delete
   - get
   - list
+  - patch
+  - update
   - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - configmaps
   verbs:
   - create
   - delete
diff --git a/deployments/kai-scheduler/templates/rbac/podgrouper.yaml b/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
index f7e9f823f..1818196d2 100644
--- a/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
+++ b/deployments/kai-scheduler/templates/rbac/podgrouper.yaml
@@ -14,13 +14,10 @@ rules:
   - ""
   resources:
   - configmaps
+  - namespaces
   verbs:
-  - create
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
   - ""
@@ -34,14 +31,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - ""
-  resources:
-  - namespaces
-  verbs:
-  - get
-  - list
-  - watch
 - apiGroups:
   - ""
   resources:
@@ -132,6 +121,18 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - egx.nvidia.io
   resources:
diff --git a/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml b/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
index a911cd9a0..e7fe08fc6 100644
--- a/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
+++ b/deployments/kai-scheduler/templates/rbac/queuecontroller.yaml
@@ -11,9 +11,9 @@ metadata:
   name: queuecontroller
 rules:
 - apiGroups:
-  - ""
+  - coordination.k8s.io
   resources:
-  - configmaps
+  - leases
   verbs:
   - create
   - delete
@@ -23,9 +23,9 @@ rules:
   - update
   - watch
 - apiGroups:
-  - coordination.k8s.io
+  - ""
   resources:
-  - leases
+  - configmaps
   verbs:
   - create
   - delete
diff --git a/deployments/kai-scheduler/templates/rbac/scheduler.yaml b/deployments/kai-scheduler/templates/rbac/scheduler.yaml
index 43bdbe69b..67f94054f 100644
--- a/deployments/kai-scheduler/templates/rbac/scheduler.yaml
+++ b/deployments/kai-scheduler/templates/rbac/scheduler.yaml
@@ -14,26 +14,26 @@ rules:
   - ""
   resources:
   - configmaps
-  - events
-  - pods/status
+  - namespaces
+  - nodes
+  - persistentvolumeclaims
+  - persistentvolumes
   verbs:
-  - create
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
   - ""
   resources:
-  - namespaces
-  - nodes
-  - persistentvolumeclaims
-  - persistentvolumes
+  - events
+  - pods/status
   verbs:
+  - create
+  - delete
   - get
   - list
+  - patch
+  - update
   - watch
 - apiGroups:
   - ""
@@ -69,6 +69,18 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - kai.scheduler
   resources:

From 74e5604292ea1a6b20996f486d372baa6bf2b647 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 18:09:39 +0200
Subject: [PATCH 19/25] Edit changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd897599d..9360b8ac7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+## [v0.14.0] - 2026-03-02
+### Added
+- Added support for VPA configuration for the different components of the KAI Scheduler [#1119](https://github.com/NVIDIA/KAI-Scheduler/pull/1119) [jrosenboimnvidia](https://github.com/jrosenboimnvidia)
+- Users who have VPA installed in their cluster can now use it to vertically autoscale the KAI Scheduler components
+
 ## [v0.13.0] - 2026-03-02
 ### Added
 - Added `global.nodeSelector` propagation from Helm values to Config CR, ensuring operator-created sub-component deployments (admission, binder, scheduler, pod-grouper, etc.) receive the configured nodeSelector [#1102](https://github.com/NVIDIA/KAI-Scheduler/pull/1102) [yuanchen8911](https://github.com/yuanchen8911)

From 8018fa0d876722780a41fcaa8ca868ba7584bf49 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 18:20:52 +0200
Subject: [PATCH 20/25] Add VPA unittests

---
 pkg/apis/kai/v1/common/vpa_test.go            |  50 +++++++++
 pkg/operator/operands/common/vpa_test.go      |  99 +++++++++++++++++
 .../operands/known_types/known_types_test.go  | 101 ++++++++++++++++++
 3 files changed, 250 insertions(+)
 create mode 100644 pkg/apis/kai/v1/common/vpa_test.go
 create mode 100644 pkg/operator/operands/common/vpa_test.go

diff --git a/pkg/apis/kai/v1/common/vpa_test.go b/pkg/apis/kai/v1/common/vpa_test.go
new file mode 100644
index 000000000..111106e43
--- /dev/null
+++ b/pkg/apis/kai/v1/common/vpa_test.go
@@ -0,0 +1,50 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package common
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/utils/ptr"
+)
+
+var _ = Describe("VPASpec", func() {
+	Describe("SetDefaultsWhereNeeded", func() {
+		It("should set Enabled to false and UpdatePolicy to InPlaceOrRecreate when all fields are nil", func() {
+			vpa := &VPASpec{}
+			vpa.SetDefaultsWhereNeeded()
+
+			Expect(vpa.Enabled).To(Equal(ptr.To(false)))
+			expectedMode := vpav1.UpdateModeInPlaceOrRecreate
+			Expect(vpa.UpdatePolicy).To(Equal(&vpav1.PodUpdatePolicy{
+				UpdateMode: &expectedMode,
+			}))
+		})
+
+		It("should not override Enabled when already set", func() {
+			vpa := &VPASpec{Enabled: ptr.To(true)}
+			vpa.SetDefaultsWhereNeeded()
+
+			Expect(*vpa.Enabled).To(BeTrue())
+		})
+
+		It("should not override UpdatePolicy when already set", func() {
+			mode := vpav1.UpdateModeOff
+			vpa := &VPASpec{
+				UpdatePolicy: &vpav1.PodUpdatePolicy{UpdateMode: &mode},
+			}
+			vpa.SetDefaultsWhereNeeded()
+
+			Expect(*vpa.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeOff))
+		})
+
+		It("should not set ResourcePolicy", func() {
+			vpa := &VPASpec{}
+			vpa.SetDefaultsWhereNeeded()
+
+			Expect(vpa.ResourcePolicy).To(BeNil())
+		})
+	})
+})
diff --git a/pkg/operator/operands/common/vpa_test.go b/pkg/operator/operands/common/vpa_test.go
new file mode 100644
index 000000000..acf678303
--- /dev/null
+++ b/pkg/operator/operands/common/vpa_test.go
@@ -0,0 +1,99 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package common
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv1 "k8s.io/api/autoscaling/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	kaicommon "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/common"
+)
+
+var _ = Describe("BuildVPA", func() {
+	It("should return nil when vpaSpec is nil", func() {
+		Expect(BuildVPA(nil, "name", "ns", "Deployment")).To(BeNil())
+	})
+
+	It("should return nil when Enabled is nil", func() {
+		Expect(BuildVPA(&kaicommon.VPASpec{}, "name", "ns", "Deployment")).To(BeNil())
+	})
+
+	It("should return nil when Enabled is false", func() {
+		spec := &kaicommon.VPASpec{Enabled: ptr.To(false)}
+		Expect(BuildVPA(spec, "name", "ns", "Deployment")).To(BeNil())
+	})
+
+	It("should build a VPA targeting the given resource when enabled", func() {
+		mode := vpav1.UpdateModeAuto
+		spec := &kaicommon.VPASpec{
+			Enabled:      ptr.To(true),
+			UpdatePolicy: &vpav1.PodUpdatePolicy{UpdateMode: &mode},
+		}
+
+		result := BuildVPA(spec, "my-deploy", "my-ns", "Deployment")
+		Expect(result).ToNot(BeNil())
+
+		vpa := result.(*vpav1.VerticalPodAutoscaler)
+		Expect(vpa.Name).To(Equal("my-deploy"))
+		Expect(vpa.Namespace).To(Equal("my-ns"))
+		Expect(vpa.Spec.TargetRef).To(Equal(&autoscalingv1.CrossVersionObjectReference{
+			APIVersion: "apps/v1",
+			Kind:       "Deployment",
+			Name:       "my-deploy",
+		}))
+		Expect(*vpa.Spec.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeAuto))
+	})
+})
+
+var _ = Describe("BuildVPAFromObjects", func() {
+	It("should return nil when vpaSpec is nil", func() {
+		Expect(BuildVPAFromObjects(nil, nil, "ns")).To(BeNil())
+	})
+
+	It("should return nil when disabled", func() {
+		spec := &kaicommon.VPASpec{Enabled: ptr.To(false)}
+		Expect(BuildVPAFromObjects(spec, nil, "ns")).To(BeNil())
+	})
+
+	It("should return nil when no Deployment or DaemonSet found", func() {
+		spec := &kaicommon.VPASpec{Enabled: ptr.To(true)}
+		objects := []client.Object{
+			&metav1.PartialObjectMetadata{ObjectMeta: metav1.ObjectMeta{Name: "svc"}},
+		}
+		Expect(BuildVPAFromObjects(spec, objects, "ns")).To(BeNil())
+	})
+
+	It("should build VPA from the first Deployment", func() {
+		spec := &kaicommon.VPASpec{Enabled: ptr.To(true)}
+		objects := []client.Object{
+			&appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{Name: "dep-1"}},
+			&appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{Name: "dep-2"}},
+		}
+
+		result := BuildVPAFromObjects(spec, objects, "ns")
+		Expect(result).ToNot(BeNil())
+		vpa := result.(*vpav1.VerticalPodAutoscaler)
+		Expect(vpa.Name).To(Equal("dep-1"))
+		Expect(vpa.Spec.TargetRef.Kind).To(Equal("Deployment"))
+	})
+
+	It("should build VPA from a DaemonSet", func() {
+		spec := &kaicommon.VPASpec{Enabled: ptr.To(true)}
+		objects := []client.Object{
+			&appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "ds-1"}},
+		}
+
+		result := BuildVPAFromObjects(spec, objects, "ns")
+		Expect(result).ToNot(BeNil())
+		vpa := result.(*vpav1.VerticalPodAutoscaler)
+		Expect(vpa.Name).To(Equal("ds-1"))
+		Expect(vpa.Spec.TargetRef.Kind).To(Equal("DaemonSet"))
+	})
+})
diff --git a/pkg/operator/operands/known_types/known_types_test.go b/pkg/operator/operands/known_types/known_types_test.go
index c8fc95fb4..75639c99a 100644
--- a/pkg/operator/operands/known_types/known_types_test.go
+++ b/pkg/operator/operands/known_types/known_types_test.go
@@ -8,10 +8,12 @@ import (
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 
 	kaiv1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 )
 
 func TestKnownTypes(t *testing.T) {
@@ -44,3 +46,102 @@ var _ = Describe("KnownTypes", func() {
 		})
 	})
 })
+
+var _ = Describe("vpaIndexer", func() {
+	It("should return owner key for VPA owned by KAI", func() {
+		vpa := &vpav1.VerticalPodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "my-vpa",
+				Namespace: "ns",
+				OwnerReferences: []metav1.OwnerReference{
+					{
+						APIVersion: kaiv1.GroupVersion.String(),
+						Kind:       "Config",
+						Name:       SingletonInstanceName,
+						UID:        types.UID("uid-123"),
+						Controller: ptrBool(true),
+					},
+				},
+			},
+		}
+
+		keys := vpaIndexer(vpa)
+		Expect(keys).To(HaveLen(1))
+	})
+
+	It("should return nil for VPA not owned by KAI", func() {
+		vpa := &vpav1.VerticalPodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "my-vpa",
+			},
+		}
+
+		keys := vpaIndexer(vpa)
+		Expect(keys).To(BeNil())
+	})
+})
+
+var _ = Describe("VPAFieldInherit", func() {
+	It("should be a no-op when current is nil", func() {
+		desired := &vpav1.VerticalPodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{Name: "vpa"},
+		}
+		VPAFieldInherit(nil, desired)
+		Expect(desired.GetResourceVersion()).To(BeEmpty())
+	})
+
+	It("should copy metadata and status from current to desired", func() {
+		current := &vpav1.VerticalPodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:            "vpa",
+				ResourceVersion: "42",
+				UID:             types.UID("abc"),
+				Generation:      3,
+				Annotations:     map[string]string{"server-added": "val"},
+			},
+			Status: vpav1.VerticalPodAutoscalerStatus{
+				Recommendation: &vpav1.RecommendedPodResources{},
+			},
+		}
+		desired := &vpav1.VerticalPodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:        "vpa",
+				Annotations: map[string]string{"user-set": "keep"},
+			},
+		}
+
+		VPAFieldInherit(current, desired)
+
+		Expect(desired.GetResourceVersion()).To(Equal("42"))
+		Expect(desired.GetUID()).To(Equal(types.UID("abc")))
+		Expect(desired.GetGeneration()).To(Equal(int64(3)))
+		Expect(desired.GetAnnotations()).To(HaveKeyWithValue("user-set", "keep"))
+		Expect(desired.GetAnnotations()).To(HaveKeyWithValue("server-added", "val"))
+		Expect(desired.Status.Recommendation).ToNot(BeNil())
+	})
+})
+
+var _ = Describe("mergeAnnotations", func() {
+	It("should return current annotations when desired is nil", func() {
+		result := mergeAnnotations(nil, map[string]string{"a": "1"})
+		Expect(result).To(Equal(map[string]string{"a": "1"}))
+	})
+
+	It("should not override desired annotations with current", func() {
+		result := mergeAnnotations(
+			map[string]string{"key": "desired"},
+			map[string]string{"key": "current"},
+		)
+		Expect(result["key"]).To(Equal("desired"))
+	})
+
+	It("should merge non-overlapping annotations", func() {
+		result := mergeAnnotations(
+			map[string]string{"a": "1"},
+			map[string]string{"b": "2"},
+		)
+		Expect(result).To(Equal(map[string]string{"a": "1", "b": "2"}))
+	})
+})
+
+func ptrBool(b bool) *bool { return &b }

From 82676fe4289328d6b95ca0269544f68e27f0f157 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Wed, 4 Mar 2026 19:19:30 +0200
Subject: [PATCH 21/25] Add vpa to helm chart defaults

---
 .../kai-scheduler/templates/kai-config.yaml        |  4 ++++
 deployments/kai-scheduler/values.yaml              | 14 ++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/deployments/kai-scheduler/templates/kai-config.yaml b/deployments/kai-scheduler/templates/kai-config.yaml
index 4b1dc4ea6..7be32eb5f 100644
--- a/deployments/kai-scheduler/templates/kai-config.yaml
+++ b/deployments/kai-scheduler/templates/kai-config.yaml
@@ -43,6 +43,10 @@ spec:
     imagesPullSecret: {{ index .Values.global.imagePullSecrets 0 | default "" }}
     {{- end }}
     replicaCount: {{ .Values.operator.replicaCount | default 1 }}
+    {{- if .Values.global.vpa }}
+    vpa:
+      {{- toYaml .Values.global.vpa | nindent 6 }}
+    {{- end }}
 
   binder:
     service:
diff --git a/deployments/kai-scheduler/values.yaml b/deployments/kai-scheduler/values.yaml
index ac485a9ac..29035a929 100644
--- a/deployments/kai-scheduler/values.yaml
+++ b/deployments/kai-scheduler/values.yaml
@@ -16,6 +16,20 @@ global:
   tolerations: []
   namespaceLabelSelector: {}
   podLabelSelector: {}
+  vpa:
+    enabled: false
+    updatePolicy:
+      updateMode: InPlaceOrRecreate
+      minReplicas: 1
+    resourcePolicy:
+      containerPolicies:
+        - containerName: "*"
+          minAllowed:
+            cpu: 50m
+            memory: 500Mi
+          maxAllowed:
+            cpu: 2
+            memory: 5Gi
   resourceReservation:
     namespace: kai-resource-reservation
     serviceAccount: kai-resource-reservation

From 133a4f866fb5ef4a57816eacfca7ea6755cb1455 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Thu, 5 Mar 2026 17:25:44 +0200
Subject: [PATCH 22/25] Pin metrics-server to v0.8.1 in e2e cluster setup

---
 hack/setup-e2e-cluster.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hack/setup-e2e-cluster.sh b/hack/setup-e2e-cluster.sh
index 4d46cc2ae..5d6f5248d 100755
--- a/hack/setup-e2e-cluster.sh
+++ b/hack/setup-e2e-cluster.sh
@@ -76,7 +76,7 @@ helm install prometheus prometheus-community/kube-prometheus-stack --namespace m
 # Install VPA and its prerequisites
 if [ "$INSTALL_VPA" = "true" ]; then
     echo "Installing metrics-server (required by VPA recommender)..."
-    kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
+    kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.8.1/components.yaml
     # kind uses self-signed kubelet certs, so metrics-server needs --kubelet-insecure-tls
     kubectl patch deployment metrics-server -n kube-system --type=json \
         -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'

From ab27a0dfe4d37c73b9c96a5da833db741df2310a Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Thu, 5 Mar 2026 17:32:06 +0200
Subject: [PATCH 23/25] Add proper default if policy is left blank

---
 pkg/apis/kai/v1/common/vpa.go      |  7 ++++---
 pkg/apis/kai/v1/common/vpa_test.go | 10 ++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/pkg/apis/kai/v1/common/vpa.go b/pkg/apis/kai/v1/common/vpa.go
index e4ba3ee9a..c21a1db02 100644
--- a/pkg/apis/kai/v1/common/vpa.go
+++ b/pkg/apis/kai/v1/common/vpa.go
@@ -29,9 +29,10 @@ func (v *VPASpec) SetDefaultsWhereNeeded() {
 		v.Enabled = ptr.To(false)
 	}
 	if v.UpdatePolicy == nil {
+		v.UpdatePolicy = &vpav1.PodUpdatePolicy{}
+	}
+	if v.UpdatePolicy.UpdateMode == nil {
 		mode := vpav1.UpdateModeInPlaceOrRecreate
-		v.UpdatePolicy = &vpav1.PodUpdatePolicy{
-			UpdateMode: &mode,
-		}
+		v.UpdatePolicy.UpdateMode = &mode
 	}
 }
diff --git a/pkg/apis/kai/v1/common/vpa_test.go b/pkg/apis/kai/v1/common/vpa_test.go
index 111106e43..961be98ab 100644
--- a/pkg/apis/kai/v1/common/vpa_test.go
+++ b/pkg/apis/kai/v1/common/vpa_test.go
@@ -40,6 +40,16 @@ var _ = Describe("VPASpec", func() {
 			Expect(*vpa.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeOff))
 		})
 
+		It("should set UpdateMode to InPlaceOrRecreate when UpdatePolicy is set but UpdateMode is nil", func() {
+			vpa := &VPASpec{
+				UpdatePolicy: &vpav1.PodUpdatePolicy{},
+			}
+			vpa.SetDefaultsWhereNeeded()
+
+			Expect(vpa.UpdatePolicy.UpdateMode).NotTo(BeNil())
+			Expect(*vpa.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeInPlaceOrRecreate))
+		})
+
 		It("should not set ResourcePolicy", func() {
 			vpa := &VPASpec{}
 			vpa.SetDefaultsWhereNeeded()

From e1f95285654a82a8edfa81a3be9a4a53c7731b93 Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Thu, 5 Mar 2026 17:38:10 +0200
Subject: [PATCH 24/25] Apply setDefaultsWhereNeeded in case of partial config

---
 pkg/apis/kai/v1/scheduler/scheduler.go      |  3 ++
 pkg/apis/kai/v1/scheduler/scheduler_test.go | 37 +++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/pkg/apis/kai/v1/scheduler/scheduler.go b/pkg/apis/kai/v1/scheduler/scheduler.go
index e7d8beae0..78e34a24b 100644
--- a/pkg/apis/kai/v1/scheduler/scheduler.go
+++ b/pkg/apis/kai/v1/scheduler/scheduler.go
@@ -71,6 +71,9 @@ func (s *Scheduler) SetDefaultsWhereNeeded(replicaCount *int32, globalVPA *commo
 	if s.VPA == nil {
 		s.VPA = globalVPA
 	}
+	if s.VPA != nil {
+		s.VPA.SetDefaultsWhereNeeded()
+	}
 }
 
 // Service defines configuration for the scheduler service
diff --git a/pkg/apis/kai/v1/scheduler/scheduler_test.go b/pkg/apis/kai/v1/scheduler/scheduler_test.go
index d744b61e9..62b917896 100644
--- a/pkg/apis/kai/v1/scheduler/scheduler_test.go
+++ b/pkg/apis/kai/v1/scheduler/scheduler_test.go
@@ -12,6 +12,10 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	vpav1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/utils/ptr"
+
+	kaicommon "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/common"
 )
 
 func TestScheduler(t *testing.T) {
@@ -63,4 +67,37 @@ var _ = Describe("Scheduler", func() {
 		scheduler.SetDefaultsWhereNeeded(replicaCount, nil)
 		Expect(*scheduler.Replicas).To(Equal(int32(1)))
 	})
+
+	It("inherits globalVPA when VPA is nil", func(ctx context.Context) {
+		scheduler := &Scheduler{}
+		mode := vpav1.UpdateModeOff
+		globalVPA := &kaicommon.VPASpec{
+			Enabled:      ptr.To(true),
+			UpdatePolicy: &vpav1.PodUpdatePolicy{UpdateMode: &mode},
+		}
+		scheduler.SetDefaultsWhereNeeded(ptr.To(int32(1)), globalVPA)
+
+		Expect(scheduler.VPA).To(Equal(globalVPA))
+		Expect(*scheduler.VPA.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeOff))
+	})
+
+	It("applies defaults to local VPA when UpdateMode is nil", func(ctx context.Context) {
+		scheduler := &Scheduler{
+			VPA: &kaicommon.VPASpec{
+				Enabled:      ptr.To(true),
+				UpdatePolicy: &vpav1.PodUpdatePolicy{},
+			},
+		}
+		scheduler.SetDefaultsWhereNeeded(ptr.To(int32(1)), nil)
+
+		Expect(scheduler.VPA.UpdatePolicy.UpdateMode).NotTo(BeNil())
+		Expect(*scheduler.VPA.UpdatePolicy.UpdateMode).To(Equal(vpav1.UpdateModeInPlaceOrRecreate))
+	})
+
+	It("does not call SetDefaultsWhereNeeded when VPA remains nil", func(ctx context.Context) {
+		scheduler := &Scheduler{}
+		scheduler.SetDefaultsWhereNeeded(ptr.To(int32(1)), nil)
+
+		Expect(scheduler.VPA).To(BeNil())
+	})
 })

From 20c4154b63d3c5c1d9c4627237ec47411bdf0ebc Mon Sep 17 00:00:00 2001
From: Jonathan Rosenboim <jrosenboim@nvidia.com>
Date: Thu, 5 Mar 2026 17:39:23 +0200
Subject: [PATCH 25/25] Add error to the log

---
 pkg/operator/operands/known_types/verticalpodautoscalers.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/operator/operands/known_types/verticalpodautoscalers.go b/pkg/operator/operands/known_types/verticalpodautoscalers.go
index c36347a2b..b3a13b58b 100644
--- a/pkg/operator/operands/known_types/verticalpodautoscalers.go
+++ b/pkg/operator/operands/known_types/verticalpodautoscalers.go
@@ -33,7 +33,7 @@ func registerVerticalPodAutoscalers() {
 		InitWithManager: func(ctx context.Context, mgr manager.Manager) error {
 			err := mgr.GetFieldIndexer().IndexField(ctx, &vpav1.VerticalPodAutoscaler{}, CollectableOwnerKey, vpaIndexer)
 			if err != nil {
-				log.FromContext(ctx).Info("VPA CRD not available, skipping field indexer registration")
+				log.FromContext(ctx).Info("VPA CRD not available, skipping field indexer registration", "error", err)
 				return nil
 			}
 			vpaAvailable = true