scitix · LikiosSedo · Jan 16, 2026 · Feb 24, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/api/v1/arksapplication_types.go b/api/v1/arksapplication_types.go
@@ -25,6 +25,8 @@ type ArksDriver string
 type ArksRuntime string
 type ArksApplicationPhase string
 type ArksApplicationConditionType string
+type ArksApplicationMode string          // Topology mode; constants declared in this file: "unified", "disaggregated".
+type ArksApplicationTrafficTarget string // Traffic target; constants declared in this file: "inference", "router", "pending".
 
 type ArksBackend string
 
@@ -42,12 +44,21 @@ const (
 	ArksApplicationLoaded ArksApplicationConditionType = "Loaded"
 	// ArksApplicationReady is the condition that indicates if the application is ready or not.
 	ArksApplicationReady ArksApplicationConditionType = "Ready"
+	// ArksApplicationTrafficTargetReady is the condition that indicates service traffic is routed to a ready target.
+	ArksApplicationTrafficTargetReady ArksApplicationConditionType = "TrafficTargetReady"
 
 	ArksRuntimeDefault ArksRuntime = "vllm" // The default driver is vLLM
 	ArksRuntimeVLLM    ArksRuntime = "vllm"
 	ArksRuntimeSGLang  ArksRuntime = "sglang"
 	ArksRuntimeDynamo  ArksRuntime = "dynamo"
 
+	ArksApplicationModeUnified       ArksApplicationMode = "unified"
+	ArksApplicationModeDisaggregated ArksApplicationMode = "disaggregated"
+
+	ArksApplicationTrafficTargetInference ArksApplicationTrafficTarget = "inference"
+	ArksApplicationTrafficTargetRouter    ArksApplicationTrafficTarget = "router"
+	ArksApplicationTrafficTargetPending   ArksApplicationTrafficTarget = "pending"
+
 	// Backend types for workload orchestration
 	ArksBackendLWS ArksBackend = "lws" // LeaderWorkerSet backend (no rolling update)
 	ArksBackendRBG ArksBackend = "rbg" // RoleBasedGroup backend (supports rolling update)
@@ -249,10 +260,48 @@ type ArksInstanceSpec struct {
 	InitContainers []corev1.Container `json:"initContainers"`
 }
 
+type ArksApplicationRouter struct { // Router settings; used as the type of ArksApplicationSpec.Router.
+	// +optional
+	Enabled bool `json:"enabled,omitempty"` // Presumably switches the router on — TODO confirm; false is dropped from JSON by omitempty.
+	// +optional
+	Replicas *int32 `json:"replicas,omitempty"` // nil means unset and is dropped from JSON; the default applied in that case is not visible here.
+	// +optional
+	Image string `json:"image,omitempty"` // Presumably the router container image — TODO confirm against the controller.
+	// +optional
+	CommandOverride []string `json:"commandOverride,omitempty"` // Presumably replaces the router's default command — TODO confirm.
+	// +optional
+	Port int32 `json:"port,omitempty"` // Presumably the router serving port — TODO confirm; 0 is dropped from JSON.
+	// +optional
+	MetricPort int32 `json:"metricPort,omitempty"` // Presumably the router metrics port — TODO confirm; 0 is dropped from JSON.
+	// +optional
+	RouterArgs []string `json:"routerArgs,omitempty"` // Presumably extra arguments for the router process — TODO confirm.
+	// +optional
+	InstanceSpec ArksInstanceSpec `json:"instanceSpec,omitempty"` // NOTE(review): encoding/json never treats a struct value as empty, so omitempty does not drop this field.
+}
+
+type ArksApplicationWorkload struct { // Workload settings; used as the type of ArksApplicationSpec.Prefill and ArksApplicationSpec.Decode.
+	// +optional
+	Replicas *int32 `json:"replicas,omitempty"` // nil means unset and is dropped from JSON; the default applied in that case is not visible here.
+	// +optional
+	Size int `json:"size,omitempty"` // Presumably pods per replica group — TODO confirm. NOTE(review): Kubernetes API conventions ask for int32/int64 rather than int.
+	// +optional
+	LeaderCommandOverride []string `json:"leaderCommandOverride,omitempty"` // Presumably replaces the leader's default command — TODO confirm.
+	// +optional
+	WorkerCommandOverride []string `json:"workerCommandOverride,omitempty"` // Presumably replaces the workers' default command — TODO confirm.
+	// +optional
+	RuntimeCommonArgs []string `json:"runtimeCommonArgs,omitempty"` // Presumably runtime arguments shared by leader and workers — TODO confirm.
+	// +optional
+	InstanceSpec ArksInstanceSpec `json:"instanceSpec,omitempty"` // NOTE(review): encoding/json never treats a struct value as empty, so omitempty does not drop this field.
+}
+
 // ArksApplicationSpec defines the desired state of ArksApplication.
 type ArksApplicationSpec struct {
 	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
 	// Important: Run "make" to regenerate code after modifying this file
+	// +optional
+	// +kubebuilder:validation:Enum=unified;disaggregated
+	Mode ArksApplicationMode `json:"mode,omitempty"`
+
 	// +optional
 	Replicas int `json:"replicas"`
 
@@ -296,18 +345,43 @@ type ArksApplicationSpec struct {
 	// +optional
 	// +kubebuilder:validation:Immutable
 	PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy"`
+
+	// +optional
+	Router ArksApplicationRouter `json:"router,omitempty"`
+
+	// +optional
+	Prefill *ArksApplicationWorkload `json:"prefill,omitempty"`
+
+	// +optional
+	Decode *ArksApplicationWorkload `json:"decode,omitempty"`
+
+	// +optional
+	CoordinationPolicy *CoordinationPolicy `json:"coordinationPolicy,omitempty"`
 }
 
 // ArksApplicationStatus defines the observed state of ArksApplication.
 type ArksApplicationStatus struct {
 	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
 	// Important: Run "make" to regenerate code after modifying this file
 	Phase string `json:"phase"`
+	// +optional
+	Mode ArksApplicationMode `json:"mode,omitempty"` // Same type as spec.mode; presumably the mode the controller is acting on — TODO confirm.
+	// +optional
+	TrafficTarget ArksApplicationTrafficTarget `json:"trafficTarget,omitempty"` // No enum marker on this field; constants in this file: "inference", "router", "pending".
 
 	Replicas        int32 `json:"replicas"`
 	ReadyReplicas   int32 `json:"readyReplicas"`
 	UpdatedReplicas int32 `json:"updatedReplicas"`
 
+	// +optional
+	Inference ArksComponentStatus `json:"inference,omitempty"` // Presumably the inference component's status — TODO confirm. NOTE(review): omitempty never drops a struct value in encoding/json.
+	// +optional
+	Router ArksComponentStatus `json:"router,omitempty"` // Presumably the router component's status — TODO confirm. NOTE(review): omitempty never drops a struct value in encoding/json.
+	// +optional
+	Prefill ArksComponentStatus `json:"prefill,omitempty"` // Presumably the prefill workload's status — TODO confirm. NOTE(review): omitempty never drops a struct value in encoding/json.
+	// +optional
+	Decode ArksComponentStatus `json:"decode,omitempty"` // Presumably the decode workload's status — TODO confirm. NOTE(review): omitempty never drops a struct value in encoding/json.
+
 	Conditions []ArksApplicationCondition `json:"conditions,omitempty"`
 }
 
@@ -316,6 +390,7 @@ type ArksApplicationStatus struct {
 // +kubebuilder:object:root=true
 // +kubebuilder:subresource:status
 // +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="The current phase of the application"
+// +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.mode",description="The inference topology mode"
 // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
 // +kubebuilder:printcolumn:name="Replicas",type="string",JSONPath=".status.replicas"
 // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.readyReplicas"

diff --git a/api/v1/arksdisaggregatedapplication_types.go b/api/v1/arksdisaggregatedapplication_types.go
@@ -66,6 +66,67 @@ type VolcanoSchedulingPodGroupPolicySource struct {
 	Queue string `json:"queue,omitempty"`
 }
 
+// CoordinationPolicy controls the coordination strategy for the prefill and decode roles.
+// When configured, prefill and decode deployments and updates proceed in a coordinated manner.
+// It is independent of PodGroupPolicy and can be used with LWS-level gang scheduling.
+type CoordinationPolicy struct {
+	// Scaling defines the coordination strategy for initial deployment and scale-up.
+	// It takes effect when prefill or decode scales up from 0 replicas, or when replicas increase.
+	// +optional
+	Scaling *ScalingCoordination `json:"scaling,omitempty"`
+
+	// RollingUpdate defines the coordination strategy for rolling updates.
+	// It takes effect when a Pod template change (e.g., image, config) triggers a rolling update.
+	// +optional
+	RollingUpdate *RollingUpdateCoordination `json:"rollingUpdate,omitempty"`
+}
+
+// ScalingCoordination defines the coordination strategy for scaling operations.
+// It ensures prefill and decode are created proportionally to avoid resource waste.
+type ScalingCoordination struct {
+	// MaxSkew defines the maximum allowed difference in deployment progress between prefill and decode.
+	// For example, with "10%", the deployment progress difference cannot exceed 10%.
+	// Only whole-number percentages from "0%" to "100%" pass validation; the default is "10%".
+	// +optional
+	// +kubebuilder:default="10%"
+	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
+	MaxSkew string `json:"maxSkew,omitempty"`
+
+	// Progression defines when to proceed to the next deployment batch; the default is OrderScheduled.
+	// - OrderScheduled: wait for all pods in the current batch to be scheduled (have nodeName).
+	// - OrderReady: wait for all pods in the current batch to be ready.
+	// +optional
+	// +kubebuilder:default="OrderScheduled"
+	// +kubebuilder:validation:Enum=OrderScheduled;OrderReady
+	Progression string `json:"progression,omitempty"`
+}
+
+// RollingUpdateCoordination defines the coordination strategy for rolling updates.
+// It ensures prefill and decode are updated synchronously to avoid version inconsistency.
+type RollingUpdateCoordination struct {
+	// MaxSkew defines the maximum allowed difference in update progress between prefill and decode.
+	// For example, with "5%", the update progress difference cannot exceed 5%.
+	// Only whole-number percentages from "0%" to "100%" pass validation; the default is "5%".
+	// +optional
+	// +kubebuilder:default="5%"
+	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
+	MaxSkew string `json:"maxSkew,omitempty"`
+
+	// MaxUnavailable defines the maximum share of replicas that may be unavailable during the update, as a percentage.
+	// If configured, overrides the MaxUnavailable in each role's RolloutStrategy.
+	// Only whole-number percentages from "0%" to "100%" pass validation; no default is declared.
+	// +optional
+	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
+	MaxUnavailable string `json:"maxUnavailable,omitempty"`
+
+	// Partition defines the partition point for the rolling update, as a percentage.
+	// If configured, overrides the Partition in each role's RolloutStrategy.
+	// Only whole-number percentages from "0%" to "100%" pass validation; no default is declared.
+	// +optional
+	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
+	Partition string `json:"partition,omitempty"`
+}
+
 type ArksDisaggregatedRouter struct {
 	// +optional
 	Replicas *int32 `json:"replicas"`
@@ -142,9 +203,19 @@ type ArksDisaggregatedApplicationSpec struct {
 	// Decode
 	Decode ArksDisaggregatedWorkload `json:"decode"`
 
+	// PodGroupPolicy controls RBG-level gang scheduling.
+	// When using LWS workloads with LWS-level gang scheduling enabled,
+	// leave this unset and use CoordinationPolicy instead.
 	// +optional
 	// +kubebuilder:validation:Immutable
-	PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy"`
+	PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy,omitempty"`
+
+	// CoordinationPolicy controls the coordinated scaling and rolling-update strategy for prefill and decode.
+	// This enables progressive deployment where prefill and decode are created in batches
+	// whose progress gap is bounded by the configured MaxSkew, independent of PodGroupPolicy.
+	// Use this with LWS-level gang scheduling for optimal deployment behavior.
+	// +optional
+	CoordinationPolicy *CoordinationPolicy `json:"coordinationPolicy,omitempty"`
 }
 
 type ArksComponentStatus struct {