Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions api/v1/arksapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ type ArksDriver string
// String-backed identifier types used by the ArksApplication API.
type (
	// ArksRuntime names an inference runtime, e.g. "vllm", "sglang" or "dynamo".
	ArksRuntime string

	// ArksApplicationPhase is the string type used for an application's phase.
	ArksApplicationPhase string

	// ArksApplicationConditionType names a kind of application condition,
	// e.g. "Loaded", "Ready" or "TrafficTargetReady".
	ArksApplicationConditionType string

	// ArksApplicationMode names the inference topology mode of an application:
	// "unified" or "disaggregated".
	ArksApplicationMode string

	// ArksApplicationTrafficTarget names the target that service traffic is
	// routed to: "inference", "router" or "pending".
	ArksApplicationTrafficTarget string
)

type ArksBackend string

Expand All @@ -42,12 +44,21 @@ const (
ArksApplicationLoaded ArksApplicationConditionType = "Loaded"
// ArksApplicationReady is the condition that indicates if the application is ready or not.
ArksApplicationReady ArksApplicationConditionType = "Ready"
// ArksApplicationTrafficTargetReady is the condition that indicates service traffic is routed to a ready target.
ArksApplicationTrafficTargetReady ArksApplicationConditionType = "TrafficTargetReady"

ArksRuntimeDefault ArksRuntime = "vllm" // The default driver is vLLM
ArksRuntimeVLLM ArksRuntime = "vllm"
ArksRuntimeSGLang ArksRuntime = "sglang"
ArksRuntimeDynamo ArksRuntime = "dynamo"

ArksApplicationModeUnified ArksApplicationMode = "unified"
ArksApplicationModeDisaggregated ArksApplicationMode = "disaggregated"

ArksApplicationTrafficTargetInference ArksApplicationTrafficTarget = "inference"
ArksApplicationTrafficTargetRouter ArksApplicationTrafficTarget = "router"
ArksApplicationTrafficTargetPending ArksApplicationTrafficTarget = "pending"

// Backend types for workload orchestration
ArksBackendLWS ArksBackend = "lws" // LeaderWorkerSet backend (no rolling update)
ArksBackendRBG ArksBackend = "rbg" // RoleBasedGroup backend (supports rolling update)
Expand Down Expand Up @@ -249,10 +260,48 @@ type ArksInstanceSpec struct {
InitContainers []corev1.Container `json:"initContainers"`
}

// NOTE(review): the field descriptions below restate the field names; the
// controller logic that consumes them is not part of this file — confirm
// the exact semantics against the reconciler before relying on them.

// ArksApplicationRouter groups the router-related settings of an
// ArksApplication. Every field is optional.
type ArksApplicationRouter struct {
	// Enabled switches the router on or off.
	// +optional
	Enabled bool `json:"enabled,omitempty"`

	// Replicas is the desired number of router replicas; nil means unspecified.
	// +optional
	Replicas *int32 `json:"replicas,omitempty"`

	// Image is the container image for the router.
	// +optional
	Image string `json:"image,omitempty"`

	// CommandOverride, when set, supplies the command for the router.
	// +optional
	CommandOverride []string `json:"commandOverride,omitempty"`

	// Port is the router port.
	// +optional
	Port int32 `json:"port,omitempty"`

	// MetricPort is the router metrics port.
	// +optional
	MetricPort int32 `json:"metricPort,omitempty"`

	// RouterArgs lists additional arguments for the router.
	// +optional
	RouterArgs []string `json:"routerArgs,omitempty"`

	// InstanceSpec carries the instance-level settings for the router.
	// +optional
	InstanceSpec ArksInstanceSpec `json:"instanceSpec,omitempty"`
}

// NOTE(review): the field descriptions below restate the field names; the
// controller logic that consumes them is not part of this file — confirm
// the exact semantics against the reconciler before relying on them.

// ArksApplicationWorkload groups the settings of one workload of an
// ArksApplication (it is the type of the spec's prefill and decode
// fields). Every field is optional.
type ArksApplicationWorkload struct {
	// Replicas is the desired number of workload replicas; nil means unspecified.
	// +optional
	Replicas *int32 `json:"replicas,omitempty"`

	// Size is the size of the workload.
	// +optional
	Size int `json:"size,omitempty"`

	// LeaderCommandOverride, when set, supplies the command for the leader.
	// +optional
	LeaderCommandOverride []string `json:"leaderCommandOverride,omitempty"`

	// WorkerCommandOverride, when set, supplies the command for the workers.
	// +optional
	WorkerCommandOverride []string `json:"workerCommandOverride,omitempty"`

	// RuntimeCommonArgs lists runtime arguments common to the workload.
	// +optional
	RuntimeCommonArgs []string `json:"runtimeCommonArgs,omitempty"`

	// InstanceSpec carries the instance-level settings for the workload.
	// +optional
	InstanceSpec ArksInstanceSpec `json:"instanceSpec,omitempty"`
}

// ArksApplicationSpec defines the desired state of ArksApplication.
type ArksApplicationSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file
// +optional
// +kubebuilder:validation:Enum=unified;disaggregated
Mode ArksApplicationMode `json:"mode,omitempty"`

// +optional
Replicas int `json:"replicas"`

Expand Down Expand Up @@ -296,18 +345,43 @@ type ArksApplicationSpec struct {
// +optional
// +kubebuilder:validation:Immutable
PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy"`

// +optional
Router ArksApplicationRouter `json:"router,omitempty"`

// +optional
Prefill *ArksApplicationWorkload `json:"prefill,omitempty"`

// +optional
Decode *ArksApplicationWorkload `json:"decode,omitempty"`

// +optional
CoordinationPolicy *CoordinationPolicy `json:"coordinationPolicy,omitempty"`
}

// ArksApplicationStatus captures the observed state of an ArksApplication.
type ArksApplicationStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Phase is the current phase of the application.
	Phase string `json:"phase"`

	// Mode is the observed inference topology mode.
	// +optional
	Mode ArksApplicationMode `json:"mode,omitempty"`

	// TrafficTarget is the target that service traffic is currently routed to.
	// +optional
	TrafficTarget ArksApplicationTrafficTarget `json:"trafficTarget,omitempty"`

	// Replicas is the observed replica count.
	Replicas int32 `json:"replicas"`

	// ReadyReplicas is the observed count of ready replicas.
	ReadyReplicas int32 `json:"readyReplicas"`

	// UpdatedReplicas is the observed count of updated replicas.
	UpdatedReplicas int32 `json:"updatedReplicas"`

	// Inference is the status of the inference component.
	// +optional
	Inference ArksComponentStatus `json:"inference,omitempty"`

	// Router is the status of the router component.
	// +optional
	Router ArksComponentStatus `json:"router,omitempty"`

	// Prefill is the status of the prefill component.
	// +optional
	Prefill ArksComponentStatus `json:"prefill,omitempty"`

	// Decode is the status of the decode component.
	// +optional
	Decode ArksComponentStatus `json:"decode,omitempty"`

	// Conditions lists the conditions recorded for the application.
	Conditions []ArksApplicationCondition `json:"conditions,omitempty"`
}

Expand All @@ -316,6 +390,7 @@ type ArksApplicationStatus struct {
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="The current phase of the application"
// +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.mode",description="The inference topology mode"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
// +kubebuilder:printcolumn:name="Replicas",type="string",JSONPath=".status.replicas"
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.readyReplicas"
Expand Down
73 changes: 72 additions & 1 deletion api/v1/arksdisaggregatedapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,67 @@ type VolcanoSchedulingPodGroupPolicySource struct {
Queue string `json:"queue,omitempty"`
}

// CoordinationPolicy describes how the prefill and decode roles are kept in
// step with each other. Once set, deployment and update of the two roles
// advance in a coordinated manner. It does not depend on PodGroupPolicy and
// may be combined with LWS-level gang scheduling.
type CoordinationPolicy struct {
	// Scaling is the coordination strategy applied on initial deployment and
	// on scale-up, i.e. when prefill/decode grows from 0 replicas or when the
	// replica count increases.
	// +optional
	Scaling *ScalingCoordination `json:"scaling,omitempty"`

	// RollingUpdate is the coordination strategy applied to rolling updates,
	// i.e. when a Pod template change (image, config, ...) triggers a rollout.
	// +optional
	RollingUpdate *RollingUpdateCoordination `json:"rollingUpdate,omitempty"`
}

// ScalingCoordination is the coordination strategy for scaling operations.
// It keeps prefill and decode growing proportionally so that resources are
// not wasted on one role while the other lags behind.
type ScalingCoordination struct {
	// MaxSkew is the largest allowed gap between the deployment progress of
	// prefill and that of decode. With "10%", for instance, the two progress
	// values may differ by at most 10%.
	// Percentage values only.
	// +optional
	// +kubebuilder:default="10%"
	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
	MaxSkew string `json:"maxSkew,omitempty"`

	// Progression selects the condition for moving on to the next deployment batch.
	// - OrderScheduled: every pod of the current batch must be scheduled (have a nodeName).
	// - OrderReady: every pod of the current batch must be ready.
	// +optional
	// +kubebuilder:default="OrderScheduled"
	// +kubebuilder:validation:Enum=OrderScheduled;OrderReady
	Progression string `json:"progression,omitempty"`
}

// RollingUpdateCoordination is the coordination strategy for rolling updates.
// It keeps prefill and decode updating in lockstep so that the two roles do
// not end up on inconsistent versions.
type RollingUpdateCoordination struct {
	// MaxSkew is the largest allowed gap between the update progress of
	// prefill and that of decode. With "5%", for instance, the two progress
	// values may differ by at most 5%.
	// Percentage values only.
	// +optional
	// +kubebuilder:default="5%"
	// +kubebuilder:validation:Pattern=`^([0-9]|[1-9][0-9]|100)%$`
	MaxUnavailableSkewPlaceholder struct{} `json:"-"`
}

type ArksDisaggregatedRouter struct {
// +optional
Replicas *int32 `json:"replicas"`
Expand Down Expand Up @@ -142,9 +203,19 @@ type ArksDisaggregatedApplicationSpec struct {
// Decode
Decode ArksDisaggregatedWorkload `json:"decode"`

// PodGroupPolicy controls RBG-level gang scheduling.
// When using LWS workloads with LWS-level gang scheduling enabled,
// leave this unset and use CoordinationPolicy instead.
// +optional
// +kubebuilder:validation:Immutable
PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy"`
PodGroupPolicy *PodGroupPolicy `json:"podGroupPolicy,omitempty"`

// CoordinationPolicy controls the coordinated scaling strategy for prefill and decode.
// This enables progressive deployment where prefill and decode are created in batches
// according to the specified ratio, independent of PodGroupPolicy.
// Use this with LWS-level gang scheduling for optimal deployment behavior.
// +optional
CoordinationPolicy *CoordinationPolicy `json:"coordinationPolicy,omitempty"`
}

type ArksComponentStatus struct {
Expand Down
Loading