Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ COPY ./internal/status-updater/ ./internal/status-updater/
COPY ./internal/kwok-dra-plugin/ ./internal/kwok-dra-plugin/
RUN --mount=type=cache,target=/root/.cache/go-build make build OS=$TARGETOS ARCH=$TARGETARCH COMPONENTS=kwok-dra-plugin

# Build stage for the kwok-compute-domain-dra-plugin binary, layered on the
# common-builder stage.
FROM common-builder AS kwok-compute-domain-dra-plugin-builder
# Sources added for this component: its entry point, the compute-domain
# package, and the plugin's internal implementation.
COPY ./cmd/kwok-compute-domain-dra-plugin/ ./cmd/kwok-compute-domain-dra-plugin/
COPY ./pkg/compute-domain/ ./pkg/compute-domain/
COPY ./internal/kwok-compute-domain-dra-plugin/ ./internal/kwok-compute-domain-dra-plugin/
# Build only this component, passing TARGETOS/TARGETARCH through to make.
# A BuildKit cache is mounted at /root/.cache/go-build for the duration of the step.
RUN --mount=type=cache,target=/root/.cache/go-build make build OS=$TARGETOS ARCH=$TARGETARCH COMPONENTS=kwok-compute-domain-dra-plugin

FROM common-builder AS preloader-builder
COPY ./cmd/preloader/ ./cmd/preloader/
RUN make build-preloader
Expand Down Expand Up @@ -114,6 +120,10 @@ FROM ubuntu AS kwok-dra-plugin
COPY --from=kwok-dra-plugin-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/kwok-dra-plugin /bin/
ENTRYPOINT ["/bin/kwok-dra-plugin"]

# Runtime image for kwok-compute-domain-dra-plugin: takes the binary produced by
# the kwok-compute-domain-dra-plugin-builder stage, places it in /bin and runs it
# as the container entrypoint.
FROM ubuntu AS kwok-compute-domain-dra-plugin
COPY --from=kwok-compute-domain-dra-plugin-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/kwok-compute-domain-dra-plugin /bin/
ENTRYPOINT ["/bin/kwok-compute-domain-dra-plugin"]

FROM ubuntu AS compute-domain-controller
COPY --from=compute-domain-controller-builder /go/src/github.com/run-ai/fake-gpu-operator/bin/compute-domain-controller /bin/
ENTRYPOINT ["/bin/compute-domain-controller"]
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BUILD_DIR=$(shell pwd)/bin
COMPONENTS?=device-plugin dra-plugin-gpu status-updater kwok-gpu-device-plugin kwok-dra-plugin status-exporter status-exporter-kwok topology-server mig-faker compute-domain-controller compute-domain-dra-plugin
COMPONENTS?=device-plugin dra-plugin-gpu status-updater kwok-gpu-device-plugin kwok-dra-plugin kwok-compute-domain-dra-plugin status-exporter status-exporter-kwok topology-server mig-faker compute-domain-controller compute-domain-dra-plugin

DOCKER_REPO_BASE=ghcr.io/run-ai/fake-gpu-operator
DOCKER_TAG?=0.0.0-dev
Expand Down
15 changes: 15 additions & 0 deletions cmd/kwok-compute-domain-dra-plugin/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package main

import (
"github.com/run-ai/fake-gpu-operator/internal/common/app"
"github.com/run-ai/fake-gpu-operator/internal/common/config"
kwokcomputedomaindraplugin "github.com/run-ai/fake-gpu-operator/internal/kwok-compute-domain-dra-plugin"
)

func main() {
requiredEnvVars := []string{kwokcomputedomaindraplugin.EnvFakeGpuOperatorNamespace}
config.ValidateConfig(requiredEnvVars)

appRunner := app.NewAppRunner(&kwokcomputedomaindraplugin.KWOKComputeDomainDraPluginApp{})
appRunner.Run()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{- if .Values.kwokComputeDomainDraPlugin.enabled }}
# ClusterRole for the kwok-compute-domain-dra-plugin. Rendered only when
# kwokComputeDomainDraPlugin.enabled is set in the chart values.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kwok-compute-domain-dra-plugin
rules:
# Read-only access to core-group Nodes.
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
# Read and write access to ResourceSlices in resource.k8s.io.
# Note: "patch" is not granted, only create/update/delete.
- apiGroups:
- resource.k8s.io
resources:
- resourceslices
verbs:
- get
- list
- watch
- create
- update
- delete
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{- if .Values.kwokComputeDomainDraPlugin.enabled }}
# Binds the kwok-compute-domain-dra-plugin ClusterRole to the ServiceAccount of
# the same name in the release namespace. Rendered only when
# kwokComputeDomainDraPlugin.enabled is set in the chart values.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kwok-compute-domain-dra-plugin
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kwok-compute-domain-dra-plugin
subjects:
- kind: ServiceAccount
name: kwok-compute-domain-dra-plugin
namespace: "{{ .Release.Namespace }}"
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- if .Values.kwokComputeDomainDraPlugin.enabled }}
# Single-replica Deployment of the kwok-compute-domain-dra-plugin. Rendered only
# when kwokComputeDomainDraPlugin.enabled is set in the chart values.
apiVersion: apps/v1
kind: Deployment
metadata:
name: kwok-compute-domain-dra-plugin
labels:
app: kwok-compute-domain-dra-plugin
spec:
# The selector labels must match the pod template labels below.
selector:
matchLabels:
app: kwok-compute-domain-dra-plugin
component: kwok-compute-domain-dra-plugin
replicas: 1
template:
metadata:
labels:
app: kwok-compute-domain-dra-plugin
component: kwok-compute-domain-dra-plugin
spec:
containers:
- name: kwok-compute-domain-dra-plugin
# The image tag falls back to the chart's appVersion when image.tag is empty.
image: "{{ .Values.kwokComputeDomainDraPlugin.image.repository }}:{{ .Values.kwokComputeDomainDraPlugin.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.kwokComputeDomainDraPlugin.image.pullPolicy }}"
# Requests and limits are taken verbatim from the chart values.
resources:
{{- toYaml .Values.kwokComputeDomainDraPlugin.resources | nindent 12 }}
env:
# The release namespace is passed to the plugin as FAKE_GPU_OPERATOR_NAMESPACE.
- name: FAKE_GPU_OPERATOR_NAMESPACE
value: "{{ .Release.Namespace }}"
restartPolicy: Always
serviceAccountName: kwok-compute-domain-dra-plugin
# The pull secret name is fixed here rather than read from values.
imagePullSecrets:
- name: gcr-secret
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{- if .Values.kwokComputeDomainDraPlugin.enabled }}
# ServiceAccount named kwok-compute-domain-dra-plugin. Rendered only when
# kwokComputeDomainDraPlugin.enabled is set in the chart values.
apiVersion: v1
kind: ServiceAccount
metadata:
name: kwok-compute-domain-dra-plugin
labels:
app: kwok-compute-domain-dra-plugin
{{- end }}
14 changes: 14 additions & 0 deletions deploy/fake-gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,17 @@ computeDomainDraPlugin:
limits:
cpu: "200m"
memory: "400Mi"

# Settings for the kwok-compute-domain-dra-plugin component.
kwokComputeDomainDraPlugin:
# Disabled by default.
enabled: false
image:
pullPolicy: Always
repository: ghcr.io/run-ai/fake-gpu-operator/kwok-compute-domain-dra-plugin
# No tag is pinned here; the value is left empty.
tag: ""
# Container resource requests and limits.
resources:
requests:
cpu: "100m"
memory: "200Mi"
limits:
cpu: "200m"
memory: "400Mi"
90 changes: 90 additions & 0 deletions internal/kwok-compute-domain-dra-plugin/app.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package kwokcomputedomaindraplugin

import (
"context"
"log"

"github.com/spf13/viper"
corev1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"

nodecontroller "github.com/run-ai/fake-gpu-operator/internal/kwok-compute-domain-dra-plugin/controllers/node"
)

const (
	// EnvFakeGpuOperatorNamespace is the configuration key that Init looks up
	// through viper to obtain the namespace passed to the Node controller.
	// NOTE(review): presumably bound to the environment variable of the same
	// name — confirm against the shared config loader.
	EnvFakeGpuOperatorNamespace = "FAKE_GPU_OPERATOR_NAMESPACE"
)

// KWOKComputeDomainDraPluginAppConfiguration describes the configuration of the
// KWOK compute-domain DRA plugin app. GetConfig returns a zero value of it.
type KWOKComputeDomainDraPluginAppConfiguration struct {
	// FakeGpuOperatorNamespace is mapped from the FAKE_GPU_OPERATOR_NAMESPACE
	// key and tagged as required for validation.
	FakeGpuOperatorNamespace string `mapstructure:"FAKE_GPU_OPERATOR_NAMESPACE" validate:"required"`
}

// KWOKComputeDomainDraPluginApp is the app wrapper around a controller-runtime
// manager. Both fields are populated by Init and consumed by Run.
type KWOKComputeDomainDraPluginApp struct {
	// mgr is the controller-runtime manager created in Init and started in Run.
	mgr ctrl.Manager
	// stopCh is the channel handed to Init; Run cancels the manager's context
	// once a receive on it completes.
	stopCh chan struct{}
}

// Run starts the controller manager and blocks until Start returns.
// The manager runs under a cancellable context that is cancelled as soon as a
// receive on the app's stop channel completes. A start error terminates the
// process via log.Fatalf.
func (app *KWOKComputeDomainDraPluginApp) Run() {
	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	// Bridge the stop channel to context cancellation.
	waitForStop := func() {
		<-app.stopCh
		stop()
	}
	go waitForStop()

	err := app.mgr.Start(runCtx)
	if err != nil {
		log.Fatalf("Failed to start manager: %v", err)
	}
}

// Init prepares the app for Run. It records stopCh, installs klog as the
// controller-runtime logger, builds a controller manager whose scheme holds the
// core/v1 and resource.k8s.io/v1 types, and registers the Node controller with
// it. Any failure along the way terminates the process via log.Fatalf.
func (app *KWOKComputeDomainDraPluginApp) Init(stopCh chan struct{}) {
	app.stopCh = stopCh
	ctrl.SetLogger(klog.NewKlogr())

	restCfg, err := ctrl.GetConfig()
	if err != nil {
		log.Fatalf("Failed to get config: %v", err)
	}
	// Client-side rate limits applied to every client built from this config.
	restCfg.QPS, restCfg.Burst = 100, 200

	scheme := runtime.NewScheme()
	err = corev1.AddToScheme(scheme)
	if err != nil {
		log.Fatalf("Failed to add corev1 to scheme: %v", err)
	}
	err = resourceapi.AddToScheme(scheme)
	if err != nil {
		log.Fatalf("Failed to add resource.k8s.io to scheme: %v", err)
	}

	mgr, err := ctrl.NewManager(restCfg, ctrl.Options{Scheme: scheme})
	if err != nil {
		log.Fatalf("Failed to create manager: %v", err)
	}
	app.mgr = mgr

	// A typed clientset built from the same REST config is handed to the
	// Node controller alongside the manager.
	clientset, err := kubernetes.NewForConfig(restCfg)
	if err != nil {
		log.Fatalf("Failed to create kubernetes client: %v", err)
	}

	operatorNamespace := viper.GetString(EnvFakeGpuOperatorNamespace)
	err = nodecontroller.SetupWithManager(mgr, clientset, operatorNamespace)
	if err != nil {
		log.Fatalf("Failed to setup Node controller: %v", err)
	}
}

// Name returns the fixed identifier of this app.
func (app *KWOKComputeDomainDraPluginApp) Name() string {
	const appName = "KWOKComputeDomainDraPlugin"
	return appName
}

// GetConfig returns a zero-valued KWOKComputeDomainDraPluginAppConfiguration
// (by value, not by pointer) wrapped in an empty interface.
func (app *KWOKComputeDomainDraPluginApp) GetConfig() interface{} {
	return KWOKComputeDomainDraPluginAppConfiguration{}
}
Loading
Loading