Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
*.so
*.dylib
*~
dracpu
dracpu-admission

# Test binary, built with `go test -c`
*.test
Expand Down
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ repos:
rev: v2.4.1
hooks:
- id: codespell
args: [--write-changes]
args:
- --write-changes
- --ignore-words-list=NotIn,AfterAll
- repo: https://github.com/pecigonzalo/pre-commit-shfmt
rev: v2.2.0
hooks:
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ RUN go mod download
# build
COPY . .
RUN go build -o /go/bin/dracpu ./cmd/dracpu
RUN go build -o /go/bin/dracpu-admission ./cmd/dracpu-admission

# copy binary onto base image
FROM gcr.io/distroless/base-debian12
COPY --from=builder --chown=root:root /go/bin/dracpu /dracpu
COPY --from=builder --chown=root:root /go/bin/dracpu-admission /dracpu-admission
CMD ["/dracpu"]
127 changes: 103 additions & 24 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,63 @@ default: build ## Default builds
help: ## Display this help.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-23s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

build: build-dracpu build-test-dracpuinfo build-test-dracputester ## build all the binaries
build: build-dracpu build-dracpu-admission build-test-dracpuinfo build-test-dracputester ## build all the binaries

build-dracpu: ## build dracpu
go build -v -o "$(OUT_DIR)/dracpu" ./cmd/dracpu

build-dracpu-admission: ## build dracpu admission webhook
go build -v -o "$(OUT_DIR)/dracpu-admission" ./cmd/dracpu-admission

clean: ## clean
rm -rf "$(OUT_DIR)/"

test-unit: ## run tests
CGO_ENABLED=1 go test -v -race -count 1 -coverprofile=coverage.out ./pkg/...

test-admission: ## run admission controller tests
go test -v ./pkg/admission ./cmd/dracpu-admission

# Run the shell snippet given in the CMD environment variable against the kind
# cluster named $(CLUSTER_NAME).
#
# The shell variable `created` tracks who owns the cluster:
#   0 - no cluster with that name exists yet
#   1 - this recipe creates the cluster, so it is deleted again on exit
#   2 - the cluster already existed, so it is left untouched
#
# The existence check uses a plain pipeline instead of bash process
# substitution, so the recipe also works when SHELL is Make's default /bin/sh.
# The EXIT trap is installed before the cluster is created, and a failed
# creation aborts the recipe instead of running CMD without a usable cluster.
# The recipe's exit status is the exit status of CMD.
with-kind: ## run a command with a temporary kind cluster
	@if [ -z "$$CMD" ]; then \
		echo "CMD is required. Example: CMD='echo hello' $(MAKE) with-kind"; \
		exit 1; \
	fi; \
	created=0; \
	if kind get clusters | grep -Fxq -- "$(CLUSTER_NAME)"; then \
		created=2; \
	fi; \
	trap 'if [ "$$created" -eq 1 ]; then kind delete cluster --name ${CLUSTER_NAME}; fi' EXIT; \
	if [ "$$created" -eq 0 ]; then \
		created=1; \
		kind create cluster --name ${CLUSTER_NAME} --config hack/kind.yaml || exit 1; \
	fi; \
	bash -c "$$CMD"

# Run the "Admission Webhook" e2e specs through the with-kind wrapper.
# CMD is executed by `bash -c` inside with-kind; `set -e` makes it stop at the
# first failing step, so the tests never run against a cluster where the image
# load, the webhook install or the rollout wait failed.
test-e2e-admission: ## run admission e2e tests (requires kind cluster)
	CMD='set -e; $(MAKE) kind-load-test-image kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; go test -v ./test/e2e/ --ginkgo.v --ginkgo.focus="Admission Webhook"' \
		$(MAKE) with-kind

# Run the "Admission Webhook" e2e specs through with-kind after patching the
# dracpu daemonset args to --cpu-device-mode=grouped.
# The steps in CMD are separated by `;`, so a failing step does not stop the
# following ones and only the final `go test` decides the exit status.
# NOTE(review): only kind-install-admission is invoked here, yet the daemonset
# `dracpu` is patched - confirm the daemonset exists at that point before
# making this command fail-fast.
test-e2e-admission-grouped-mode: ## run admission e2e tests in grouped mode
CMD='$(MAKE) kind-load-test-image kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=grouped"]}]'"'"'; go test -v ./test/e2e/ --ginkgo.v --ginkgo.focus="Admission Webhook"' \
$(MAKE) with-kind

# Run the "Admission Webhook" e2e specs through with-kind after patching the
# dracpu daemonset args to --cpu-device-mode=individual.
# The steps in CMD are separated by `;`, so a failing step does not stop the
# following ones and only the final `go test` decides the exit status.
# NOTE(review): only kind-install-admission is invoked here, yet the daemonset
# `dracpu` is patched - confirm the daemonset exists at that point before
# making this command fail-fast.
test-e2e-admission-individual-mode: ## run admission e2e tests in individual mode
CMD='$(MAKE) kind-load-test-image kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=individual"]}]'"'"'; go test -v ./test/e2e/ --ginkgo.v --ginkgo.focus="Admission Webhook"' \
$(MAKE) with-kind

# Run the full e2e suite in individual mode through the with-kind wrapper.
# Installs the driver and the admission webhook, patches the dracpu daemonset
# args to individual mode with --reserved-cpus=$(RESERVED_CPUS_E2E), then runs
# the tests with the matching DRACPU_E2E_* environment.
# `set -e` makes the command stop at the first failing step instead of running
# the tests against a half-configured cluster.
# NOTE(review): nothing waits for the daemonset rollout after the patch -
# confirm the e2e suite tolerates pods that are still restarting.
test-e2e-individual-mode: ## run e2e tests in individual mode with reserved cpus
	CMD='set -e; $(MAKE) kind-load-test-image kind-install-cpu-dra kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=individual","--reserved-cpus=$(RESERVED_CPUS_E2E)"]}]'"'"'; env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(RESERVED_CPUS_E2E) DRACPU_E2E_CPU_DEVICE_MODE=individual go test -v ./test/e2e/ --ginkgo.v' \
		$(MAKE) with-kind

# Run the full e2e suite in grouped mode through the with-kind wrapper.
# Installs the driver and the admission webhook, patches the dracpu daemonset
# args to grouped mode with --reserved-cpus=$(RESERVED_CPUS_E2E), then runs the
# tests with the matching DRACPU_E2E_* environment.
# `set -e` makes the command stop at the first failing step instead of running
# the tests against a half-configured cluster.
# NOTE(review): nothing waits for the daemonset rollout after the patch -
# confirm the e2e suite tolerates pods that are still restarting.
test-e2e-grouped-mode: ## run e2e test grouped mode suite
	CMD='set -e; $(MAKE) kind-load-test-image kind-install-cpu-dra kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=grouped","--reserved-cpus=$(RESERVED_CPUS_E2E)"]}]'"'"'; env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(RESERVED_CPUS_E2E) DRACPU_E2E_CPU_DEVICE_MODE=grouped go test -v ./test/e2e/ --ginkgo.v' \
		$(MAKE) with-kind

# Run the full e2e suite twice through the with-kind wrapper: first with the
# dracpu daemonset patched to grouped mode, then patched to individual mode.
# Images are rebuilt unconditionally (FORCE_BUILD=1).
#
# `set -e` makes the command stop at the first failing step. Without it the
# exit status was only that of the last `go test`, so a failing grouped-mode
# run was reported as success whenever the individual-mode run passed.
#
# The reserved CPU list is taken from DRACPU_E2E_RESERVED_CPUS and falls back
# to RESERVED_CPUS_E2E (the variable the sibling e2e targets use), so the
# daemonset is not started with an empty --reserved-cpus= when only the latter
# is set. `$$reserved` is expanded by the inner `bash -c`, not by Make.
test-e2e-all: ## run all e2e tests (admission + cpu allocation)
	CMD='set -e; $(MAKE) FORCE_BUILD=1 kind-load-test-image kind-install-cpu-dra kind-install-admission; kubectl -n kube-system rollout status deploy dracpu-admission --timeout=120s; reserved=$(or $(DRACPU_E2E_RESERVED_CPUS),$(RESERVED_CPUS_E2E)); kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=grouped","--reserved-cpus='"'"'$$reserved'"'"'"]}]'"'"'; env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$$reserved DRACPU_E2E_CPU_DEVICE_MODE=grouped go test -v ./test/e2e/ --ginkgo.v; kubectl -n kube-system patch daemonset dracpu --type='"'"'json'"'"' -p='"'"'[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=individual","--reserved-cpus='"'"'$$reserved'"'"'"]}]'"'"'; env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$$reserved DRACPU_E2E_CPU_DEVICE_MODE=individual go test -v ./test/e2e/ --ginkgo.v' \
		$(MAKE) with-kind

update: ## runs go mod tidy and go get -u
go get -u ./...
go mod tidy
Expand All @@ -69,6 +115,7 @@ REGISTRY := us-central1-docker.pkg.dev/k8s-staging-images
# this is an intentionally non-existent registry to be used only by local CI using the local image loading
REGISTRY_CI := dev.kind.local/ci
STAGING_IMAGE_NAME := ${REGISTRY}/${STAGING_REPO_NAME}/${IMAGE_NAME}
TESTING_IMAGE_NAME := ${REGISTRY}/${IMAGE_NAME}-test
# tag based on date-sha
GIT_VERSION := $(shell date +v%Y%m%d)-$(shell git rev-parse --short HEAD)
ifneq ($(shell git status --porcelain),)
Expand All @@ -78,25 +125,27 @@ TAG ?= $(GIT_VERSION)
# the full image tag
IMAGE_LATEST?=$(STAGING_IMAGE_NAME):latest
IMAGE := ${STAGING_IMAGE_NAME}:${TAG}
IMAGE_TESTING := "${TESTING_IMAGE_NAME}:${TAG}"
IMAGE_CI := ${REGISTRY_CI}/${IMAGE_NAME}:${TAG}
IMAGE_TEST := ${REGISTRY_CI}/${IMAGE_NAME}-test:${TAG}
# target platform(s)
PLATFORMS?=linux/amd64

# set convenient defaults for user variables
DRACPU_E2E_CPU_DEVICE_MODE ?= grouped
DRACPU_E2E_RESERVED_CPUS ?= 0

# shortcut
CI_MANIFEST_FILE := hack/ci/install-ci-$(DRACPU_E2E_CPU_DEVICE_MODE)-mode.yaml
# we need this because manifest processing always needs a nonempty value

# required to enable buildx
export DOCKER_CLI_EXPERIMENTAL=enabled
image: ## docker build load
docker build . -t ${STAGING_IMAGE_NAME} --load

# Build the driver image unless it can be reused: the build is skipped only
# when FORCE_BUILD is not "1" and docker already has an image tagged $(IMAGE).
# In every other case the work is delegated to build-image-force.
build-image: ## build image
	@if [ "$(FORCE_BUILD)" != "1" ] && docker image inspect "${IMAGE}" >/dev/null 2>&1; then \
		echo "Image ${IMAGE} already exists; skipping build."; \
	else \
		$(MAKE) build-image-force; \
	fi

build-image-force: ## force build image
docker buildx build . \
--platform="${PLATFORMS}" \
--tag="${IMAGE}" \
Expand All @@ -116,12 +165,35 @@ kind-cluster: ## create kind cluster
kind-load-image: build-image ## load the current container image into kind
kind load docker-image ${IMAGE} ${IMAGE_LATEST} --name ${CLUSTER_NAME}

# Make the test image available inside the kind cluster $(CLUSTER_NAME);
# build-test-image runs first so the image exists locally.
kind-load-test-image: build-test-image ## load the test image into kind
	kind load docker-image $(IMAGE_TEST) --name $(CLUSTER_NAME)

kind-uninstall-cpu-dra: ## remove cpu dra from kind cluster
kubectl delete -f install.yaml || true

# Best-effort removal of the admission webhook: deletes the resources from
# install-admission.yaml and the dracpu-admission-tls secret in kube-system.
# `|| true` keeps the target successful even when a delete command fails.
kind-uninstall-admission: ## remove admission controller from kind cluster
kubectl delete -f install-admission.yaml || true
kubectl -n kube-system delete secret dracpu-admission-tls || true

kind-install-all: kind-install-cpu-dra kind-install-admission ## installs grouped mode by default; use kind-install-individual-mode to install individual mode

# Runs kind-install-all first, then replaces the args of the first container
# of the kube-system daemonset `dracpu` with a JSON patch so that it starts
# with --cpu-device-mode=individual.
kind-install-individual-mode: kind-install-all ## install individual mode on kind
kubectl -n kube-system patch daemonset dracpu --type='json' -p='[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=individual"]}]'

kind-install-cpu-dra: kind-uninstall-cpu-dra build-image kind-load-image ## install on cluster
kubectl apply -f install.yaml

# Install only the admission webhook into the kind cluster $(CLUSTER_NAME).
# After the prerequisites have removed any previous install and loaded the
# driver image, the recipe:
#   1. loads $(IMAGE_CI) into the cluster,
#   2. applies install-admission.yaml,
#   3. points the dracpu-admission deployment at $(IMAGE_CI) with pull policy
#      IfNotPresent, so the image loaded into kind is used,
#   4. sets the claim-get retry tuning from ADMISSION_CLAIM_GET_RETRY_WAIT and
#      ADMISSION_CLAIM_GET_RETRY_TOTAL,
#   5. runs hack/webhook/generate-certs.sh and restarts the deployment.
# The deprecated `--record` flag is no longer passed to `kubectl set image`.
kind-install-admission: kind-uninstall-admission build-image kind-load-image ## install admission controller only
	kind load docker-image ${IMAGE_CI} --name ${CLUSTER_NAME}
	kubectl apply -f install-admission.yaml
	kubectl -n kube-system set image deploy/dracpu-admission dracpu-admission=${IMAGE_CI}
	kubectl -n kube-system patch deploy dracpu-admission --type='json' -p='[{"op":"replace","path":"/spec/template/spec/containers/0/imagePullPolicy","value":"IfNotPresent"}]'
	kubectl -n kube-system set env deploy/dracpu-admission \
		DRACPU_ADMISSION_CLAIM_GET_RETRY_WAIT="$(ADMISSION_CLAIM_GET_RETRY_WAIT)" \
		DRACPU_ADMISSION_CLAIM_GET_RETRY_TOTAL="$(ADMISSION_CLAIM_GET_RETRY_TOTAL)"
	bash hack/webhook/generate-certs.sh
	kubectl -n kube-system rollout restart deploy dracpu-admission

delete-kind-cluster: ## delete kind cluster
kind delete cluster --name ${CLUSTER_NAME}

Expand Down Expand Up @@ -150,19 +222,31 @@ define generate_ci_manifest
@rm hack/ci/*.part.yaml
endef

ci-manifests: install.yaml install-yq ## create the CI install manifests
ifneq ($(DRACPU_E2E_VERBOSE),)
@echo "setting up manifests for mode=$(DRACPU_E2E_CPU_DEVICE_MODE)"
endif
$(call generate_ci_manifest,$(CI_MANIFEST_FILE),.spec.template.spec.containers[0].args |= (del(.[] | select(. == "--cpu-device-mode=*")) | . + ["--cpu-device-mode=individual", "--reserved-cpus=$(DRACPU_E2E_RESERVED_CPUS)"]))
RESERVED_CPUS_E2E ?= 0
ADMISSION_CLAIM_GET_RETRY_WAIT ?= 50ms
ADMISSION_CLAIM_GET_RETRY_TOTAL ?= 500ms
# Calls generate_ci_manifest (defined outside this view) with the output path
# hack/ci/install-ci-individual-mode.yaml and an expression that drops any
# existing --cpu-device-mode=* container arg and appends
# --cpu-device-mode=individual plus --reserved-cpus=$(RESERVED_CPUS_E2E).
# NOTE(review): the expression is presumably evaluated by yq (install-yq is a
# prerequisite) - confirm against generate_ci_manifest.
ci-manifests-individual-mode: install.yaml install-yq ## create the CI install manifests for the individual mode test variant
$(call generate_ci_manifest,hack/ci/install-ci-individual-mode.yaml,.spec.template.spec.containers[0].args |= (del(.[] | select(. == "--cpu-device-mode=*")) | . + ["--cpu-device-mode=individual", "--reserved-cpus=$(RESERVED_CPUS_E2E)"]))

ci-kind-setup: ci-manifests build-image build-test-image ## setup a CI cluster from scratch using reserved CPUs
ifneq ($(DRACPU_E2E_VERBOSE),)
@echo "creating a kind cluster for mode=$(DRACPU_E2E_CPU_DEVICE_MODE)"
endif
$(call kind_setup,$(CI_MANIFEST_FILE))
# Generates the individual-mode CI manifest, builds both images, then calls
# kind_setup (defined outside this view) with that manifest.
ci-kind-setup-individual-mode: ci-manifests-individual-mode build-image build-test-image ## setup a CI cluster from scratch for the individual mode test variant
$(call kind_setup,hack/ci/install-ci-individual-mode.yaml)

# Calls generate_ci_manifest (defined outside this view) with the output path
# hack/ci/install-ci-grouped-mode.yaml and an expression that drops any
# existing --cpu-device-mode=* container arg and appends
# --cpu-device-mode=grouped plus --reserved-cpus=$(RESERVED_CPUS_E2E).
# NOTE(review): the expression is presumably evaluated by yq (install-yq is a
# prerequisite) - confirm against generate_ci_manifest.
ci-manifests-grouped-mode: install.yaml install-yq ## create the CI install manifests for the grouped mode test variant
$(call generate_ci_manifest,hack/ci/install-ci-grouped-mode.yaml,.spec.template.spec.containers[0].args |= (del(.[] | select(. == "--cpu-device-mode=*")) | . + ["--cpu-device-mode=grouped", "--reserved-cpus=$(RESERVED_CPUS_E2E)"]))

# Generates the grouped-mode CI manifest, builds both images, then calls
# kind_setup (defined outside this view) with that manifest.
ci-kind-setup-grouped-mode: ci-manifests-grouped-mode build-image build-test-image ## setup a CI cluster from scratch for the grouped mode test variant
$(call kind_setup,hack/ci/install-ci-grouped-mode.yaml)

# Build the test image unless it can be reused: the build is skipped only when
# FORCE_BUILD is not "1" and docker already has an image tagged $(IMAGE_TEST).
# In every other case the work is delegated to build-test-image-force.
build-test-image: ## build tests image
	@if [ "$(FORCE_BUILD)" != "1" ] && docker image inspect "${IMAGE_TEST}" >/dev/null 2>&1; then \
		echo "Image ${IMAGE_TEST} already exists; skipping build."; \
	else \
		$(MAKE) build-test-image-force; \
	fi

build-test-image-force: ## force build tests image
docker buildx build . \
--file test/image/Dockerfile \
--platform="${PLATFORMS}" \
Expand All @@ -175,11 +259,6 @@ build-test-dracputester: ## build helper to serve as entry point and report cpu
build-test-dracpuinfo: ## build helper to expose hardware info in the internal dracpu format
go build -v -o "$(OUT_DIR)/dracpuinfo" ./test/image/dracpuinfo

test-e2e: ## run e2e test against an existing configured cluster
env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(DRACPU_E2E_RESERVED_CPUS) go test -v ./test/e2e/ --ginkgo.v

test-e2e-kind: ci-kind-setup test-e2e ## run e2e test against a purpose-built kind cluster

Comment on lines -178 to -182
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea here was not to assume that e2e runs against the CI/development kind cluster; our e2e tests should run just fine against a real cluster. This reduces the chance of hidden/implicit assumptions sneaking into our own e2e tests, and it also lets us use the e2e tests as a validation tool for real deployments, though likely not production deployments (but we're quite far from production ready anyway... :\ )
If possible, we should keep this distinction.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the last item — I'm going to think about the best way to do this. The rest should be addressed. I think Praveen had an outstanding item he told me about in Slack, which hopefully I can handle early next week.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ffromani I replaced all the e2e targets so that they run against an existing cluster if one is already up; if there is no cluster, they start one, run the tests, and then shut the cluster down. Is there a different flow you would like? I had to verify that I had this fully running with a different cluster.

lint: ## run the linter against the codebase
$(GOLANGCI_LINT) run ./...

Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,22 @@ the claim would need to be updated or recreated manually.
manifest
- `kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/dra-driver-cpu/refs/heads/main/install.yaml`

### Validating Admission Controller

The optional validating admission controller rejects invalid `ResourceClaim`
requests for the `dra.cpu` device class (for example, unsupported allocation
modes or non-integer capacity requests). It also validates pods so that when a
CPU request/limit is specified alongside `dra.cpu` claims, the CPU count matches
the claim count.

To install it:

- Apply the webhook deployment and configuration:
- `kubectl apply -f install-admission.yaml`
- Generate a self-signed certificate, create the TLS secret, and patch the
webhook CA bundle:
- `hack/webhook/generate-certs.sh`

### Example Usage

The driver supports two modes of operation. Each mode has a complete example manifest that includes both the ResourceClaim(s) and a sample Pod. The ResourceClaim requests a specific number of exclusive CPUs from the driver, and is referenced in the Pod spec to receive the allocated CPUs.
Expand Down
Loading