From 0ce6f13e7494ba0ce0191a03cbd07c18dee3075b Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 24 Jul 2024 00:14:05 +0000 Subject: [PATCH 01/32] Initial commit --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 9cc0eb2e..b56b9769 100644 --- a/README.md +++ b/README.md @@ -111,4 +111,3 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - From 5b5e3403551c308591a1fb20139b3acb3df860fb Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Thu, 25 Jul 2024 18:36:48 +0000 Subject: [PATCH 02/32] Initial scaffolding from Kubebuilder. --- Dockerfile | 33 +++ Makefile | 200 ++++++++++++++ PROJECT | 20 ++ api/v1/groupversion_info.go | 36 +++ api/v1/pathwaysapi_types.go | 64 +++++ api/v1/zz_generated.deepcopy.go | 114 ++++++++ cmd/main.go | 170 ++++++++++++ ...ways-api.pathways.domain_pathwaysapis.yaml | 54 ++++ config/crd/kustomization.yaml | 22 ++ config/crd/kustomizeconfig.yaml | 19 ++ config/default/kustomization.yaml | 146 ++++++++++ config/default/manager_metrics_patch.yaml | 4 + config/default/metrics_service.yaml | 17 ++ config/manager/kustomization.yaml | 2 + config/manager/manager.yaml | 95 +++++++ config/prometheus/kustomization.yaml | 2 + config/prometheus/monitor.yaml | 30 +++ config/rbac/kustomization.yaml | 27 ++ config/rbac/leader_election_role.yaml | 40 +++ config/rbac/leader_election_role_binding.yaml | 15 ++ config/rbac/metrics_auth_role.yaml | 17 ++ config/rbac/metrics_auth_role_binding.yaml | 12 + config/rbac/metrics_reader_role.yaml | 9 + config/rbac/pathwaysapi_editor_role.yaml | 27 ++ config/rbac/pathwaysapi_viewer_role.yaml | 23 ++ config/rbac/role.yaml | 32 +++ config/rbac/role_binding.yaml | 15 ++ config/rbac/service_account.yaml | 8 + config/samples/kustomization.yaml | 4 + .../samples/pathways-api_v1_pathwaysapi.yaml | 9 + 
go.mod | 95 +++++++ go.sum | 253 ++++++++++++++++++ hack/boilerplate.go.txt | 15 ++ internal/controller/pathwaysapi_controller.go | 62 +++++ .../controller/pathwaysapi_controller_test.go | 84 ++++++ internal/controller/suite_test.go | 90 +++++++ test/e2e/e2e_suite_test.go | 32 +++ test/e2e/e2e_test.go | 122 +++++++++ test/utils/utils.go | 140 ++++++++++ 39 files changed, 2159 insertions(+) create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 PROJECT create mode 100644 api/v1/groupversion_info.go create mode 100644 api/v1/pathwaysapi_types.go create mode 100644 api/v1/zz_generated.deepcopy.go create mode 100644 cmd/main.go create mode 100644 config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml create mode 100644 config/crd/kustomization.yaml create mode 100644 config/crd/kustomizeconfig.yaml create mode 100644 config/default/kustomization.yaml create mode 100644 config/default/manager_metrics_patch.yaml create mode 100644 config/default/metrics_service.yaml create mode 100644 config/manager/kustomization.yaml create mode 100644 config/manager/manager.yaml create mode 100644 config/prometheus/kustomization.yaml create mode 100644 config/prometheus/monitor.yaml create mode 100644 config/rbac/kustomization.yaml create mode 100644 config/rbac/leader_election_role.yaml create mode 100644 config/rbac/leader_election_role_binding.yaml create mode 100644 config/rbac/metrics_auth_role.yaml create mode 100644 config/rbac/metrics_auth_role_binding.yaml create mode 100644 config/rbac/metrics_reader_role.yaml create mode 100644 config/rbac/pathwaysapi_editor_role.yaml create mode 100644 config/rbac/pathwaysapi_viewer_role.yaml create mode 100644 config/rbac/role.yaml create mode 100644 config/rbac/role_binding.yaml create mode 100644 config/rbac/service_account.yaml create mode 100644 config/samples/kustomization.yaml create mode 100644 config/samples/pathways-api_v1_pathwaysapi.yaml create mode 100644 go.mod create mode 100644 go.sum create 
mode 100644 hack/boilerplate.go.txt create mode 100644 internal/controller/pathwaysapi_controller.go create mode 100644 internal/controller/pathwaysapi_controller_test.go create mode 100644 internal/controller/suite_test.go create mode 100644 test/e2e/e2e_suite_test.go create mode 100644 test/e2e/e2e_test.go create mode 100644 test/utils/utils.go diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..a48973ee --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# Build the manager binary +FROM golang:1.22 AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY cmd/main.go cmd/main.go +COPY api/ api/ +COPY internal/controller/ internal/controller/ + +# Build +# the GOARCH has not a default value to allow the binary be built according to the host where the command +# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO +# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore, +# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . 
+USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..f01f7170 --- /dev/null +++ b/Makefile @@ -0,0 +1,200 @@ +# Image URL to use all building/pushing image targets +IMG ?= controller:latest +# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. +ENVTEST_K8S_VERSION = 1.30.0 + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# CONTAINER_TOOL defines the container tool to be used for building images. +# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (i.e. podman) +CONTAINER_TOOL ?= docker + +# Setting SHELL to bash allows bash commands to be executed by recipes. +# Options are set to exit when a recipe line exits non-zero or a piped command fails. +SHELL = /usr/bin/env bash -o pipefail +.SHELLFLAGS = -ec + +.PHONY: all +all: build + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. +# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. 
+ @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Development + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: test +test: manifests generate fmt vet envtest ## Run tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out + +# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. +.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +test-e2e: + go test ./test/e2e/ -v -ginkgo.v + +.PHONY: lint +lint: golangci-lint ## Run golangci-lint linter + $(GOLANGCI_LINT) run + +.PHONY: lint-fix +lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes + $(GOLANGCI_LINT) run --fix + +##@ Build + +.PHONY: build +build: manifests generate fmt vet ## Build manager binary. + go build -o bin/manager cmd/main.go + +.PHONY: run +run: manifests generate fmt vet ## Run a controller from your host. + go run ./cmd/main.go + +# If you wish to build the manager image targeting other platforms you can use the --platform flag. +# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it. 
+# More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +.PHONY: docker-build +docker-build: ## Build docker image with the manager. + $(CONTAINER_TOOL) build -t ${IMG} . + +.PHONY: docker-push +docker-push: ## Push docker image with the manager. + $(CONTAINER_TOOL) push ${IMG} + +# PLATFORMS defines the target platforms for the manager image be built to provide support to multiple +# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to: +# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/ +# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=> then the export will fail) +# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option. +PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le +.PHONY: docker-buildx +docker-buildx: ## Build and push docker image for the manager for cross-platform support + # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile + sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross + - $(CONTAINER_TOOL) buildx create --name pathways-api-builder + $(CONTAINER_TOOL) buildx use pathways-api-builder + - $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross . + - $(CONTAINER_TOOL) buildx rm pathways-api-builder + rm Dockerfile.cross + +.PHONY: build-installer +build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment. 
+ mkdir -p dist + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default > dist/install.yaml + +##@ Deployment + +ifndef ignore-not-found + ignore-not-found = false +endif + +.PHONY: install +install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. + $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - + +.PHONY: uninstall +uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: deploy +deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + +.PHONY: undeploy +undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +##@ Dependencies + +## Location to install dependencies to +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tool Binaries +KUBECTL ?= kubectl +KUSTOMIZE ?= $(LOCALBIN)/kustomize +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest +GOLANGCI_LINT = $(LOCALBIN)/golangci-lint + +## Tool Versions +KUSTOMIZE_VERSION ?= v5.4.2 +CONTROLLER_TOOLS_VERSION ?= v0.15.0 +ENVTEST_VERSION ?= release-0.18 +GOLANGCI_LINT_VERSION ?= v1.59.1 + +.PHONY: kustomize +kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. 
+$(KUSTOMIZE): $(LOCALBIN) + $(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION)) + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. +$(CONTROLLER_GEN): $(LOCALBIN) + $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION)) + +.PHONY: envtest +envtest: $(ENVTEST) ## Download setup-envtest locally if necessary. +$(ENVTEST): $(LOCALBIN) + $(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION)) + +.PHONY: golangci-lint +golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. +$(GOLANGCI_LINT): $(LOCALBIN) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef diff --git a/PROJECT b/PROJECT new file mode 100644 index 00000000..ba0223b2 --- /dev/null +++ b/PROJECT @@ -0,0 +1,20 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. 
+# More info: https://book.kubebuilder.io/reference/project-config.html +domain: pathways.domain +layout: +- go.kubebuilder.io/v4 +projectName: pathways-api +repo: pathways-api +resources: +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: pathways.domain + group: pathways-api + kind: PathwaysAPI + path: pathways-api/api/v1 + version: v1 +version: "3" diff --git a/api/v1/groupversion_info.go b/api/v1/groupversion_info.go new file mode 100644 index 00000000..ac0fdacf --- /dev/null +++ b/api/v1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1 contains API Schema definitions for the pathways-api v1 API group +// +kubebuilder:object:generate=true +// +groupName=pathways-api.pathways.domain +package v1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "pathways-api.pathways.domain", Version: "v1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. 
+ AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go new file mode 100644 index 00000000..e9eb0c04 --- /dev/null +++ b/api/v1/pathwaysapi_types.go @@ -0,0 +1,64 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// PathwaysAPISpec defines the desired state of PathwaysAPI +type PathwaysAPISpec struct { + // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster + // Important: Run "make" to regenerate code after modifying this file + + // Foo is an example field of PathwaysAPI. 
Edit pathwaysapi_types.go to remove/update + Foo string `json:"foo,omitempty"` +} + +// PathwaysAPIStatus defines the observed state of PathwaysAPI +type PathwaysAPIStatus struct { + // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster + // Important: Run "make" to regenerate code after modifying this file +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status + +// PathwaysAPI is the Schema for the pathwaysapis API +type PathwaysAPI struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PathwaysAPISpec `json:"spec,omitempty"` + Status PathwaysAPIStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// PathwaysAPIList contains a list of PathwaysAPI +type PathwaysAPIList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []PathwaysAPI `json:"items"` +} + +func init() { + SchemeBuilder.Register(&PathwaysAPI{}, &PathwaysAPIList{}) +} diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go new file mode 100644 index 00000000..506b4f2d --- /dev/null +++ b/api/v1/zz_generated.deepcopy.go @@ -0,0 +1,114 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. 
+ +package v1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PathwaysAPI) DeepCopyInto(out *PathwaysAPI) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPI. +func (in *PathwaysAPI) DeepCopy() *PathwaysAPI { + if in == nil { + return nil + } + out := new(PathwaysAPI) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PathwaysAPI) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PathwaysAPIList) DeepCopyInto(out *PathwaysAPIList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]PathwaysAPI, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPIList. +func (in *PathwaysAPIList) DeepCopy() *PathwaysAPIList { + if in == nil { + return nil + } + out := new(PathwaysAPIList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PathwaysAPIList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PathwaysAPISpec) DeepCopyInto(out *PathwaysAPISpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPISpec. +func (in *PathwaysAPISpec) DeepCopy() *PathwaysAPISpec { + if in == nil { + return nil + } + out := new(PathwaysAPISpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PathwaysAPIStatus) DeepCopyInto(out *PathwaysAPIStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPIStatus. +func (in *PathwaysAPIStatus) DeepCopy() *PathwaysAPIStatus { + if in == nil { + return nil + } + out := new(PathwaysAPIStatus) + in.DeepCopyInto(out) + return out +} diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 00000000..ddd338e0 --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,170 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "crypto/tls" + "flag" + "os" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. 
+ _ "k8s.io/client-go/plugin/pkg/client/auth" + + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + pathwaysapiv1 "pathways-api/api/v1" + "pathways-api/internal/controller" + // +kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(pathwaysapiv1.AddToScheme(scheme)) + // +kubebuilder:scaffold:scheme +} + +func main() { + var metricsAddr string + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts []func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. 
Use --metrics-secure=false to use HTTP instead.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: tlsOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. + // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + // TODO(user): TLSOpts is used to allow configuring the TLS config used for the server. If certificates are + // not provided, self-signed certificates will be generated by default. This option is not recommended for + // production environments as self-signed certificates do not offer the same level of trust and security + // as certificates issued by a trusted Certificate Authority (CA). The primary risk is potentially allowing + // unauthorized access to sensitive metrics data. 
Consider replacing with CertDir, CertName, and KeyName + // to provide certificates, ensuring the server communicates using trusted and secure certificates. + TLSOpts: tlsOpts, + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "12183607.pathways.domain", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly + // speeds up voluntary leader transitions as the new leader don't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so would be fine to enable this option. However, + // if you are doing or is intended to do any operation such as perform cleanups + // after the manager stops then its usage might be unsafe. 
+ // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err = (&controller.PathwaysAPIReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "PathwaysAPI") + os.Exit(1) + } + // +kubebuilder:scaffold:builder + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml new file mode 100644 index 00000000..447d12e2 --- /dev/null +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -0,0 +1,54 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: pathwaysapis.pathways-api.pathways.domain +spec: + group: pathways-api.pathways.domain + names: + kind: PathwaysAPI + listKind: PathwaysAPIList + plural: pathwaysapis + singular: pathwaysapi + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: PathwaysAPI is the Schema for the pathwaysapis API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PathwaysAPISpec defines the desired state of PathwaysAPI + properties: + foo: + description: Foo is an example field of PathwaysAPI. Edit pathwaysapi_types.go + to remove/update + type: string + type: object + status: + description: PathwaysAPIStatus defines the observed state of PathwaysAPI + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml new file mode 100644 index 00000000..4bbbd663 --- /dev/null +++ b/config/crd/kustomization.yaml @@ -0,0 +1,22 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/pathways-api.pathways.domain_pathwaysapis.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. 
+# patches here are for enabling the CA injection for each CRD +#- path: patches/cainjection_in_pathwaysapis.yaml +# +kubebuilder:scaffold:crdkustomizecainjectionpatch + +# [WEBHOOK] To enable webhook, uncomment the following section +# the following config is for teaching kustomize how to do kustomization for CRDs. + +#configurations: +#- kustomizeconfig.yaml diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml new file mode 100644 index 00000000..ec5c150a --- /dev/null +++ b/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml new file mode 100644 index 00000000..78841639 --- /dev/null +++ b/config/default/kustomization.yaml @@ -0,0 +1,146 @@ +# Adds namespace to all resources. +namespace: pathways-api-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: pathways-api- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 
'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus +# [METRICS] Expose the controller manager metrics service. +- metrics_service.yaml + +# Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager +patches: +# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. +# More info: https://book.kubebuilder.io/reference/metrics +- path: manager_metrics_patch.yaml + target: + kind: Deployment + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- path: manager_webhook_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. +# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. +# 'CERTMANAGER' needs to be enabled to use ca injection +#- path: webhookcainjection_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 
+# Uncomment the following replacements to add the cert-manager CA injection annotations +#replacements: +# - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +# fieldPath: .metadata.namespace # namespace of the certificate CR +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - select: +# kind: CustomResourceDefinition +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +# fieldPath: .metadata.name +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - select: +# kind: CustomResourceDefinition +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - source: # Add cert-manager annotation to the webhook Service +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.name # name of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +#
fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 0 +# create: true +# - source: +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.namespace # namespace of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 1 +# create: true diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml new file mode 100644 index 00000000..2aaef653 --- /dev/null +++ b/config/default/manager_metrics_patch.yaml @@ -0,0 +1,4 @@ +# This patch adds the args to allow exposing the metrics endpoint using HTTPS +- op: add + path: /spec/template/spec/containers/0/args/0 + value: --metrics-bind-address=:8443 diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml new file mode 100644 index 00000000..9b4019a7 --- /dev/null +++ b/config/default/metrics_service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml new file mode 100644 index 00000000..5c5f0b84 --- /dev/null +++ b/config/manager/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- manager.yaml diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml new file mode 100644 index 00000000..3eac4556 --- /dev/null +++ b/config/manager/manager.yaml @@ -0,0 +1,95 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + 
name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + replicas: 1 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + # TODO(user): Uncomment the following code to configure the nodeAffinity expression + # according to the platforms which are supported by your solution. + # It is considered best practice to support multiple architectures. You can + # build your manager image using the makefile target docker-buildx. + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/arch + # operator: In + # values: + # - amd64 + # - arm64 + # - ppc64le + # - s390x + # - key: kubernetes.io/os + # operator: In + # values: + # - linux + securityContext: + runAsNonRoot: true + # TODO(user): For common cases that do not require escalating privileges + # it is recommended to ensure that all your Pods/Containers are restrictive. + # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + # Please uncomment the following code if your project does NOT have to work on old Kubernetes + # versions < 1.19 or on vendor versions which do NOT support this field by default (e.g. OpenShift < 4.11).
+ # seccompProfile: + # type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + image: controller:latest + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + # TODO(user): Configure the resources accordingly based on the project requirements. + # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml new file mode 100644 index 00000000..ed137168 --- /dev/null +++ b/config/prometheus/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- monitor.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml new file mode 100644 index 00000000..81f32879 --- /dev/null +++ b/config/prometheus/monitor.yaml @@ -0,0 +1,30 @@ +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https # Ensure this is the name of the port that exposes HTTPS metrics + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables + # certificate verification. 
This poses a significant security risk by making the system vulnerable to + # man-in-the-middle attacks, where an attacker could intercept and manipulate the communication between + # Prometheus and the monitored services. This could lead to unauthorized access to sensitive metrics data, + # compromising the integrity and confidentiality of the information. + # Please use the following options for secure configurations: + # caFile: /etc/metrics-certs/ca.crt + # certFile: /etc/metrics-certs/tls.crt + # keyFile: /etc/metrics-certs/tls.key + insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml new file mode 100644 index 00000000..7f4a0a98 --- /dev/null +++ b/config/rbac/kustomization.yaml @@ -0,0 +1,27 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment out the following +# permissions if you want to disable this protection. +# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +# For each CRD, "Editor" and "Viewer" roles are scaffolded by +# default, aiding admins in cluster management. Those roles are +# not used by the Project itself. You can comment out the following lines +# if you do not want those helpers to be installed with your Project.
+- pathwaysapi_editor_role.yaml +- pathwaysapi_viewer_role.yaml + diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml new file mode 100644 index 00000000..0e759313 --- /dev/null +++ b/config/rbac/leader_election_role.yaml @@ -0,0 +1,40 @@ +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 00000000..f59fe83d --- /dev/null +++ b/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml new file mode 100644 index 00000000..32d2e4ec --- /dev/null +++ b/config/rbac/metrics_auth_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git 
a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 00000000..e775d67f --- /dev/null +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml new file mode 100644 index 00000000..51a75db4 --- /dev/null +++ b/config/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/config/rbac/pathwaysapi_editor_role.yaml b/config/rbac/pathwaysapi_editor_role.yaml new file mode 100644 index 00000000..a480dcd3 --- /dev/null +++ b/config/rbac/pathwaysapi_editor_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to edit pathwaysapis. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: pathwaysapi-editor-role +rules: +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis/status + verbs: + - get diff --git a/config/rbac/pathwaysapi_viewer_role.yaml b/config/rbac/pathwaysapi_viewer_role.yaml new file mode 100644 index 00000000..b1e6c442 --- /dev/null +++ b/config/rbac/pathwaysapi_viewer_role.yaml @@ -0,0 +1,23 @@ +# permissions for end users to view pathwaysapis. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: pathwaysapi-viewer-role +rules: +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis + verbs: + - get + - list + - watch +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis/status + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml new file mode 100644 index 00000000..77393382 --- /dev/null +++ b/config/rbac/role.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis/finalizers + verbs: + - update +- apiGroups: + - pathways-api.pathways.domain + resources: + - pathwaysapis/status + verbs: + - get + - patch + - update diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml new file mode 100644 index 00000000..afe22ac5 --- /dev/null +++ b/config/rbac/role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml new file mode 100644 index 00000000..706311da --- /dev/null +++ b/config/rbac/service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: controller-manager + 
namespace: system diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml new file mode 100644 index 00000000..1c602273 --- /dev/null +++ b/config/samples/kustomization.yaml @@ -0,0 +1,4 @@ +## Append samples of your project ## +resources: +- pathways-api_v1_pathwaysapi.yaml +# +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml new file mode 100644 index 00000000..3cf0a1d9 --- /dev/null +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -0,0 +1,9 @@ +apiVersion: pathways-api.pathways.domain/v1 +kind: PathwaysAPI +metadata: + labels: + app.kubernetes.io/name: pathways-api + app.kubernetes.io/managed-by: kustomize + name: pathwaysapi-sample +spec: + # TODO(user): Add fields here diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..518400cf --- /dev/null +++ b/go.mod @@ -0,0 +1,95 @@ +module pathways-api + +go 1.22.0 + +require ( + github.com/onsi/ginkgo/v2 v2.17.1 + github.com/onsi/gomega v1.32.0 + k8s.io/apimachinery v0.30.1 + k8s.io/client-go v0.30.1 + sigs.k8s.io/controller-runtime v0.18.4 +) + +require ( + github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag 
v0.22.3 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/cel-go v0.17.8 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/procfs v0.12.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/stoewer/go-strcase v1.2.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect + go.opentelemetry.io/otel v1.19.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect + go.opentelemetry.io/otel/metric v1.19.0 // indirect + go.opentelemetry.io/otel/sdk v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect + go.opentelemetry.io/proto/otlp v1.0.0 // indirect + go.uber.org/multierr 
v1.11.0 // indirect + go.uber.org/zap v1.26.0 // indirect + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.12.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.3.0 // indirect + golang.org/x/tools v0.18.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/grpc v1.58.3 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.30.1 // indirect + k8s.io/apiextensions-apiserver v0.30.1 // indirect + k8s.io/apiserver v0.30.1 // indirect + k8s.io/component-base v0.30.1 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..9ce9c0dd --- /dev/null +++ b/go.sum @@ -0,0 +1,253 @@ +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks 
v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= +github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod 
h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= +github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/groupcache 
v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/cel-go v0.17.8 h1:j9m730pMZt1Fc4oKhCLUHfjj6527LuhYcYw0Rl8gqto= +github.com/google/cel-go v0.17.8/go.mod h1:HXZKzB0LXqer5lHHgfWAnlYwJaQBDKMjxjulNQzhwhY= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod 
h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= +github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= +github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod 
h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU= +github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48= 
+go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 h1:3d+S281UTjM+AbF31XSOYn1qXn3BgIdWl8HNEpx08Jk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0/go.mod h1:0+KuTDyKL4gjKCF75pHOX4wuzYDUZYfAQdSu43o+Z2I= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= +go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto 
v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= +golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod 
h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= +golang.org/x/xerrors 
v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 h1:L6iMMGrtzgHsWofoFcihmDEMYeDR9KN/ThbPWGrh++g= +google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5/go.mod h1:oH/ZOT02u4kWEp7oYBGYFFkCdKS/uYR9Z7+0/xuuFp8= +google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e h1:z3vDksarJxsAKM5dmEGv0GHwE2hKJ096wZra71Vs4sw= +google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e/go.mod h1:rsr7RhLuwsDKL7RmgDDCUc6yaGr1iqceVb5Wv6f6YvQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 
v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.30.1 h1:kCm/6mADMdbAxmIh0LBjS54nQBE+U4KmbCfIkF5CpJY= +k8s.io/api v0.30.1/go.mod h1:ddbN2C0+0DIiPntan/bye3SW3PdwLa11/0yqwvuRrJM= +k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= +k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= +k8s.io/apimachinery v0.30.1 h1:ZQStsEfo4n65yAdlGTfP/uSHMQSoYzU/oeEbkmF7P2U= +k8s.io/apimachinery v0.30.1/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/apiserver v0.30.1 h1:BEWEe8bzS12nMtDKXzCF5Q5ovp6LjjYkSp8qOPk8LZ8= +k8s.io/apiserver v0.30.1/go.mod h1:i87ZnQ+/PGAmSbD/iEKM68bm1D5reX8fO4Ito4B01mo= +k8s.io/client-go v0.30.1 h1:uC/Ir6A3R46wdkgCV3vbLyNOYyCJ8oZnjtJGKfytl/Q= +k8s.io/client-go v0.30.1/go.mod h1:wrAqLNs2trwiCH/wxxmT/x3hKVH9PuV0GGW0oDoHVqc= +k8s.io/component-base v0.30.1 h1:bvAtlPh1UrdaZL20D9+sWxsJljMi0QZ3Lmw+kmZAaxQ= +k8s.io/component-base v0.30.1/go.mod h1:e/X9kDiOebwlI41AvBHuWdqFriSRrX50CdwA9TFaHLI= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod 
h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0/go.mod h1:z7+wmGM2dfIiLRfrC6jb5kV2Mq/sK1ZP303cxzkV5Y4= +sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= +sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt new file mode 100644 index 00000000..ff72ff2a --- /dev/null +++ b/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ \ No newline at end of file diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go new file mode 100644 index 00000000..1b7f0d73 --- /dev/null +++ b/internal/controller/pathwaysapi_controller.go @@ -0,0 +1,62 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + pathwaysapiv1 "pathways-api/api/v1" +) + +// PathwaysAPIReconciler reconciles a PathwaysAPI object +type PathwaysAPIReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the PathwaysAPI object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile +func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // TODO(user): your logic here + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&pathwaysapiv1.PathwaysAPI{}). + Complete(r) +} diff --git a/internal/controller/pathwaysapi_controller_test.go b/internal/controller/pathwaysapi_controller_test.go new file mode 100644 index 00000000..819dee8a --- /dev/null +++ b/internal/controller/pathwaysapi_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2024. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + pathwaysapiv1 "pathways-api/api/v1" +) + +var _ = Describe("PathwaysAPI Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + pathwaysapi := &pathwaysapiv1.PathwaysAPI{} + + BeforeEach(func() { + By("creating the custom resource for the Kind PathwaysAPI") + err := k8sClient.Get(ctx, typeNamespacedName, pathwaysapi) + if err != nil && errors.IsNotFound(err) { + resource := &pathwaysapiv1.PathwaysAPI{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. 
+ resource := &pathwaysapiv1.PathwaysAPI{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance PathwaysAPI") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &PathwaysAPIReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. + }) + }) +}) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go new file mode 100644 index 00000000..fb44145c --- /dev/null +++ b/internal/controller/suite_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + pathwaysapiv1 "pathways-api/api/v1" + // +kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", + fmt.Sprintf("1.30.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. 
+ cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = pathwaysapiv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // +kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go new file mode 100644 index 00000000..140dce13 --- /dev/null +++ b/test/e2e/e2e_suite_test.go @@ -0,0 +1,32 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Run e2e tests using the Ginkgo runner. +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + _, _ = fmt.Fprintf(GinkgoWriter, "Starting pathways-api suite\n") + RunSpecs(t, "e2e suite") +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go new file mode 100644 index 00000000..d519f2e4 --- /dev/null +++ b/test/e2e/e2e_test.go @@ -0,0 +1,122 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "pathways-api/test/utils" +) + +const namespace = "pathways-api-system" + +var _ = Describe("controller", Ordered, func() { + BeforeAll(func() { + By("installing prometheus operator") + Expect(utils.InstallPrometheusOperator()).To(Succeed()) + + By("installing the cert-manager") + Expect(utils.InstallCertManager()).To(Succeed()) + + By("creating manager namespace") + cmd := exec.Command("kubectl", "create", "ns", namespace) + _, _ = utils.Run(cmd) + }) + + AfterAll(func() { + By("uninstalling the Prometheus manager bundle") + utils.UninstallPrometheusOperator() + + By("uninstalling the cert-manager bundle") + utils.UninstallCertManager() + + By("removing manager namespace") + cmd := exec.Command("kubectl", "delete", "ns", namespace) + _, _ = utils.Run(cmd) + }) + + Context("Operator", func() { + It("should run successfully", func() { + var controllerPodName string + var err error + + // projectimage stores the name of the image used in the example + var projectimage = "example.com/pathways-api:v0.0.1" + + By("building the manager(Operator) image") + cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("loading the the manager(Operator) image on Kind") + err = utils.LoadImageToKindClusterWithName(projectimage) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("installing CRDs") + cmd = exec.Command("make", "install") + _, err = 
utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("deploying the controller-manager") + cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage)) + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func() error { + // Get pod name + + cmd = exec.Command("kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", namespace, + ) + + podOutput, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + podNames := utils.GetNonEmptyLines(string(podOutput)) + if len(podNames) != 1 { + return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) + } + controllerPodName = podNames[0] + ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) + + // Validate pod status + cmd = exec.Command("kubectl", "get", + "pods", controllerPodName, "-o", "jsonpath={.status.phase}", + "-n", namespace, + ) + status, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + if string(status) != "Running" { + return fmt.Errorf("controller pod in %s status", status) + } + return nil + } + EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + + }) + }) +}) diff --git a/test/utils/utils.go b/test/utils/utils.go new file mode 100644 index 00000000..6b96ab5d --- /dev/null +++ b/test/utils/utils.go @@ -0,0 +1,140 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "fmt" + "os" + "os/exec" + "strings" + + . "github.com/onsi/ginkgo/v2" //nolint:golint,revive +) + +const ( + prometheusOperatorVersion = "v0.72.0" + prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + + "releases/download/%s/bundle.yaml" + + certmanagerVersion = "v1.14.4" + certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" +) + +func warnError(err error) { + _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) +} + +// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. 
+func InstallPrometheusOperator() error { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "create", "-f", url) + _, err := Run(cmd) + return err +} + +// Run executes the provided command within this context +func Run(cmd *exec.Cmd) ([]byte, error) { + dir, _ := GetProjectDir() + cmd.Dir = dir + + if err := os.Chdir(cmd.Dir); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err) + } + + cmd.Env = append(os.Environ(), "GO111MODULE=on") + command := strings.Join(cmd.Args, " ") + _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + output, err := cmd.CombinedOutput() + if err != nil { + return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + } + + return output, nil +} + +// UninstallPrometheusOperator uninstalls the prometheus +func UninstallPrometheusOperator() { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// UninstallCertManager uninstalls the cert manager +func UninstallCertManager() { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// InstallCertManager installs the cert manager bundle. +func InstallCertManager() error { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "apply", "-f", url) + if _, err := Run(cmd); err != nil { + return err + } + // Wait for cert-manager-webhook to be ready, which can take time if cert-manager + // was re-installed after uninstalling on a cluster. 
+ cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook", + "--for", "condition=Available", + "--namespace", "cert-manager", + "--timeout", "5m", + ) + + _, err := Run(cmd) + return err +} + +// LoadImageToKindClusterWithName loads a local docker image to the kind cluster +func LoadImageToKindClusterWithName(name string) error { + cluster := "kind" + if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { + cluster = v + } + kindOptions := []string{"load", "docker-image", name, "--name", cluster} + cmd := exec.Command("kind", kindOptions...) + _, err := Run(cmd) + return err +} + +// GetNonEmptyLines converts given command output string into individual objects +// according to line breakers, and ignores the empty elements in it. +func GetNonEmptyLines(output string) []string { + var res []string + elements := strings.Split(output, "\n") + for _, element := range elements { + if element != "" { + res = append(res, element) + } + } + + return res +} + +// GetProjectDir will return the directory where the project is +func GetProjectDir() (string, error) { + wd, err := os.Getwd() + if err != nil { + return wd, err + } + wd = strings.Replace(wd, "/test/e2e", "", -1) + return wd, nil +} From 38531aac66aed51e932c256d60db73e1bdee60cb Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 30 Jul 2024 16:42:49 +0000 Subject: [PATCH 03/32] Simplified working Pathways spec. 
--- api/v1/pathwaysapi_types.go | 16 +++++++++++++-- ...ways-api.pathways.domain_pathwaysapis.yaml | 13 +++++++++--- config/manager/kustomization.yaml | 6 ++++++ .../samples/pathways-api_v1_pathwaysapi.yaml | 4 ++++ internal/controller/pathwaysapi_controller.go | 20 +++++++++++++++++-- 5 files changed, 52 insertions(+), 7 deletions(-) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index e9eb0c04..4a2295d9 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -28,8 +28,20 @@ type PathwaysAPISpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // Foo is an example field of PathwaysAPI. Edit pathwaysapi_types.go to remove/update - Foo string `json:"foo,omitempty"` + // Test message is an example field of PathwaysAPI. Edit pathwaysapi_types.go to remove/update + TextMessage string `json:"textMessage,omitempty"` + TpuType string `json:"tpuType,omitempty"` + NumSlices int32 `json:"numSlices,omitempty"` + WorkloadMode string `json:"workloadMode,omitempty"` + + // tpuType: v4-8 + // numSlices: 12 + // workloadMode: headless + // backoffLimit: 4 # pass this down to JobSet + // workloadImage: + // workloadName: + // workloadType: inference # training or inference + } // PathwaysAPIStatus defines the observed state of PathwaysAPI diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 447d12e2..249b4a8b 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -39,9 +39,16 @@ spec: spec: description: PathwaysAPISpec defines the desired state of PathwaysAPI properties: - foo: - description: Foo is an example field of PathwaysAPI. 
Edit pathwaysapi_types.go - to remove/update + numSlices: + format: int32 + type: integer + textMessage: + description: Test message is an example field of PathwaysAPI. Edit + pathwaysapi_types.go to remove/update + type: string + tpuType: + type: string + workloadMode: type: string type: object status: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 5c5f0b84..9209b790 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,2 +1,8 @@ resources: - manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysapi + newTag: latest diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index 3cf0a1d9..6bf8c748 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -7,3 +7,7 @@ metadata: name: pathwaysapi-sample spec: # TODO(user): Add fields here + textMessage: "Hello World 2" + tpuType: v4-8 + numSlices: 2 + workloadMode: headless diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 1b7f0d73..214dff91 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -20,11 +20,13 @@ import ( "context" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - pathwaysapiv1 "pathways-api/api/v1" + pathwaysapi "pathways-api/api/v1" ) // PathwaysAPIReconciler reconciles a PathwaysAPI object @@ -50,6 +52,20 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) _ = log.FromContext(ctx) // TODO(user): your logic here + pw := &pathwaysapi.PathwaysAPI{} + if err := r.Get(ctx, 
types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { + // log.Error(err, "unable to fetch Pathways ") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) + pwMessage := pw.Spec.TextMessage + tpuType := pw.Spec.TpuType + numSlices := pw.Spec.NumSlices + workloadMode := pw.Spec.WorkloadMode + + ctx = ctrl.LoggerInto(ctx, log) + + log.Info("ROSHANI CONTROLLER WORKING...", "TextMessage", pwMessage, "TpuType", tpuType, "NumSlices", numSlices, "WorkloadMode", workloadMode) return ctrl.Result{}, nil } @@ -57,6 +73,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) // SetupWithManager sets up the controller with the Manager. func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&pathwaysapiv1.PathwaysAPI{}). + For(&pathwaysapi.PathwaysAPI{}). Complete(r) } From 5d966dd06d7b7d9510d34ca1daefc8e5548a9207 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 30 Jul 2024 21:48:13 +0000 Subject: [PATCH 04/32] Simplified Pathways spec with constructed JobSet object --- api/v1/pathwaysapi_types.go | 20 +++--- ...ways-api.pathways.domain_pathwaysapis.yaml | 2 +- .../samples/pathways-api_v1_pathwaysapi.yaml | 2 +- internal/controller/pathwaysapi_controller.go | 64 +++++++++++++++++++ 4 files changed, 76 insertions(+), 12 deletions(-) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 4a2295d9..31417913 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -28,22 +28,22 @@ type PathwaysAPISpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // Test message is an example field of PathwaysAPI. Edit pathwaysapi_types.go to remove/update + // Text message is an example field of PathwaysAPI. 
Edit pathwaysapi_types.go to remove/update TextMessage string `json:"textMessage,omitempty"` TpuType string `json:"tpuType,omitempty"` NumSlices int32 `json:"numSlices,omitempty"` WorkloadMode string `json:"workloadMode,omitempty"` - - // tpuType: v4-8 - // numSlices: 12 - // workloadMode: headless - // backoffLimit: 4 # pass this down to JobSet - // workloadImage: - // workloadName: - // workloadType: inference # training or inference - + // JobSetSpec jobsetv1alpha2.JobSet `json:"jobSetSpec"` } +// tpuType: v4-8 +// numSlices: 12 +// workloadMode: headless +// backoffLimit: 4 # pass this down to JobSet +// workloadImage: +// workloadName: +// workloadType: inference # training or inference + // PathwaysAPIStatus defines the observed state of PathwaysAPI type PathwaysAPIStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 249b4a8b..1751108d 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -43,7 +43,7 @@ spec: format: int32 type: integer textMessage: - description: Test message is an example field of PathwaysAPI. Edit + description: Text message is an example field of PathwaysAPI. 
Edit pathwaysapi_types.go to remove/update type: string tpuType: diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index 6bf8c748..0dc9b013 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -7,7 +7,7 @@ metadata: name: pathwaysapi-sample spec: # TODO(user): Add fields here - textMessage: "Hello World 2" + textMessage: "roshani-jobset3" tpuType: v4-8 numSlices: 2 workloadMode: headless diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 214dff91..a0bcf62e 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -18,14 +18,24 @@ package controller import ( "context" + "fmt" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" + pathwaysapi "pathways-api/api/v1" ) @@ -67,6 +77,60 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Info("ROSHANI CONTROLLER WORKING...", "TextMessage", pwMessage, "TpuType", tpuType, "NumSlices", numSlices, "WorkloadMode", workloadMode) + // This env variable needs to be set as follows: KUBECONFIG=${HOME}/.kube/config + kubeconfig := "/home/roshanin/.kube/config" + + config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) + if err != nil { + panic(err) + } + log.Info("Roshani, config established...") + + client := jobsetclient.NewForConfigOrDie(config) + log.Info("Roshani, client built...") + + js, err := 
client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pwMessage, + }, + Spec: jobsetv1alpha2.JobSetSpec{ + ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ + { + Name: "rjob", + Template: batchv1.JobTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + }, + Spec: batchv1.JobSpec{ + Parallelism: ptr.To(numSlices), + Completions: ptr.To(numSlices), + BackoffLimit: ptr.To(int32(0)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "bash-container", + Image: "bash:latest", + Command: []string{"sleep", "60"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, metav1.CreateOptions{}) + + if err != nil { + panic(err) + } + log.Info("Roshani, created JobSet...") + fmt.Printf("successfully created JobSet: %s\n", js.Name) + // Also works - + // fmt.Printf("successfully created JobSet: %s\n", js.Spec.ReplicatedJobs[0].Template.Spec.Template.Spec.Containers[0].Name) + return ctrl.Result{}, nil } From 9ec22844255e21244647f1932ee13d347915042b Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 5 Aug 2024 21:59:48 +0000 Subject: [PATCH 05/32] Simplified Pathways spec with constructed LWS and JobSet objects. --- api/v1/pathwaysapi_types.go | 3 +- ...ways-api.pathways.domain_pathwaysapis.yaml | 10 +- .../samples/pathways-api_v1_pathwaysapi.yaml | 3 +- go.mod | 47 ++++--- go.sum | 46 +++++++ internal/controller/pathwaysapi_controller.go | 124 +++++++++++++----- 6 files changed, 176 insertions(+), 57 deletions(-) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 31417913..11490c29 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -29,10 +29,11 @@ type PathwaysAPISpec struct { // Important: Run "make" to regenerate code after modifying this file // Text message is an example field of PathwaysAPI. 
Edit pathwaysapi_types.go to remove/update - TextMessage string `json:"textMessage,omitempty"` + WorkloadName string `json:"workloadName,omitempty"` TpuType string `json:"tpuType,omitempty"` NumSlices int32 `json:"numSlices,omitempty"` WorkloadMode string `json:"workloadMode,omitempty"` + WorkloadType string `json:"workloadType,omitempty"` // JobSetSpec jobsetv1alpha2.JobSet `json:"jobSetSpec"` } diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 1751108d..a7cf82b1 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -42,14 +42,16 @@ spec: numSlices: format: int32 type: integer - textMessage: - description: Text message is an example field of PathwaysAPI. Edit - pathwaysapi_types.go to remove/update - type: string tpuType: type: string workloadMode: type: string + workloadName: + description: Text message is an example field of PathwaysAPI. 
Edit + pathwaysapi_types.go to remove/update + type: string + workloadType: + type: string type: object status: description: PathwaysAPIStatus defines the observed state of PathwaysAPI diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index 0dc9b013..0d40c16f 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -7,7 +7,8 @@ metadata: name: pathwaysapi-sample spec: # TODO(user): Add fields here - textMessage: "roshani-jobset3" + workloadName: "roshani-lws-3" tpuType: v4-8 numSlices: 2 workloadMode: headless + workloadType: inference diff --git a/go.mod b/go.mod index 518400cf..024202e6 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,18 @@ module pathways-api go 1.22.0 require ( - github.com/onsi/ginkgo/v2 v2.17.1 - github.com/onsi/gomega v1.32.0 - k8s.io/apimachinery v0.30.1 - k8s.io/client-go v0.30.1 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 + k8s.io/apimachinery v0.30.3 + k8s.io/client-go v0.30.3 sigs.k8s.io/controller-runtime v0.18.4 ) +require ( + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect +) + require ( github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -35,10 +40,10 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect + github.com/google/uuid v1.3.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect - github.com/imdario/mergo v0.3.6 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect 
github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect @@ -47,9 +52,9 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.16.0 // indirect - github.com/prometheus/client_model v0.4.0 // indirect - github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/client_golang v1.18.0 // indirect + github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stoewer/go-strcase v1.2.0 // indirect @@ -63,15 +68,15 @@ require ( go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.26.0 // indirect - golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.18.0 // indirect + golang.org/x/tools v0.21.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e // indirect @@ -81,15 +86,17 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.30.1 // indirect + k8s.io/api v0.30.3 // indirect 
k8s.io/apiextensions-apiserver v0.30.1 // indirect k8s.io/apiserver v0.30.1 // indirect k8s.io/component-base v0.30.1 // indirect - k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/klog/v2 v2.120.1 k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect + sigs.k8s.io/jobset v0.5.2 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/lws v0.3.0 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 9ce9c0dd..8c41361a 100644 --- a/go.sum +++ b/go.sum @@ -40,6 +40,8 @@ github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/ github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= @@ -62,13 +64,19 @@ github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= 
+github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -86,6 +94,8 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= 
+github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -95,18 +105,28 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= 
+github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= +github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= +github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= @@ -155,6 +175,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -164,6 
+186,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -172,19 +196,27 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -193,6 +225,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -225,14 +259,20 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.30.1 h1:kCm/6mADMdbAxmIh0LBjS54nQBE+U4KmbCfIkF5CpJY= k8s.io/api v0.30.1/go.mod h1:ddbN2C0+0DIiPntan/bye3SW3PdwLa11/0yqwvuRrJM= +k8s.io/api v0.30.3 h1:ImHwK9DCsPA9uoU3rVh4QHAHHK5dTSv1nxJUapx8hoQ= +k8s.io/api v0.30.3/go.mod h1:GPc8jlzoe5JG3pb0KJCSLX5oAFIW3/qNJITlDj8BH04= k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= k8s.io/apimachinery v0.30.1 h1:ZQStsEfo4n65yAdlGTfP/uSHMQSoYzU/oeEbkmF7P2U= k8s.io/apimachinery v0.30.1/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/apimachinery v0.30.3 h1:q1laaWCmrszyQuSQCfNB8cFgCuDAoPszKY4ucAjDwHc= +k8s.io/apimachinery v0.30.3/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= k8s.io/apiserver v0.30.1 h1:BEWEe8bzS12nMtDKXzCF5Q5ovp6LjjYkSp8qOPk8LZ8= k8s.io/apiserver v0.30.1/go.mod h1:i87ZnQ+/PGAmSbD/iEKM68bm1D5reX8fO4Ito4B01mo= k8s.io/client-go v0.30.1 h1:uC/Ir6A3R46wdkgCV3vbLyNOYyCJ8oZnjtJGKfytl/Q= k8s.io/client-go v0.30.1/go.mod h1:wrAqLNs2trwiCH/wxxmT/x3hKVH9PuV0GGW0oDoHVqc= +k8s.io/client-go v0.30.3 h1:bHrJu3xQZNXIi8/MoxYtZBBWQQXwy16zqJwloXXfD3k= +k8s.io/client-go v0.30.3/go.mod h1:8d4pf8vYu665/kUbsxWAQ/JDBNWqfFeZnvFiVdmx89U= k8s.io/component-base v0.30.1 h1:bvAtlPh1UrdaZL20D9+sWxsJljMi0QZ3Lmw+kmZAaxQ= k8s.io/component-base v0.30.1/go.mod h1:e/X9kDiOebwlI41AvBHuWdqFriSRrX50CdwA9TFaHLI= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= @@ -245,9 +285,15 @@ sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RCh sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0/go.mod h1:z7+wmGM2dfIiLRfrC6jb5kV2Mq/sK1ZP303cxzkV5Y4= sigs.k8s.io/controller-runtime v0.18.4 
h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= +sigs.k8s.io/jobset v0.5.2 h1:276q5Pi/ErLYj+GQ0ydEXR6tx3LwBhEzHLQv+k8bYF4= +sigs.k8s.io/jobset v0.5.2/go.mod h1:Vg99rj/6OoGvy1uvywGEHOcVLCWWJYkJtisKqdWzcFw= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/lws v0.3.0 h1:PtjiDHZWCxAeMyrsmPNN0i7KAVf6ocVEQFcojPWeA+k= +sigs.k8s.io/lws v0.3.0/go.mod h1:/R1Q2LB2eg6t9mX5M6V4HLkeucxBFgOyaKkSGh/FGAY= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index a0bcf62e..c1676e1d 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -35,6 +35,8 @@ import ( jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" + leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1" + lwsclient "sigs.k8s.io/lws/client-go/clientset/versioned" pathwaysapi "pathways-api/api/v1" ) @@ -68,10 +70,11 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) - pwMessage := pw.Spec.TextMessage + pwMessage := pw.Spec.WorkloadName tpuType 
:= pw.Spec.TpuType numSlices := pw.Spec.NumSlices workloadMode := pw.Spec.WorkloadMode + workloadType := pw.Spec.WorkloadType ctx = ctrl.LoggerInto(ctx, log) @@ -86,32 +89,87 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) } log.Info("Roshani, config established...") - client := jobsetclient.NewForConfigOrDie(config) + client := lwsclient.NewForConfigOrDie(config) log.Info("Roshani, client built...") - - js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: pwMessage, - }, - Spec: jobsetv1alpha2.JobSetSpec{ - ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ - { - Name: "rjob", - Template: batchv1.JobTemplateSpec{ + if workloadType == "inference" { + lws, err := client.LeaderworkersetV1().LeaderWorkerSets("default").Create(ctx, &leaderworkersetv1.LeaderWorkerSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pwMessage, + }, + Spec: leaderworkersetv1.LeaderWorkerSetSpec{ + Replicas: ptr.To(numSlices), + LeaderWorkerTemplate: leaderworkersetv1.LeaderWorkerTemplate{ + LeaderTemplate: &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: workloadType, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "bash-container", + Image: "bash:latest", + Command: []string{"/bin/sh"}, + Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + }, + }, + // RestartPolicy: "Never", + }, + }, + WorkerTemplate: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Name: "job", + Name: "workers", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "bash-container", + Image: "bash:latest", + Command: []string{"/bin/sh"}, + Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + }, + }, + // RestartPolicy: "Never", }, - Spec: batchv1.JobSpec{ - Parallelism: ptr.To(numSlices), - Completions: ptr.To(numSlices), - BackoffLimit: ptr.To(int32(0)), - Template: corev1.PodTemplateSpec{ - Spec: 
corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "bash-container", - Image: "bash:latest", - Command: []string{"sleep", "60"}, + }, + Size: ptr.To(int32(2)), + }, + StartupPolicy: "LeaderReady", + }, + }, metav1.CreateOptions{}) + if err != nil { + panic(err) + } + log.Info("Roshani, created LeaderWorkerSet...") + fmt.Printf("successfully created LeaderWorkerSet: %s\n", lws.Name) + } else if workloadType == "training" { + // JobSet works --- + client := jobsetclient.NewForConfigOrDie(config) + + js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pwMessage, + }, + Spec: jobsetv1alpha2.JobSetSpec{ + ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ + { + Name: "rjob", + Template: batchv1.JobTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + }, + Spec: batchv1.JobSpec{ + Parallelism: ptr.To(numSlices), + Completions: ptr.To(numSlices), + BackoffLimit: ptr.To(int32(0)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "bash-container", + Image: "bash:latest", + Command: []string{"echo"}, + Args: []string{"Hello"}, + }, }, }, }, @@ -120,16 +178,19 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) }, }, }, - }, - }, metav1.CreateOptions{}) - - if err != nil { - panic(err) + }, metav1.CreateOptions{}) + if err != nil { + panic(err) + } + log.Info("Roshani, created JobSet...") + fmt.Printf("successfully created JobSet: %s\n", js.Name) } - log.Info("Roshani, created JobSet...") - fmt.Printf("successfully created JobSet: %s\n", js.Name) + // Also works - + // fmt.Printf("successfully created JobSet: %s\n", js.Name) // fmt.Printf("successfully created JobSet: %s\n", js.Spec.ReplicatedJobs[0].Template.Spec.Template.Spec.Containers[0].Name) + // Scribe for tracking JobSet, reconciliation calls for events + // k8 APIs queries. 
return ctrl.Result{}, nil } @@ -138,5 +199,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&pathwaysapi.PathwaysAPI{}). + // For JobSet and LWS Complete(r) } From c8217dffea45508f31a27bd8639330a5039234d8 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 4 Sep 2024 18:12:04 +0000 Subject: [PATCH 06/32] Updates to Pathways LWS spec - RBAC, etc. --- config/rbac/role.yaml | 3 + internal/controller/pathwaysapi_controller.go | 170 ++++++++++++++---- 2 files changed, 135 insertions(+), 38 deletions(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 77393382..656d3a4e 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -30,3 +30,6 @@ rules: - get - patch - update +- apiGroups: ["leaderworkerset.x-k8s.io"] + resources: ["leaderworkersets"] + verbs: ["create", "update", "delete"] diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index c1676e1d..3effbb72 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -22,7 +22,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" @@ -38,6 +37,11 @@ import ( leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1" lwsclient "sigs.k8s.io/lws/client-go/clientset/versioned" + // jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + // jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" + // leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1" + // lwsclient "sigs.k8s.io/lws/client-go/clientset/versioned" + pathwaysapi "pathways-api/api/v1" ) @@ -47,10 +51,6 @@ type PathwaysAPIReconciler struct { Scheme *runtime.Scheme } -// 
+kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update - // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. // TODO(user): Modify the Reconcile function to compare the state specified by @@ -60,6 +60,12 @@ type PathwaysAPIReconciler struct { // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile + +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update +// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = log.FromContext(ctx) @@ -80,70 +86,165 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Info("ROSHANI CONTROLLER WORKING...", "TextMessage", pwMessage, "TpuType", tpuType, "NumSlices", numSlices, "WorkloadMode", workloadMode) - // This env variable needs to be set as follows: KUBECONFIG=${HOME}/.kube/config - kubeconfig := "/home/roshanin/.kube/config" - - config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) - if err != nil { - panic(err) - } + kubeconfig := ctrl.GetConfigOrDie() 
log.Info("Roshani, config established...") - client := lwsclient.NewForConfigOrDie(config) + truth := true + size := int32(5) // total number of workers (across all slices) + 1 + replicas := int32(1) + fmt.Printf("Replicas: %d , Size: %d \n", replicas, size) + + client := lwsclient.NewForConfigOrDie(kubeconfig) log.Info("Roshani, client built...") if workloadType == "inference" { lws, err := client.LeaderworkersetV1().LeaderWorkerSets("default").Create(ctx, &leaderworkersetv1.LeaderWorkerSet{ ObjectMeta: metav1.ObjectMeta{ - Name: pwMessage, + Name: pwMessage, + Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, }, Spec: leaderworkersetv1.LeaderWorkerSetSpec{ - Replicas: ptr.To(numSlices), + Replicas: ptr.To(replicas), + StartupPolicy: "LeaderCreated", // this seems to be a mandatory field now LeaderWorkerTemplate: leaderworkersetv1.LeaderWorkerTemplate{ + Size: ptr.To(size), LeaderTemplate: &corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Name: workloadType, + Name: workloadType, + Labels: map[string]string{"xpk.google.com/workload": "pathways-headless"}, }, Spec: corev1.PodSpec{ + // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, + // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x1"}, + NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x2"}, + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, Containers: []corev1.Container{ { - Name: "bash-container", - Image: "bash:latest", - Command: []string{"/bin/sh"}, - Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + Name: "pathways-proxy", + Image: 
"us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{"--alsologtostderr", "--v=0", "--pathways_ifrt_proxy_server_resource_manager=$(LWS_LEADER_ADDRESS):38677", "--pathways_ifrt_proxy_server_port=38681", "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", "--pathways_plaque_network=gcp"}, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + // Resources: []corev1.ResourceRequirements{ + // {Limits: corev1.ResourceList{{cpu: "24", memory: 100G,},}, + // }, + // }, + }, + { + Name: "pathways-rm", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Env: []corev1.EnvVar{{Name: "HOST_ADDRESS", Value: "$(LWS_LEADER_ADDRESS)"}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}}, + Args: []string{"--pathways_server_port=38677", + "--pathways_server_provides_devices=false", + "--pathways_device_type=NONE", + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", + "--pathways_resource_manager_expected_num_worker_jobs=2"}, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + // Resources: []corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]resource.Quantity{cpu: "24", memory: 100G,},},}, }, }, - // RestartPolicy: "Never", }, }, WorkerTemplate: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Name: "workers", + Name: workloadType, + Labels: map[string]string{"xpk.google.com/workload": "pathways-headless"}, }, Spec: corev1.PodSpec{ + // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, + // NodeSelector: 
map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x1"}, + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x2"}, Containers: []corev1.Container{ { - Name: "bash-container", - Image: "bash:latest", - Command: []string{"/bin/sh"}, - Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + Name: "worker", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{"--alsologtostderr", "--pathways_server_port=38679", "--pathways_resource_manager=$(LWS_LEADER_ADDRESS):38677", "--pathways_persistent_compilation_cache=false", "--pathways_compilation_mode=compile_at_worker", + "--xla_tpu_enable_data_parallel_all_reduce_opt=true", "--xla_tpu_data_parallel_opt_different_sized_ops=true", "--xla_tpu_enable_async_collective_fusion=true", "--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", + "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", "--xla_tpu_overlap_compute_collective_tc=true", "--xla_enable_async_all_gather=true", "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp"}, + Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + // Resources: []corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]resource.Quantity{cpu: "24", memory: 100G,},},}, }, }, - // RestartPolicy: "Never", }, }, - Size: ptr.To(int32(2)), }, - StartupPolicy: "LeaderReady", }, }, metav1.CreateOptions{}) + + // Pathways Spec + LWS ------ + + // lws, err := client.LeaderworkersetV1().LeaderWorkerSets("default").Create(ctx, &leaderworkersetv1.LeaderWorkerSet{ + // ObjectMeta: 
metav1.ObjectMeta{ + // Name: pwMessage, + // }, + // Spec: leaderworkersetv1.LeaderWorkerSetSpec{ + // Replicas: ptr.To(numSlices), + // LeaderWorkerTemplate: leaderworkersetv1.LeaderWorkerTemplate{ + // LeaderTemplate: &corev1.PodTemplateSpec{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: workloadType, + // }, + // Spec: corev1.PodSpec{ + // Containers: []corev1.Container{ + // { + // Name: "bash-container", + // Image: "bash:latest", + // Command: []string{"/bin/sh"}, + // Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + // }, + // }, + // // RestartPolicy: "Never", + // }, + // }, + // WorkerTemplate: corev1.PodTemplateSpec{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: "workers", + // }, + // Spec: corev1.PodSpec{ + // Containers: []corev1.Container{ + // { + // Name: "bash-container", + // Image: "bash:latest", + // Command: []string{"/bin/sh"}, + // Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, + // }, + // }, + // // RestartPolicy: "Never", + // }, + // }, + // Size: ptr.To(int32(2)), + // }, + // StartupPolicy: "LeaderReady", + // }, + // }, metav1.CreateOptions{}) + if err != nil { panic(err) } log.Info("Roshani, created LeaderWorkerSet...") fmt.Printf("successfully created LeaderWorkerSet: %s\n", lws.Name) } else if workloadType == "training" { - // JobSet works --- - client := jobsetclient.NewForConfigOrDie(config) + // // Pathways Spec + JobSet ------ + client := jobsetclient.NewForConfigOrDie(kubeconfig) js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ @@ -185,13 +286,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Info("Roshani, created JobSet...") fmt.Printf("successfully created JobSet: %s\n", js.Name) } - - // Also works - - // fmt.Printf("successfully created JobSet: %s\n", js.Name) - // fmt.Printf("successfully created JobSet: %s\n", 
js.Spec.ReplicatedJobs[0].Template.Spec.Template.Spec.Containers[0].Name) - // Scribe for tracking JobSet, reconciliation calls for events - // k8 APIs queries. - return ctrl.Result{}, nil } From 29195979114b87df44fa7978cc2285505cfe7675 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 10 Sep 2024 17:29:42 +0000 Subject: [PATCH 07/32] Pathways JobSet client.go example. --- .../samples/pathways-api_v1_pathwaysapi.yaml | 5 +- internal/controller/pathwaysapi_controller.go | 235 +++++++++++++++++- 2 files changed, 230 insertions(+), 10 deletions(-) diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index 0d40c16f..c90c5267 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -7,8 +7,9 @@ metadata: name: pathwaysapi-sample spec: # TODO(user): Add fields here - workloadName: "roshani-lws-3" - tpuType: v4-8 + workloadName: "roshani-try-lws-16" + tpuType: v4-16 numSlices: 2 workloadMode: headless workloadType: inference + # add a container spec, pass as a workload diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 3effbb72..4cbfb67d 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -253,33 +253,252 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) Spec: jobsetv1alpha2.JobSetSpec{ ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ { - Name: "rjob", + Name: "worker", + Replicas: 2, Template: batchv1.JobTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Name: "job", + Annotations: map[string]string{"alpha.jobset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, }, Spec: batchv1.JobSpec{ - Parallelism: ptr.To(numSlices), - Completions: ptr.To(numSlices), BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ 
Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: ptr.To(int64(30)), Containers: []corev1.Container{ { - Name: "bash-container", - Image: "bash:latest", - Command: []string{"echo"}, - Args: []string{"Hello"}, + Name: "pathways-worker", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38677", + fmt.Sprintf("--pathways_resource_manager=%s-rm-0-0.%s:38677", pwMessage, pwMessage), + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + "--xla_tpu_enable_data_parallel_all_reduce_opt=true", + "--xla_tpu_data_parallel_opt_different_sized_ops=true", + "--xla_tpu_enable_async_collective_fusion=true", + "--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", + "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", + "--xla_tpu_overlap_compute_collective_tc=true", + "--xla_enable_async_all_gather=true", + "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + // Resources: corev1.ResourceRequirements{ + // Limits: {map[corev1.ResourceName]Res{"google.com/tpu", 4}, + // }, + // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, }, + NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, + // Volumes: []corev1.Volume{ + // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: *corev1.HostPathVolumeSource{Path: "/tmp", Type: *corev1.HostPathType("DirectoryOrCreate")}}}, + // }, + }, + }, + }, + }, + }, + { + Name: "rm", + Replicas: 1, + Template: 
batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + // TerminationGracePeriodSeconds: ptr.To(int64(30)), + Containers: []corev1.Container{ + { + Name: "pathways-rm", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38677", + "--pathways_server_provides_devices=false", + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", + "--pathways_expected_instances=tpuv4:2x2x2,tpuv4:2x2x2", + }, + Env: []corev1.EnvVar{ + {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwMessage, "rm", pwMessage)}, + {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + // Resources: corev1.ResourceRequirements{ + // Limits: {map[corev1.ResourceName]{"google.com/tpu", 4}, + // }, + // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, + }, + }, + NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": "cpu-rm-np"}, + // Volumes: []corev1.Volume{ + // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: 
&corev1.HostPathType("DirectoryOrCreate")}}}, + // }, + }, + }, + }, + }, + }, + { + Name: "proxy", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + // TerminationGracePeriodSeconds: ptr.To(int64(30)), + Containers: []corev1.Container{ + { + Name: "pathways-proxy", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--v=0", + fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwMessage, "rm", pwMessage), + "--pathways_ifrt_server_port=38676", + "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", + "--pathways_plaque_network=gcp", + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38676}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + // Resources: corev1.ResourceRequirements{ + // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, + // }, + // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, + }, + }, + NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": "cpu-proxy-np"}, + // Volumes: []corev1.Volume{ + // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: &corev1.HostPathDirectoryOrCreate}}}, + // }, + }, + }, + }, + }, + }, + { + Name: "main", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + // TerminationGracePeriodSeconds: ptr.To(int64(30)), + Containers: []corev1.Container{ + { + 
Name: "maxtext", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Env: []corev1.EnvVar{ + {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, + {Name: "JAX_PLATFORMS", Value: "proxy"}, + {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38676", pwMessage, "proxy", pwMessage)}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + // Resources: corev1.ResourceRequirements{ + // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, + // }, + // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, + }, + }, + NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": "cpu-user-np"}, + // Volumes: []corev1.Volume{ + // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: &corev1.HostPathDirectoryOrCreate}}}, + // }, }, }, }, }, }, }, + SuccessPolicy: &jobsetv1alpha2.SuccessPolicy{ + Operator: "All", + TargetReplicatedJobs: []string{"main"}, + }, + FailurePolicy: &jobsetv1alpha2.FailurePolicy{ + MaxRestarts: 0, + }, }, }, metav1.CreateOptions{}) + + // js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: pwMessage, + // }, + // Spec: jobsetv1alpha2.JobSetSpec{ + // ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ + // { + // Name: "rjob", + // Template: batchv1.JobTemplateSpec{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: "job", + // }, + // Spec: batchv1.JobSpec{ + // Parallelism: ptr.To(numSlices), + // Completions: ptr.To(numSlices), + // BackoffLimit: ptr.To(int32(0)), + // Template: 
corev1.PodTemplateSpec{ + // Spec: corev1.PodSpec{ + // Containers: []corev1.Container{ + // { + // Name: "bash-container", + // Image: "bash:latest", + // Command: []string{"echo"}, + // Args: []string{"Hello"}, + // }, + // }, + // }, + // }, + // }, + // }, + // }, + // }, + // }, + // }, metav1.CreateOptions{}) if err != nil { panic(err) } From 6f870dc81f2ee6e7dfac8e3158a4c3018587fc80 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Fri, 20 Sep 2024 00:29:42 +0000 Subject: [PATCH 08/32] Pathways JobSet Inference using JobSet client. --- Makefile | 4 +- api/v1/pathwaysapi_types.go | 58 +- ...ways-api.pathways.domain_pathwaysapis.yaml | 8035 ++++++++++++++++- .../samples/pathways-api_v1_pathwaysapi.yaml | 17 +- internal/controller/pathwaysapi_controller.go | 606 +- internal/utils/container_configurations.go | 8 + internal/utils/extra_prototype.go | 43 + 7 files changed, 8348 insertions(+), 423 deletions(-) create mode 100644 internal/utils/container_configurations.go create mode 100644 internal/utils/extra_prototype.go diff --git a/Makefile b/Makefile index f01f7170..be17ef09 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,7 @@ endif .PHONY: install install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. - $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - + $(KUSTOMIZE) build config/crd | $(KUBECTL) apply --server-side -f - .PHONY: uninstall uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. @@ -137,7 +137,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified .PHONY: deploy deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. 
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} - $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + $(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side -f - .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 11490c29..8c4dc4cc 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -17,38 +17,66 @@ limitations under the License. package v1 import ( + corev1 "k8s.io/api/core/v1" + // corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. -// PathwaysAPISpec defines the desired state of PathwaysAPI +// PathwaysCluster creates a Pathways workload. It sets up the TPU +// workers needed for training or inference, along with Pathways +// resources such as the Pathways Resource Manager(RM) and Proxy +// server at the specifiec controller node location. It provides +// an option to deploy a user workload and other containers within +// a Pod. If this pod is not provided, then the workload is assumed +// to be running in headless mode and the user can connect to Proxy, +// to run their workloads. type PathwaysAPISpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // Text message is an example field of PathwaysAPI. Edit pathwaysapi_types.go to remove/update + // WorkloadName is the identifier for the Pathways workload deployment. 
WorkloadName string `json:"workloadName,omitempty"` - TpuType string `json:"tpuType,omitempty"` - NumSlices int32 `json:"numSlices,omitempty"` - WorkloadMode string `json:"workloadMode,omitempty"` - WorkloadType string `json:"workloadType,omitempty"` - // JobSetSpec jobsetv1alpha2.JobSet `json:"jobSetSpec"` -} -// tpuType: v4-8 -// numSlices: 12 -// workloadMode: headless -// backoffLimit: 4 # pass this down to JobSet -// workloadImage: -// workloadName: -// workloadType: inference # training or inference + // PathwaysWorkerNodeSelector is used to specify the nodeSelector for + // Pathways TPU workers (accelerator type and topology). + PathwaysWorkerNodeSelector map[string]string `json:"pathwaysWorkerNodeSelector,omitempty"` + + // PathwaysControllerNodeSelector is used to specify where Pathways resources + // such as RM and proxy should be deployed. + PathwaysControllerNodeSelector map[string]string `json:"pathwaysControllerNodeSelector,omitempty"` + + // Number of TPU slices requested for the Pathways workers. + NumSlices int32 `json:"numSlices,omitempty"` + + // PathwaysDir is the GCS location at which Pathways artifacts + // can be stored. + PathwaysDir string `json:"pathwaysDir,omitempty"` + + // PathwaysClientVersion is the version of the Pathways client. + PathwaysClientVersion string `json:"pathwaysClientVersion,omitempty"` + + // UserPodTemplate accepts a pod composed of user's workload + // (and other) containers. 
+ // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec + // +optional + UserPodTemplate *corev1.PodTemplateSpec `json:"template" protobuf:"bytes,6,opt,name=template"` +} // PathwaysAPIStatus defines the observed state of PathwaysAPI type PathwaysAPIStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file + + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // Track the state of the Pathways workload, acceptable values are - + // Suspended, Completed, Failed + // +optional + WorkloadState string `json:"workloadState,omitempty"` } // +kubebuilder:object:root=true diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index a7cf82b1..24b23a32 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -37,24 +37,8045 @@ spec: metadata: type: object spec: - description: PathwaysAPISpec defines the desired state of PathwaysAPI + description: |- + PathwaysCluster creates a Pathways workload. It sets up the TPU + workers needed for training or inference, along with Pathways + resources such as the Pathways Resource Manager(RM) and Proxy + server at the specifiec controller node location. It provides + an option to deploy a user workload and other containers within + a Pod. If this pod is not provided, then the workload is assumed + to be running in headless mode and the user can connect to Proxy, + to run their workloads. properties: numSlices: + description: Number of TPU slices requested for the Pathways workers. format: int32 type: integer - tpuType: + pathwaysClientVersion: + description: PathwaysClientVersion is the version of the Pathways + client. 
type: string - workloadMode: + pathwaysControllerNodeSelector: + additionalProperties: + type: string + description: |- + PathwaysControllerNodeSelector is used to specify where Pathways resources + such as RM and proxy should be deployed. + type: object + pathwaysDir: + description: |- + PathwaysDir is the GCS location at which Pathways artifacts + can be stored. type: string + pathwaysWorkerNodeSelector: + additionalProperties: + type: string + description: |- + PathwaysWorkerNodeSelector is used to specify the nodeSelector for + Pathways TPU workers (accelerator type and topology). + type: object + template: + description: |- + UserPodTemplate accepts a pod composed of user's workload + (and other) containers. + https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec + properties: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + type: object + spec: + description: |- + Specification of the desired behavior of the pod. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + activeDeadlineSeconds: + description: |- + Optional duration in seconds the pod may be active on the node relative to + StartTime before the system will actively try to mark it failed and kill associated containers. + Value must be a positive integer. + format: int64 + type: integer + affinity: + description: If specified, the pod's scheduling constraints + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. 
+ for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. 
+ properties: + key: + description: The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in the + range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. 
+ items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules (e.g. + co-locate this pod in the same node, zone, etc. as some + other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred + node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. 
+ properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. 
+ If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. 
+ Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling rules + (e.g. avoid putting this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. 
+ items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred + node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. 
The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. 
+ items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. 
+ properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. 
+ Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + automountServiceAccountToken: + description: AutomountServiceAccountToken indicates whether + a service account token should be automatically mounted. 
+ type: boolean + containers: + description: |- + List of containers belonging to the pod. + Containers cannot currently be added or removed. + There must be at least one container in a Pod. + Cannot be updated. + items: + description: A single application container that you want + to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. 
+ items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. 
+ properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in + the pod's namespace + properties: + key: + description: The key of the secret to + select from. Must be a valid secret + key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source of + a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. 
The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. 
There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. 
+ properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. 
+ HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network port + in a single container. 
+ properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. 
+ type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. 
spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents resource + resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry + in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. 
+ type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. 
Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. 
+ type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. 
+ If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. + type: string + role: + description: Role is a SELinux role label that + applies to the container. + type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name + of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. 
+ Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. 
+ format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. 
+ format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. 
+ type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices + to be used by the container. + items: + description: volumeDevice describes a mapping of a + raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside of + the container that the device will be mapped + to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a + Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + + If ReadOnly is false, this field has no meaning and must be unspecified. 
+ + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + dnsConfig: + description: |- + Specifies the DNS parameters of a pod. + Parameters specified here will be merged to the generated DNS + configuration based on DNSPolicy. + properties: + nameservers: + description: |- + A list of DNS name server IP addresses. + This will be appended to the base nameservers generated from DNSPolicy. 
+ Duplicated nameservers will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + options: + description: |- + A list of DNS resolver options. + This will be merged with the base options generated from DNSPolicy. + Duplicated entries will be removed. Resolution options given in Options + will override those that appear in the base DNSPolicy. + items: + description: PodDNSConfigOption defines DNS resolver + options of a pod. + properties: + name: + description: Required. + type: string + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + searches: + description: |- + A list of DNS search domains for host-name lookup. + This will be appended to the base search paths generated from DNSPolicy. + Duplicated search paths will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + dnsPolicy: + description: |- + Set DNS policy for the pod. + Defaults to "ClusterFirst". + Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. + DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. + To have DNS options set along with hostNetwork, you have to specify DNS policy + explicitly to 'ClusterFirstWithHostNet'. + type: string + enableServiceLinks: + description: |- + EnableServiceLinks indicates whether information about services should be injected into pod's + environment variables, matching the syntax of Docker links. + Optional: Defaults to true. + type: boolean + ephemeralContainers: + description: |- + List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing + pod to perform user-initiated actions such as debugging. This list cannot be specified when + creating a pod, and it cannot be modified by updating the pod spec. In order to add an + ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. 
+ items: + description: |- + An EphemeralContainer is a temporary container that you may add to an existing Pod for + user-initiated activities such as debugging. Ephemeral containers have no resource or + scheduling guarantees, and they will not be restarted when they exit or when a Pod is + removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the + Pod to exceed its resource allocation. + + + To add an ephemeral container, use the ephemeralcontainers subresource of an existing + Pod. Ephemeral containers may not be removed or restarted. + properties: + args: + description: |- + Arguments to the entrypoint. + The image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. 
+ More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in + the pod's namespace + properties: + key: + description: The key of the secret to + select from. Must be a valid secret + key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source of + a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: Lifecycle is not allowed for ephemeral + containers. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. 
+ More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. 
+ type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. 
The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. 
There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probes are not allowed for ephemeral containers. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. 
+ type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. 
+ format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the ephemeral container specified as a DNS_LABEL. + This name must be unique among all containers, init containers and ephemeral containers. + type: string + ports: + description: Ports are not allowed for ephemeral containers. 
+ items: + description: ContainerPort represents a network port + in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: Probes are not allowed for ephemeral containers. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. 
+ format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. 
Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents resource + resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources + already allocated to the pod. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry + in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + Restart policy for the container to manage the restart behavior of each + container within a pod. + This may only be set for init containers. You cannot set this field on + ephemeral containers. + type: string + securityContext: + description: |- + Optional: SecurityContext defines the security options the ephemeral container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. 
+ Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. + type: string + role: + description: Role is a SELinux role label that + applies to the container. 
+ type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name + of the GMSA credential spec to use. 
+ type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: Probes are not allowed for ephemeral containers. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. 
+ format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. 
+ format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + targetContainerName: + description: |- + If set, the name of the container from PodSpec that this ephemeral container targets. + The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. + If not set then the ephemeral container uses the namespaces configured in the Pod spec. + + + The container runtime must implement support for this feature. If the runtime does not + support namespace targeting then the result of setting this field is undefined. + type: string + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. 
+ type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices + to be used by the container. + items: + description: volumeDevice describes a mapping of a + raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside of + the container that the device will be mapped + to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a + Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. 
+ When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + + If ReadOnly is false, this field has no meaning and must be unspecified. + + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. 
+ If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + hostAliases: + description: |- + HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts + file if specified. + items: + description: |- + HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the + pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. + items: + type: string + type: array + x-kubernetes-list-type: atomic + ip: + description: IP address of the host file entry. + type: string + required: + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostIPC: + description: |- + Use the host's ipc namespace. + Optional: Default to false. + type: boolean + hostNetwork: + description: |- + Host networking requested for this pod. Use the host's network namespace. + If this option is set, the ports that will be used must be specified. + Default to false. + type: boolean + hostPID: + description: |- + Use the host's pid namespace. + Optional: Default to false. + type: boolean + hostUsers: + description: |- + Use the host's user namespace. + Optional: Default to true. + If set to true or not present, the pod will be run in the host user namespace, useful + for when the pod needs a feature only available to the host user namespace, such as + loading a kernel module with CAP_SYS_MODULE. + When set to false, a new userns is created for the pod. Setting false is useful for + mitigating container breakout vulnerabilities even allowing users to run their + containers as root without actually having root privileges on the host. + This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. 
+ type: boolean + hostname: + description: |- + Specifies the hostname of the Pod + If not specified, the pod's hostname will be set to a system-defined value. + type: string + imagePullSecrets: + description: |- + ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. + If specified, these secrets will be passed to individual puller implementations for them to use. + More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + initContainers: + description: |- + List of initialization containers belonging to the pod. + Init containers are executed in order prior to containers being started. If any + init container fails, the pod is considered to have failed and is handled according + to its restartPolicy. The name for an init container or normal container must be + unique among all containers. + Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. 
+ The resourceRequirements of an init container are taken into account during scheduling + by finding the highest request/limit for each resource type, and then using the max of + of that value or the sum of the normal containers. Limits are applied to init containers + in a similar fashion. + Init containers cannot currently be added or removed. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ + items: + description: A single application container that you want + to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. 
+ More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in + the pod's namespace + properties: + key: + description: The key of the secret to + select from. Must be a valid secret + key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source of + a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. 
If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to take. 
+ properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration that + the container should sleep before being terminated. + properties: + seconds: + description: Seconds is the number of seconds + to sleep. 
+ format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. 
+ format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. 
+ format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. 
+ Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network port + in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. 
+ This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
+ The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents resource + resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. 
+ + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry + in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. 
+ Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. 
+ Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. + type: string + role: + description: Role is a SELinux role label that + applies to the container. + type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. 
+ type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name + of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. 
+ All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. 
Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. 
Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. 
+ Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices + to be used by the container. + items: + description: volumeDevice describes a mapping of a + raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside of + the container that the device will be mapped + to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a + Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + + If ReadOnly is false, this field has no meaning and must be unspecified. 
+ + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + nodeName: + description: |- + NodeName is a request to schedule this pod onto a specific node. If it is non-empty, + the scheduler simply schedules this pod onto that node, assuming that it fits resource + requirements. 
+ type: string + nodeSelector: + additionalProperties: + type: string + description: |- + NodeSelector is a selector which must be true for the pod to fit on a node. + Selector which must match a node's labels for the pod to be scheduled on that node. + More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + type: object + x-kubernetes-map-type: atomic + os: + description: |- + Specifies the OS of the containers in the pod. + Some pod and container fields are restricted if this is set. + + + If the OS field is set to linux, the following fields must be unset: + -securityContext.windowsOptions + + + If the OS field is set to windows, following fields must be unset: + - spec.hostPID + - spec.hostIPC + - spec.hostUsers + - spec.securityContext.appArmorProfile + - spec.securityContext.seLinuxOptions + - spec.securityContext.seccompProfile + - spec.securityContext.fsGroup + - spec.securityContext.fsGroupChangePolicy + - spec.securityContext.sysctls + - spec.shareProcessNamespace + - spec.securityContext.runAsUser + - spec.securityContext.runAsGroup + - spec.securityContext.supplementalGroups + - spec.containers[*].securityContext.appArmorProfile + - spec.containers[*].securityContext.seLinuxOptions + - spec.containers[*].securityContext.seccompProfile + - spec.containers[*].securityContext.capabilities + - spec.containers[*].securityContext.readOnlyRootFilesystem + - spec.containers[*].securityContext.privileged + - spec.containers[*].securityContext.allowPrivilegeEscalation + - spec.containers[*].securityContext.procMount + - spec.containers[*].securityContext.runAsUser + - spec.containers[*].securityContext.runAsGroup + properties: + name: + description: |- + Name is the name of the operating system. The currently supported values are linux and windows. 
+ Additional value may be defined in future and can be one of: + https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration + Clients should expect to handle additional values and treat unrecognized values in this field as os: null + type: string + required: + - name + type: object + overhead: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. + This field will be autopopulated at admission time by the RuntimeClass admission controller. If + the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. + The RuntimeClass admission controller will reject Pod create requests which have the overhead already + set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value + defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. + More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md + type: object + preemptionPolicy: + description: |- + PreemptionPolicy is the Policy for preempting pods with lower priority. + One of Never, PreemptLowerPriority. + Defaults to PreemptLowerPriority if unset. + type: string + priority: + description: |- + The priority value. Various system components use this field to find the + priority of the pod. When Priority Admission Controller is enabled, it + prevents users from setting this field. The admission controller populates + this field from PriorityClassName. + The higher the value, the higher the priority. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the pod's priority. 
"system-node-critical" and + "system-cluster-critical" are two special keywords which indicate the + highest priorities with the former being the highest priority. Any other + name must be defined by creating a PriorityClass object with that name. + If not specified, the pod priority will be default or zero if there is no + default. + type: string + readinessGates: + description: |- + If specified, all readiness gates will be evaluated for pod readiness. + A pod is ready when all its containers are ready AND + all conditions specified in the readiness gates have status equal to "True" + More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates + items: + description: PodReadinessGate contains the reference to + a pod condition + properties: + conditionType: + description: ConditionType refers to a condition in + the pod's condition list with matching type. + type: string + required: + - conditionType + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaims: + description: |- + ResourceClaims defines which ResourceClaims must be allocated + and reserved before the Pod is allowed to start. The resources + will be made available to those containers which consume them + by name. + + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. + items: + description: |- + PodResourceClaim references exactly one ResourceClaim through a ClaimSource. + It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. + Containers that need access to the ResourceClaim reference it with this name. + properties: + name: + description: |- + Name uniquely identifies this resource claim inside the pod. + This must be a DNS_LABEL. + type: string + source: + description: Source describes where to find the ResourceClaim. 
+ properties: + resourceClaimName: + description: |- + ResourceClaimName is the name of a ResourceClaim object in the same + namespace as this pod. + type: string + resourceClaimTemplateName: + description: |- + ResourceClaimTemplateName is the name of a ResourceClaimTemplate + object in the same namespace as this pod. + + + The template will be used to create a new ResourceClaim, which will + be bound to this pod. When this pod is deleted, the ResourceClaim + will also be deleted. The pod name and resource name, along with a + generated component, will be used to form a unique name for the + ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. + + + This field is immutable and no changes will be made to the + corresponding ResourceClaim by the control plane after creating the + ResourceClaim. + type: string + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + restartPolicy: + description: |- + Restart policy for all containers within the pod. + One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. + Default to Always. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy + type: string + runtimeClassName: + description: |- + RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used + to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. + If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an + empty definition that uses the default runtime handler. + More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class + type: string + schedulerName: + description: |- + If specified, the pod will be dispatched by specified scheduler. + If not specified, the pod will be dispatched by default scheduler. 
+ type: string + schedulingGates: + description: |- + SchedulingGates is an opaque list of values that if specified will block scheduling the pod. + If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the + scheduler will not attempt to schedule the pod. + + + SchedulingGates can only be set at pod creation time, and be removed only afterwards. + items: + description: PodSchedulingGate is associated to a Pod to + guard its scheduling. + properties: + name: + description: |- + Name of the scheduling gate. + Each scheduling gate must have a unique name field. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + securityContext: + description: |- + SecurityContext holds pod-level security attributes and common container settings. + Optional: Defaults to empty. See type description for default values of each field. + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + + 1. The owning GID will be the FSGroup + 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. 
+ type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in addition + to the container's primary GID, the fsGroup (if specified), and group memberships + defined in the container image for the uid of the container process. If unspecified, + no additional groups are added to any container. Note that group memberships + defined in the container image for the uid of the container process are still effective, + even if they are not included in this list. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be + set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. 
+ type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of + the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + serviceAccount: + description: |- + DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. + Deprecated: Use serviceAccountName instead. + type: string + serviceAccountName: + description: |- + ServiceAccountName is the name of the ServiceAccount to use to run this pod. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + type: string + setHostnameAsFQDN: + description: |- + If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). + In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). + In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN. + If a pod does not have FQDN, this has no effect. + Default to false. + type: boolean + shareProcessNamespace: + description: |- + Share a single process namespace between all of the containers in a pod. 
+ When this is set containers will be able to view and signal processes from other containers + in the same pod, and the first process in each container will not be assigned PID 1. + HostPID and ShareProcessNamespace cannot both be set. + Optional: Default to false. + type: boolean + subdomain: + description: |- + If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>". + If not specified, the pod will not have a domainname at all. + type: string + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + If this value is nil, the default grace period will be used instead. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + Defaults to 30 seconds. + format: int64 + type: integer + tolerations: + description: If specified, the pod's tolerations. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple <key,value,effect> using the matching operator <operator>. + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + topologySpreadConstraints: + description: |- + TopologySpreadConstraints describes how a group of pods ought to spread across topology + domains. Scheduler will schedule pods in a way which abides by the constraints. + All topologySpreadConstraints are ANDed. + items: + description: TopologySpreadConstraint specifies how to spread + matching pods among the given topology. + properties: + labelSelector: + description: |- + LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. + MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. 
+ The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. + When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. + format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. + When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. + And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. 
+ In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: + description: |- + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + + If this value is nil, the behavior is equivalent to the Honor policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + nodeTaintsPolicy: + description: |- + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. Options are: + - Honor: nodes without taints, along with tainted nodes for which the incoming pod + has a toleration, are included. + - Ignore: node taints are ignored. All nodes are included. + + + If this value is nil, the behavior is equivalent to the Ignore policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + topologyKey: + description: |- + TopologyKey is the key of node labels. Nodes that have a label with this key + and identical values are considered to be in the same topology. + We consider each <key, value> as a "bucket", and try to put balanced number + of pods into each bucket. + We define a domain as a particular instance of a topology. + Also, we define an eligible domain as a domain whose nodes meet the requirements of + nodeAffinityPolicy and nodeTaintsPolicy. + e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. + And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. + It's a required field.
+ type: string + whenUnsatisfiable: + description: |- + WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy + the spread constraint. + - DoNotSchedule (default) tells the scheduler not to schedule it. + - ScheduleAnyway tells the scheduler to schedule the pod in any location, + but giving higher precedence to topologies that would help reduce the + skew. + A constraint is considered "Unsatisfiable" for an incoming pod + if and only if every possible node assignment for that pod would violate + "MaxSkew" on some topology. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 3/1/1: + | zone1 | zone2 | zone3 | + | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled + to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies + MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler + won't make it *more* imbalanced. + It's a required field. + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + x-kubernetes-list-map-keys: + - topologyKey + - whenUnsatisfiable + x-kubernetes-list-type: map + volumes: + description: |- + List of volumes that can be mounted by containers belonging to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes + items: + description: Volume represents a named volume in a pod that + may be accessed by any container in the pod. + properties: + awsElasticBlockStore: + description: |- + awsElasticBlockStore represents an AWS Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. 
+ Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + format: int32 + type: integer + readOnly: + description: |- + readOnly value true will force the readOnly setting in VolumeMounts. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: boolean + volumeID: + description: |- + volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + required: + - volumeID + type: object + azureDisk: + description: azureDisk represents an Azure Data Disk + mount on the host and bind mount to the pod. + properties: + cachingMode: + description: 'cachingMode is the Host Caching mode: + None, Read Only, Read Write.' + type: string + diskName: + description: diskName is the Name of the data disk + in the blob storage + type: string + diskURI: + description: diskURI is the URI of data disk in + the blob storage + type: string + fsType: + description: |- + fsType is Filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + kind: + description: 'kind expected values are Shared: multiple + blob disks per storage account Dedicated: single + blob disk per storage account Managed: azure + managed data disk (only in managed availability + set). 
defaults to shared' + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + description: azureFile represents an Azure File Service + mount on the host and bind mount to the pod. + properties: + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretName: + description: secretName is the name of secret that + contains Azure Storage Account Name and Key + type: string + shareName: + description: shareName is the azure share Name + type: string + required: + - secretName + - shareName + type: object + cephfs: + description: cephFS represents a Ceph FS mount on the + host that shares a pod's lifetime + properties: + monitors: + description: |- + monitors is Required: Monitors is a collection of Ceph monitors + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + path: + description: 'path is Optional: Used as the mounted + root, rather than the full Ceph tree, default + is /' + type: string + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: boolean + secretFile: + description: |- + secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + secretRef: + description: |- + secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. 
+ More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is optional: User is the rados user name, default is admin + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + required: + - monitors + type: object + cinder: + description: |- + cinder represents a cinder volume attached and mounted on kubelets host machine. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: boolean + secretRef: + description: |- + secretRef is optional: points to a secret object containing parameters used to connect + to OpenStack. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + description: |- + volumeID used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + required: + - volumeID + type: object + configMap: + description: configMap represents a configMap that should + populate this volume + properties: + defaultMode: + description: |- + defaultMode is optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: optional specify whether the ConfigMap + or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + csi: + description: csi (Container Storage Interface) represents + ephemeral storage that is handled by certain external + CSI drivers (Beta feature). + properties: + driver: + description: |- + driver is the name of the CSI driver that handles this volume. + Consult with your admin for the correct name as registered in the cluster. + type: string + fsType: + description: |- + fsType to mount. Ex. "ext4", "xfs", "ntfs". + If not provided, the empty value is passed to the associated CSI driver + which will determine the default filesystem to apply. 
+ type: string + nodePublishSecretRef: + description: |- + nodePublishSecretRef is a reference to the secret object containing + sensitive information to pass to the CSI driver to complete the CSI + NodePublishVolume and NodeUnpublishVolume calls. + This field is optional, and may be empty if no secret is required. If the + secret object contains more than one secret, all secret references are passed. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + description: |- + readOnly specifies a read-only configuration for the volume. + Defaults to false (read/write). + type: boolean + volumeAttributes: + additionalProperties: + type: string + description: |- + volumeAttributes stores driver-specific properties that are passed to the CSI + driver. Consult your driver's documentation for supported values. + type: object + required: + - driver + type: object + downwardAPI: + description: downwardAPI represents downward API about + the pod that should populate this volume + properties: + defaultMode: + description: |- + Optional: mode bits to use on created files by default. Must be a + Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. 
+ This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: Items is a list of downward API volume + file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing the + pod field + properties: + fieldRef: + description: 'Required: Selects a field of + the pod: only annotations, labels, name, + namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative + path name of the file to be created. Must + not be absolute or contain the ''..'' path. + Must be utf-8 encoded. The first item of + the relative path must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. 
+ properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + emptyDir: + description: |- + emptyDir represents a temporary directory that shares a pod's lifetime. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + properties: + medium: + description: |- + medium represents what type of storage medium should back this directory. + The default is "" which means to use the node's default medium. + Must be an empty string (default) or Memory. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + description: |- + sizeLimit is the total amount of local storage required for this EmptyDir volume. + The size limit is also applicable for memory medium. + The maximum usage on memory medium EmptyDir would be the minimum value between + the SizeLimit specified here and the sum of memory limits of all containers in a pod. + The default is nil which means that the limit is undefined. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + description: |- + ephemeral represents a volume that is handled by a cluster storage driver. 
+ The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, + and deleted when the pod is removed. + + + Use this if: + a) the volume is only needed while the pod runs, + b) features of normal volumes like restoring from snapshot or capacity + tracking are needed, + c) the storage driver is specified through a storage class, and + d) the storage driver supports dynamic volume provisioning through + a PersistentVolumeClaim (see EphemeralVolumeSource for more + information on the connection between this volume type + and PersistentVolumeClaim). + + + Use PersistentVolumeClaim or one of the vendor-specific + APIs for volumes that persist for longer than the lifecycle + of an individual pod. + + + Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to + be used that way - see the documentation of the driver for + more information. + + + A pod can use both types of ephemeral volumes and + persistent volumes at the same time. + properties: + volumeClaimTemplate: + description: |- + Will be used to create a stand-alone PVC to provision the volume. + The pod in which this EphemeralVolumeSource is embedded will be the + owner of the PVC, i.e. the PVC will be deleted together with the + pod. The name of the PVC will be `<pod name>-<volume name>` where + `<volume name>` is the name from the `PodSpec.Volumes` array + entry. Pod validation will reject the pod if the concatenated name + is not valid for a PVC (for example, too long). + + + An existing PVC with that name that is not owned by the pod + will *not* be used for the pod to avoid using an unrelated + volume by mistake. Starting the pod is then blocked until + the unrelated PVC is removed. If such a pre-created PVC is + meant to be used by the pod, the PVC has to be updated with an + owner reference to the pod once the pod exists. Normally + this should not be necessary, but it may be useful when + manually reconstructing a broken cluster.
+ + + This field is read-only and no changes will be made by Kubernetes + to the PVC after it has been created. + + + Required, must not be nil. + properties: + metadata: + description: |- + May contain labels and annotations that will be copied into the PVC + when creating it. No other fields are allowed and will be rejected during + validation. + type: object + spec: + description: |- + The specification for the PersistentVolumeClaim. The entire content is + copied unchanged into the PVC that gets created from this + template. The same fields as in a PersistentVolumeClaim + are also valid here. + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. 
+ If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. 
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. 
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object + required: + - spec + type: object + type: object + fc: + description: fc represents a Fibre Channel resource + that is attached to a kubelet's host machine and then + exposed to the pod. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
+ TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + lun: + description: 'lun is Optional: FC target lun number' + format: int32 + type: integer + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + targetWWNs: + description: 'targetWWNs is Optional: FC target + worldwide names (WWNs)' + items: + type: string + type: array + x-kubernetes-list-type: atomic + wwids: + description: |- + wwids Optional: FC volume world wide identifiers (wwids) + Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + flexVolume: + description: |- + flexVolume represents a generic volume resource that is + provisioned/attached using an exec based plugin. + properties: + driver: + description: driver is the name of the driver to + use for this volume. + type: string + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + type: string + options: + additionalProperties: + type: string + description: 'options is Optional: this field holds + extra command options if any.' + type: object + readOnly: + description: |- + readOnly is Optional: defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef is Optional: secretRef is reference to the secret object containing + sensitive information to pass to the plugin scripts. This may be + empty if no secret object is specified. If the secret object + contains more than one secret, all secrets are passed to the plugin + scripts. + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + required: + - driver + type: object + flocker: + description: flocker represents a Flocker volume attached + to a kubelet's host machine. This depends on the Flocker + control service being running + properties: + datasetName: + description: |- + datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker + should be considered as deprecated + type: string + datasetUUID: + description: datasetUUID is the UUID of the dataset. + This is unique identifier of a Flocker dataset + type: string + type: object + gcePersistentDisk: + description: |- + gcePersistentDisk represents a GCE Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + properties: + fsType: + description: |- + fsType is filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". 
+ Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + format: int32 + type: integer + pdName: + description: |- + pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: boolean + required: + - pdName + type: object + gitRepo: + description: |- + gitRepo represents a git repository at a particular revision. + DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an + EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir + into the Pod's container. + properties: + directory: + description: |- + directory is the target directory name. + Must not contain or start with '..'. If '.' is supplied, the volume directory will be the + git repository. Otherwise, if specified, the volume will contain the git repository in + the subdirectory with the given name. + type: string + repository: + description: repository is the URL + type: string + revision: + description: revision is the commit hash for the + specified revision. + type: string + required: + - repository + type: object + glusterfs: + description: |- + glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/glusterfs/README.md + properties: + endpoints: + description: |- + endpoints is the endpoint name that details Glusterfs topology. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + path: + description: |- + path is the Glusterfs volume path. 
+ More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + readOnly: + description: |- + readOnly here will force the Glusterfs volume to be mounted with read-only permissions. + Defaults to false. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: boolean + required: + - endpoints + - path + type: object + hostPath: + description: |- + hostPath represents a pre-existing file or directory on the host + machine that is directly exposed to the container. This is generally + used for system agents or other privileged things that are allowed + to see the host machine. Most containers will NOT need this. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + --- + TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not + mount host directories as read/write. + properties: + path: + description: |- + path of the directory on the host. + If the path is a symlink, it will follow the link to the real path. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + type: + description: |- + type for HostPath Volume + Defaults to "" + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + required: + - path + type: object + iscsi: + description: |- + iscsi represents an ISCSI Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://examples.k8s.io/volumes/iscsi/README.md + properties: + chapAuthDiscovery: + description: chapAuthDiscovery defines whether support + iSCSI Discovery CHAP authentication + type: boolean + chapAuthSession: + description: chapAuthSession defines whether support + iSCSI Session CHAP authentication + type: boolean + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. 
+ Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + initiatorName: + description: |- + initiatorName is the custom iSCSI Initiator Name. + If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface + <target portal>:<volume name> will be created for the connection. + type: string + iqn: + description: iqn is the target iSCSI Qualified Name. + type: string + iscsiInterface: + description: |- + iscsiInterface is the interface Name that uses an iSCSI transport. + Defaults to 'default' (tcp). + type: string + lun: + description: lun represents iSCSI Target Lun number. + format: int32 + type: integer + portals: + description: |- + portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + items: + type: string + type: array + x-kubernetes-list-type: atomic + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + type: boolean + secretRef: + description: secretRef is the CHAP Secret for iSCSI + target and initiator authentication + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + description: |- + targetPortal is iSCSI Target Portal.
The Portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + type: string + required: + - iqn + - lun + - targetPortal + type: object + name: + description: |- + name of the volume. + Must be a DNS_LABEL and unique within the pod. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + nfs: + description: |- + nfs represents an NFS mount on the host that shares a pod's lifetime + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + properties: + path: + description: |- + path that is exported by the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + readOnly: + description: |- + readOnly here will force the NFS export to be mounted with read-only permissions. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: boolean + server: + description: |- + server is the hostname or IP address of the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + required: + - path + - server + type: object + persistentVolumeClaim: + description: |- + persistentVolumeClaimVolumeSource represents a reference to a + PersistentVolumeClaim in the same namespace. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + properties: + claimName: + description: |- + claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + type: string + readOnly: + description: |- + readOnly Will force the ReadOnly setting in VolumeMounts. + Default false. 
+ type: boolean + required: + - claimName + type: object + photonPersistentDisk: + description: photonPersistentDisk represents a PhotonController + persistent disk attached and mounted on kubelets host + machine + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + pdID: + description: pdID is the ID that identifies Photon + Controller persistent disk + type: string + required: + - pdID + type: object + portworxVolume: + description: portworxVolume represents a portworx volume + attached and mounted on kubelets host machine + properties: + fsType: + description: |- + fSType represents the filesystem type to mount + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + volumeID: + description: volumeID uniquely identifies a Portworx + volume + type: string + required: + - volumeID + type: object + projected: + description: projected items for all in one resources + secrets, configmaps, and downward API + properties: + defaultMode: + description: |- + defaultMode are the mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. 
+ format: int32 + type: integer + sources: + description: sources is the list of volume projections + items: + description: Projection that may be projected + along with other supported volume types + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. + properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from the volume + root to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about the + configMap data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. 
If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a + path within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: optional specify whether + the ConfigMap or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information about + the downwardAPI data to project + properties: + items: + description: Items is a list of DownwardAPIVolume + file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing + the pod field + properties: + fieldRef: + description: 'Required: Selects + a field of the pod: only annotations, + labels, name, namespace and uid + are supported.' + properties: + apiVersion: + description: Version of the + schema the FieldPath is written + in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field + to select in the specified + API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the + relative path name of the file + to be created. Must not be absolute + or contain the ''..'' path. Must + be utf-8 encoded. The first item + of the relative path must not + start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. 
+ properties: + containerName: + description: 'Container name: + required for volumes, optional + for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about the + secret data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a + path within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. 
+ format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: optional field specify whether + the Secret or its key must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is information + about the serviceAccountToken data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. 
+ format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object + type: object + type: array + x-kubernetes-list-type: atomic + type: object + quobyte: + description: quobyte represents a Quobyte mount on the + host that shares a pod's lifetime + properties: + group: + description: |- + group to map volume access to + Default is no group + type: string + readOnly: + description: |- + readOnly here will force the Quobyte volume to be mounted with read-only permissions. + Defaults to false. + type: boolean + registry: + description: |- + registry represents a single or multiple Quobyte Registry services + specified as a string as host:port pair (multiple entries are separated with commas) + which acts as the central registry for volumes + type: string + tenant: + description: |- + tenant owning the given Quobyte volume in the Backend + Used with dynamically provisioned Quobyte volumes, value is set by the plugin + type: string + user: + description: |- + user to map volume access to + Defaults to serivceaccount user + type: string + volume: + description: volume is a string that references + an already created Quobyte volume by name. + type: string + required: + - registry + - volume + type: object + rbd: + description: |- + rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/rbd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + image: + description: |- + image is the rados image name. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + keyring: + description: |- + keyring is the path to key ring for RBDUser. + Default is /etc/ceph/keyring. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + monitors: + description: |- + monitors is a collection of Ceph monitors. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + pool: + description: |- + pool is the rados pool name. + Default is rbd. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: boolean + secretRef: + description: |- + secretRef is name of the authentication secret for RBDUser. If provided + overrides keyring. + Default is nil. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is the rados user name. + Default is admin. 
+ More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + required: + - image + - monitors + type: object + scaleIO: + description: scaleIO represents a ScaleIO persistent + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". + Default is "xfs". + type: string + gateway: + description: gateway is the host address of the + ScaleIO API Gateway. + type: string + protectionDomain: + description: protectionDomain is the name of the + ScaleIO Protection Domain for the configured storage. + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef references to the secret for ScaleIO user and other + sensitive information. If this is not provided, Login operation will fail. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + description: sslEnabled Flag enable/disable SSL + communication with Gateway, default false + type: boolean + storageMode: + description: |- + storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. + Default is ThinProvisioned. 
+ type: string + storagePool: + description: storagePool is the ScaleIO Storage + Pool associated with the protection domain. + type: string + system: + description: system is the name of the storage system + as configured in ScaleIO. + type: string + volumeName: + description: |- + volumeName is the name of a volume already created in the ScaleIO system + that is associated with this volume source. + type: string + required: + - gateway + - secretRef + - system + type: object + secret: + description: |- + secret represents a secret that should populate this volume. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + properties: + defaultMode: + description: |- + defaultMode is Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values + for mode bits. Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items If unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + optional: + description: optional field specify whether the + Secret or its keys must be defined + type: boolean + secretName: + description: |- + secretName is the name of the secret in the pod's namespace to use. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + type: string + type: object + storageos: + description: storageOS represents a StorageOS volume + attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef specifies the secret to use for obtaining the StorageOS API + credentials. If not specified, default values will be attempted. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. 
apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + description: |- + volumeName is the human-readable name of the StorageOS volume. Volume + names are only unique within a namespace. + type: string + volumeNamespace: + description: |- + volumeNamespace specifies the scope of the volume within StorageOS. If no + namespace is specified then the Pod's namespace will be used. This allows the + Kubernetes name scoping to be mirrored within StorageOS for tighter integration. + Set VolumeName to any name to override the default behaviour. + Set to "default" if you are not using namespaces within StorageOS. + Namespaces that do not pre-exist within StorageOS will be created. + type: string + type: object + vsphereVolume: + description: vsphereVolume represents a vSphere volume + attached and mounted on kubelets host machine + properties: + fsType: + description: |- + fsType is filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + storagePolicyID: + description: storagePolicyID is the storage Policy + Based Management (SPBM) profile ID associated + with the StoragePolicyName. + type: string + storagePolicyName: + description: storagePolicyName is the storage Policy + Based Management (SPBM) profile name. 
+ type: string + volumePath: + description: volumePath is the path that identifies + vSphere volume vmdk + type: string + required: + - volumePath + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - containers + type: object + type: object workloadName: - description: Text message is an example field of PathwaysAPI. Edit - pathwaysapi_types.go to remove/update - type: string - workloadType: + description: WorkloadName is the identifier for the Pathways workload + deployment. type: string type: object status: description: PathwaysAPIStatus defines the observed state of PathwaysAPI + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + workloadState: + description: |- + Track the state of the Pathways workload, acceptable values are - + Suspended, Completed, Failed + type: string type: object type: object served: true diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index c90c5267..a8ed09e8 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -6,10 +6,13 @@ metadata: app.kubernetes.io/managed-by: kustomize name: pathwaysapi-sample spec: - # TODO(user): Add fields here - workloadName: "roshani-try-lws-16" - tpuType: v4-16 - numSlices: 2 - workloadMode: headless - workloadType: inference - # add a container spec, pass as a workload + workloadName: "roshani-in-8" + pathwaysWorkerNodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice + cloud.google.com/gke-tpu-topology: 2x2x2 + pathwaysControllerNodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice + cloud.google.com/gke-tpu-topology: 2x2x2 + numSlices: 1 + pathwaysDir: "gs://cloud-pathways-staging/tmp" + # add a spec, to pass as a workload diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 4cbfb67d..6a8453e2 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -34,8 +34,6 @@ import ( jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" - leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1" - lwsclient "sigs.k8s.io/lws/client-go/clientset/versioned" // jobsetv1alpha2 
"sigs.k8s.io/jobset/api/jobset/v1alpha2" // jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" @@ -76,435 +74,259 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) - pwMessage := pw.Spec.WorkloadName - tpuType := pw.Spec.TpuType - numSlices := pw.Spec.NumSlices - workloadMode := pw.Spec.WorkloadMode - workloadType := pw.Spec.WorkloadType + pwWorkloadName := pw.Spec.WorkloadName ctx = ctrl.LoggerInto(ctx, log) - log.Info("ROSHANI CONTROLLER WORKING...", "TextMessage", pwMessage, "TpuType", tpuType, "NumSlices", numSlices, "WorkloadMode", workloadMode) + log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pwWorkloadName, " NumSlices ", pw.Spec.NumSlices) kubeconfig := ctrl.GetConfigOrDie() log.Info("Roshani, config established...") truth := true - size := int32(5) // total number of workers (across all slices) + 1 - replicas := int32(1) - fmt.Printf("Replicas: %d , Size: %d \n", replicas, size) + volumeSourceType := corev1.HostPathDirectoryOrCreate - client := lwsclient.NewForConfigOrDie(kubeconfig) - log.Info("Roshani, client built...") - if workloadType == "inference" { - lws, err := client.LeaderworkersetV1().LeaderWorkerSets("default").Create(ctx, &leaderworkersetv1.LeaderWorkerSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: pwMessage, - Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, - }, - Spec: leaderworkersetv1.LeaderWorkerSetSpec{ - Replicas: ptr.To(replicas), - StartupPolicy: "LeaderCreated", // this seems to be a mandatory field now - LeaderWorkerTemplate: leaderworkersetv1.LeaderWorkerTemplate{ - Size: ptr.To(size), - LeaderTemplate: &corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: workloadType, - Labels: map[string]string{"xpk.google.com/workload": "pathways-headless"}, - }, - Spec: corev1.PodSpec{ - // 
NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, - // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x1"}, - NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x2"}, - Tolerations: []corev1.Toleration{ - { - Key: "google.com/tpu", - Operator: "Exists", - Effect: "NoSchedule", - }, - }, - Containers: []corev1.Container{ - { - Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{"--alsologtostderr", "--v=0", "--pathways_ifrt_proxy_server_resource_manager=$(LWS_LEADER_ADDRESS):38677", "--pathways_ifrt_proxy_server_port=38681", "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", "--pathways_plaque_network=gcp"}, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, - // Resources: []corev1.ResourceRequirements{ - // {Limits: corev1.ResourceList{{cpu: "24", memory: 100G,},}, - // }, - // }, - }, - { - Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Env: []corev1.EnvVar{{Name: "HOST_ADDRESS", Value: "$(LWS_LEADER_ADDRESS)"}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}}, - Args: []string{"--pathways_server_port=38677", - "--pathways_server_provides_devices=false", - "--pathways_device_type=NONE", - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", - "--pathways_resource_manager_expected_num_worker_jobs=2"}, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, 
{ContainerPort: 38678}}, - // Resources: []corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]resource.Quantity{cpu: "24", memory: 100G,},},}, - }, - }, - }, - }, - WorkerTemplate: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: workloadType, - Labels: map[string]string{"xpk.google.com/workload": "pathways-headless"}, - }, - Spec: corev1.PodSpec{ - // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, - // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x1"}, - Tolerations: []corev1.Toleration{ - { - Key: "google.com/tpu", - Operator: "Exists", - Effect: "NoSchedule", - }, - }, - NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", "cloud.google.com/gke-tpu-topology": "2x2x2"}, - Containers: []corev1.Container{ - { - Name: "worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{"--alsologtostderr", "--pathways_server_port=38679", "--pathways_resource_manager=$(LWS_LEADER_ADDRESS):38677", "--pathways_persistent_compilation_cache=false", "--pathways_compilation_mode=compile_at_worker", - "--xla_tpu_enable_data_parallel_all_reduce_opt=true", "--xla_tpu_data_parallel_opt_different_sized_ops=true", "--xla_tpu_enable_async_collective_fusion=true", "--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", - "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", "--xla_tpu_overlap_compute_collective_tc=true", "--xla_enable_async_all_gather=true", "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp"}, - Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, - // Resources: 
[]corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]resource.Quantity{cpu: "24", memory: 100G,},},}, - }, - }, - }, - }, - }, - }, - }, metav1.CreateOptions{}) - - // Pathways Spec + LWS ------ + RMContainerSpec := corev1.Container{ + Name: "pathways-rm", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38677", + "--pathways_server_provides_devices=false", + "--pathways_device_type=NONE", + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_expected_instances=tpuv4:2x2x2", + }, + Env: []corev1.EnvVar{ + {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, + {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + } - // lws, err := client.LeaderworkersetV1().LeaderWorkerSets("default").Create(ctx, &leaderworkersetv1.LeaderWorkerSet{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: pwMessage, - // }, - // Spec: leaderworkersetv1.LeaderWorkerSetSpec{ - // Replicas: ptr.To(numSlices), - // LeaderWorkerTemplate: leaderworkersetv1.LeaderWorkerTemplate{ - // LeaderTemplate: &corev1.PodTemplateSpec{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: workloadType, - // }, - // Spec: corev1.PodSpec{ - // Containers: []corev1.Container{ - // { - // Name: "bash-container", - // Image: 
"bash:latest", - // Command: []string{"/bin/sh"}, - // Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, - // }, - // }, - // // RestartPolicy: "Never", - // }, - // }, - // WorkerTemplate: corev1.PodTemplateSpec{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: "workers", - // }, - // Spec: corev1.PodSpec{ - // Containers: []corev1.Container{ - // { - // Name: "bash-container", - // Image: "bash:latest", - // Command: []string{"/bin/sh"}, - // Args: []string{"-c", "while true; do echo hello; sleep 10; done"}, - // }, - // }, - // // RestartPolicy: "Never", - // }, - // }, - // Size: ptr.To(int32(2)), - // }, - // StartupPolicy: "LeaderReady", - // }, - // }, metav1.CreateOptions{}) + ProxyContainerSpec := corev1.Container{ + Name: "pathways-proxy", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--v=0", + fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), + "--pathways_ifrt_proxy_server_port=38681", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_plaque_network=gcp", + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + } - if err != nil { - panic(err) - } - log.Info("Roshani, created LeaderWorkerSet...") - fmt.Printf("successfully created LeaderWorkerSet: %s\n", lws.Name) - } else if workloadType == "training" { - // // Pathways Spec + JobSet ------ - client := jobsetclient.NewForConfigOrDie(kubeconfig) + fmt.Printf("Replicas: %d \n", pw.Spec.NumSlices) - js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: pwMessage, + // // Pathways Spec + JobSet for batch inference ------ + client := jobsetclient.NewForConfigOrDie(kubeconfig) + log.Info("Roshani, client built 
for JobSet...") + js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pwWorkloadName, + }, + Spec: jobsetv1alpha2.JobSetSpec{ + FailurePolicy: &jobsetv1alpha2.FailurePolicy{ + MaxRestarts: 4, }, - Spec: jobsetv1alpha2.JobSetSpec{ - ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ - { - Name: "worker", - Replicas: 2, - Template: batchv1.JobTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{"alpha.jobset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, - }, - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - TerminationGracePeriodSeconds: ptr.To(int64(30)), - Containers: []corev1.Container{ - { - Name: "pathways-worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38677", - fmt.Sprintf("--pathways_resource_manager=%s-rm-0-0.%s:38677", pwMessage, pwMessage), - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - "--xla_tpu_enable_data_parallel_all_reduce_opt=true", - "--xla_tpu_data_parallel_opt_different_sized_ops=true", - "--xla_tpu_enable_async_collective_fusion=true", - "--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", - "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", - "--xla_tpu_overlap_compute_collective_tc=true", - "--xla_enable_async_all_gather=true", - "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", + ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ + { + Name: "leader", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + 
Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + // TerminationGracePeriodSeconds: ptr.To(int64(30)), + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{pwWorkloadName}, + }, + }, + }, + TopologyKey: "cloud.google.com/gke-nodepool", }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 8471}, {ContainerPort: 8080}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", + }, + }, // end PodAffinity + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{pwWorkloadName}, + }, + { + Key: "job-name", + Operator: metav1.LabelSelectorOpExists, + }, + }, }, + TopologyKey: "cloud.google.com/gke-nodepool", }, - // Resources: corev1.ResourceRequirements{ - // Limits: {map[corev1.ResourceName]Res{"google.com/tpu", 4}, - // }, - // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, + }, // end PodAntiAffinity + }, // end Affinity + NodeSelector: pw.Spec.PathwaysControllerNodeSelector, + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", }, - NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, - // Volumes: []corev1.Volume{ - // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: *corev1.HostPathVolumeSource{Path: "/tmp", 
Type: *corev1.HostPathType("DirectoryOrCreate")}}}, - // }, }, - }, - }, - }, - }, - { - Name: "rm", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - // TerminationGracePeriodSeconds: ptr.To(int64(30)), - Containers: []corev1.Container{ - { - Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38677", - "--pathways_server_provides_devices=false", - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", - "--pathways_expected_instances=tpuv4:2x2x2,tpuv4:2x2x2", - }, - Env: []corev1.EnvVar{ - {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, - {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwMessage, "rm", pwMessage)}, - {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", - }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, }, - // Resources: corev1.ResourceRequirements{ - // Limits: {map[corev1.ResourceName]{"google.com/tpu", 4}, - // }, - // Resources: corev1.ResourceRequirements{Limits: 
{map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, }, - NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": "cpu-rm-np"}, - // Volumes: []corev1.Volume{ - // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: &corev1.HostPathType("DirectoryOrCreate")}}}, - // }, - }, - }, + }, // end Volumes + Containers: []corev1.Container{ + RMContainerSpec, + ProxyContainerSpec, + + { + Name: "jetstream", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Env: []corev1.EnvVar{ + {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, + {Name: "JAX_PLATFORMS", Value: "proxy"}, + {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pwWorkloadName, "leader", pwWorkloadName)}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, + Command: []string{"bash", "-c", "echo Start ; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false model_name='llama2-70b' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2); echo End; sleep infinity;"}, + // Resources: corev1.ResourceRequirements{ + // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, + // }, + // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, + }, // end jetstream + + { + Name: "tester", + Image: 
"us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, + }, // end tester + + }, // end leader []containers + }, // end PodSpec }, }, }, - { - Name: "proxy", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - // TerminationGracePeriodSeconds: ptr.To(int64(30)), - Containers: []corev1.Container{ - { - Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--v=0", - fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwMessage, "rm", pwMessage), - "--pathways_ifrt_server_port=38676", - "--pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp", - "--pathways_plaque_network=gcp", - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38676}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", - }, + }, // end replicated Job + { + Name: "worker", + Replicas: int32(pw.Spec.NumSlices), + Template: batchv1.JobTemplateSpec{ + // ObjectMeta: metav1.ObjectMeta{ + // Annotations: map[string]string{"alpha.jobset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, + // }, + Spec: batchv1.JobSpec{ + 
BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(2)), + Parallelism: ptr.To(int32(2)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: ptr.To(int64(30)), + Containers: []corev1.Container{ + { + Name: "pathways-worker", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38679", + fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + "--xla_tpu_enable_data_parallel_all_reduce_opt=true", + "--xla_tpu_data_parallel_opt_different_sized_ops=true", + "--xla_tpu_enable_async_collective_fusion=true", + "--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", + "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", + "--xla_tpu_overlap_compute_collective_tc=true", + "--xla_enable_async_all_gather=true", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", }, - // Resources: corev1.ResourceRequirements{ - // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, - // }, - // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, + }, // end Pathways worker container + }, + NodeSelector: pw.Spec.PathwaysWorkerNodeSelector, + // NodeSelector: map[string]string{ + // "cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", + // "cloud.google.com/gke-tpu-topology": "2x2x2"}, + // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", 
"cloud.google.com/gke-tpu-topology": "4x4"}, + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", }, - NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": "cpu-proxy-np"}, - // Volumes: []corev1.Volume{ - // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: &corev1.HostPathDirectoryOrCreate}}}, - // }, }, - }, - }, - }, - }, - { - Name: "main", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - // TerminationGracePeriodSeconds: ptr.To(int64(30)), - Containers: []corev1.Container{ - { - Name: "maxtext", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Env: []corev1.EnvVar{ - {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, - {Name: "JAX_PLATFORMS", Value: "proxy"}, - {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38676", pwMessage, "proxy", pwMessage)}, - {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", - }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, }, - // Resources: corev1.ResourceRequirements{ - // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, - // }, - // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, }, - NodeSelector: map[string]string{"cloud.google.com/gke-nodepool": 
"cpu-user-np"}, - // Volumes: []corev1.Volume{ - // {Name: "shared-tmp", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/tmp", Type: &corev1.HostPathDirectoryOrCreate}}}, - // }, - }, + }, // end Volumes }, }, }, }, - }, - SuccessPolicy: &jobsetv1alpha2.SuccessPolicy{ - Operator: "All", - TargetReplicatedJobs: []string{"main"}, - }, - FailurePolicy: &jobsetv1alpha2.FailurePolicy{ - MaxRestarts: 0, - }, + }, // end worker replicated job }, - }, metav1.CreateOptions{}) + }, + }, metav1.CreateOptions{}) - // js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &jobsetv1alpha2.JobSet{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: pwMessage, - // }, - // Spec: jobsetv1alpha2.JobSetSpec{ - // ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ - // { - // Name: "rjob", - // Template: batchv1.JobTemplateSpec{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: "job", - // }, - // Spec: batchv1.JobSpec{ - // Parallelism: ptr.To(numSlices), - // Completions: ptr.To(numSlices), - // BackoffLimit: ptr.To(int32(0)), - // Template: corev1.PodTemplateSpec{ - // Spec: corev1.PodSpec{ - // Containers: []corev1.Container{ - // { - // Name: "bash-container", - // Image: "bash:latest", - // Command: []string{"echo"}, - // Args: []string{"Hello"}, - // }, - // }, - // }, - // }, - // }, - // }, - // }, - // }, - // }, - // }, metav1.CreateOptions{}) - if err != nil { - panic(err) - } - log.Info("Roshani, created JobSet...") - fmt.Printf("successfully created JobSet: %s\n", js.Name) + if err != nil { + panic(err) } + log.Info("Roshani, created JobSet...") + fmt.Printf("Roshani successfully created JobSet: %s\n", js.Name) + return ctrl.Result{}, nil } diff --git a/internal/utils/container_configurations.go b/internal/utils/container_configurations.go new file mode 100644 index 00000000..c6fbc581 --- /dev/null +++ b/internal/utils/container_configurations.go @@ -0,0 +1,8 @@ +package utils + +// import ( +// corev1 "k8s.io/api/core/v1" +// metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" +// ) + +// func (rm *corev1.Container) ResourceManagerContainer(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { diff --git a/internal/utils/extra_prototype.go b/internal/utils/extra_prototype.go new file mode 100644 index 00000000..b52e8b8b --- /dev/null +++ b/internal/utils/extra_prototype.go @@ -0,0 +1,43 @@ +package utils + +// RM AND PROXY SPEC- + +// { +// Name: "pathways-rm", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Args: []string{ +// "--alsologtostderr", +// "--pathways_server_port=38677", +// "--pathways_server_provides_devices=false", +// "--pathways_device_type=NONE", +// "--pathways_persistent_compilation_cache=false", +// "--pathways_compilation_mode=compile_at_worker", +// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), +// "--pathways_expected_instances=tpuv4:2x2x2", +// }, +// Env: []corev1.EnvVar{ +// {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, +// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, +// {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, +// {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, +// }, +// Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, +// }, // end pathways-rm + +// { +// Name: "pathways-proxy", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Args: []string{ +// "--alsologtostderr", +// "--v=0", +// 
fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), +// "--pathways_ifrt_proxy_server_port=38681", +// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), +// "--pathways_plaque_network=gcp", +// }, +// Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, +// }, // end pathways-proxy From 4069742e71e10195d7f3fbadb31c1e8ea4740ae1 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 1 Oct 2024 01:06:44 +0000 Subject: [PATCH 09/32] Pathways JobSet Inference using JobSet client 2 - working after resource limits and removing tolerations. --- api/v1/pathwaysapi_types.go | 46 ++-- api/v1/zz_generated.deepcopy.go | 32 ++- ...ways-api.pathways.domain_pathwaysapis.yaml | 4 + config/rbac/role.yaml | 23 +- config/samples/jobset_example.yaml | 204 ++++++++++++++++++ .../samples/pathways-api_v1_pathwaysapi.yaml | 5 +- internal/controller/pathwaysapi_controller.go | 98 +++++---- internal/utils/container_configurations.go | 1 + internal/utils/extra_prototype.go | 5 + 9 files changed, 349 insertions(+), 69 deletions(-) create mode 100644 config/samples/jobset_example.yaml diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 8c4dc4cc..51d08970 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -25,6 +25,27 @@ import ( // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
+// +kubebuilder:object:root=true +// +kubebuilder:subresource:status + +// PathwaysAPI is the Schema for the pathwaysapis API +type PathwaysAPI struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PathwaysAPISpec `json:"spec,omitempty"` + Status PathwaysAPIStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// PathwaysAPIList contains a list of PathwaysAPI +type PathwaysAPIList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []PathwaysAPI `json:"items"` +} + // PathwaysCluster creates a Pathways workload. It sets up the TPU // workers needed for training or inference, along with Pathways // resources such as the Pathways Resource Manager(RM) and Proxy @@ -33,6 +54,7 @@ import ( // a Pod. If this pod is not provided, then the workload is assumed // to be running in headless mode and the user can connect to Proxy, // to run their workloads. + type PathwaysAPISpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file @@ -48,6 +70,9 @@ type PathwaysAPISpec struct { // such as RM and proxy should be deployed. PathwaysControllerNodeSelector map[string]string `json:"pathwaysControllerNodeSelector,omitempty"` + // Maximum number of times the JobSet is restarted. + MaxRestarts int32 `json:"maxRestarts,omitempty"` + // Number of TPU slices requested for the Pathways workers. 
NumSlices int32 `json:"numSlices,omitempty"` @@ -79,27 +104,6 @@ type PathwaysAPIStatus struct { WorkloadState string `json:"workloadState,omitempty"` } -// +kubebuilder:object:root=true -// +kubebuilder:subresource:status - -// PathwaysAPI is the Schema for the pathwaysapis API -type PathwaysAPI struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - - Spec PathwaysAPISpec `json:"spec,omitempty"` - Status PathwaysAPIStatus `json:"status,omitempty"` -} - -// +kubebuilder:object:root=true - -// PathwaysAPIList contains a list of PathwaysAPI -type PathwaysAPIList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []PathwaysAPI `json:"items"` -} - func init() { SchemeBuilder.Register(&PathwaysAPI{}, &PathwaysAPIList{}) } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 506b4f2d..5f426282 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -21,6 +21,8 @@ limitations under the License. package v1 import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -29,8 +31,8 @@ func (in *PathwaysAPI) DeepCopyInto(out *PathwaysAPI) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec - out.Status = in.Status + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPI. @@ -86,6 +88,25 @@ func (in *PathwaysAPIList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PathwaysAPISpec) DeepCopyInto(out *PathwaysAPISpec) { *out = *in + if in.PathwaysWorkerNodeSelector != nil { + in, out := &in.PathwaysWorkerNodeSelector, &out.PathwaysWorkerNodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.PathwaysControllerNodeSelector != nil { + in, out := &in.PathwaysControllerNodeSelector, &out.PathwaysControllerNodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.UserPodTemplate != nil { + in, out := &in.UserPodTemplate, &out.UserPodTemplate + *out = new(corev1.PodTemplateSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPISpec. @@ -101,6 +122,13 @@ func (in *PathwaysAPISpec) DeepCopy() *PathwaysAPISpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PathwaysAPIStatus) DeepCopyInto(out *PathwaysAPIStatus) { *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPIStatus. diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 24b23a32..9ca4d0fb 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -47,6 +47,10 @@ spec: to be running in headless mode and the user can connect to Proxy, to run their workloads. properties: + maxRestarts: + description: Maximum number of times the JobSet is restarted. + format: int32 + type: integer numSlices: description: Number of TPU slices requested for the Pathways workers. 
format: int32 diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 656d3a4e..db9afaff 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -30,6 +30,23 @@ rules: - get - patch - update -- apiGroups: ["leaderworkerset.x-k8s.io"] - resources: ["leaderworkersets"] - verbs: ["create", "update", "delete"] +- apiGroups: + - "jobset.x-k8s.io" + resources: + - jobsets + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get + - patch + - update diff --git a/config/samples/jobset_example.yaml b/config/samples/jobset_example.yaml new file mode 100644 index 00000000..6edd91b7 --- /dev/null +++ b/config/samples/jobset_example.yaml @@ -0,0 +1,204 @@ +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + name: pathways-jobset-inference + # annotations: + # alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool # 1:1 job replica to node pool assignment +spec: + failurePolicy: + maxRestarts: 4 # The set will be restarted on failures up to 4 times. + replicatedJobs: + - name: leader # Part of the name of the child Jobs () + replicas: 1 # Replicas of the Pathways Resource Manager, Proxy, JetStream and Tester. Should always be 1. + template: + spec: # JobSpec + parallelism: 1 # Must be set to number of nodes in each node pool + completions: 1 # Must be set to number of nodes in each node pool + backoffLimit: 0 # Must be set to 0. Fail the job when any pod fails. 
+ template: + spec: + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: jobset.sigs.k8s.io/jobset-name + operator: In + values: + - pathways-jobset-inference + topologyKey: cloud.google.com/gke-nodepool + podAntiAffinity: # ensures only this job lands on the rack + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: jobset.sigs.k8s.io/jobset-name + operator: NotIn + values: + - pathways-jobset-inference + - key: job-name + operator: Exists + namespaceSelector: {} + topologyKey: cloud.google.com/gke-nodepool + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice + cloud.google.com/gke-tpu-topology: 2x2x2 + tolerations: + - effect: NoSchedule + key: google.com/tpu + operator: Exists + # nodeSelector: + # cloud.google.com/gke-nodepool: cpu-user-np + volumes: + - name: shared-tmp + hostPath: + path: /tmp + type: DirectoryOrCreate + containers: + - name: pathways-rm + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest + imagePullPolicy: Always # Sometimes k8s was reusing the old image + args: + - --alsologtostderr + - --pathways_server_port=38677 + - --pathways_server_provides_devices=false + - --pathways_device_type=NONE + - --pathways_persistent_compilation_cache=false + - --pathways_compilation_mode=compile_at_worker + - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp + - --pathways_expected_instances=tpuv4:2x2x2 + env: + - name: TPU_SKIP_MDS_QUERY + value: "true" + - name: REPLICATED_JOB_NAME + valueFrom: + fieldRef: + fieldPath: metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name'] + - name: JOBSET_NAME + valueFrom: + fieldRef: + fieldPath: metadata.annotations['jobset.sigs.k8s.io/jobset-name'] + - name: HOST_ADDRESS + value: $(JOBSET_NAME)-$(REPLICATED_JOB_NAME)-0-0.$(JOBSET_NAME) + ports: + - containerPort: 38677 + - containerPort: 38678 + resources: + limits: + cpu: "4" + memory: "8G" + 
securityContext: + privileged: true + - name: pathways-proxy + args: + - --alsologtostderr + - --v=0 + - --pathways_ifrt_proxy_server_resource_manager=pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38677 + # - --pathways_ifrt_proxy_server_resource_manager=localhost:38677 + - --pathways_ifrt_proxy_server_port=38681 + - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp + - --pathways_plaque_network=gcp + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest + imagePullPolicy: Always + ports: + - containerPort: 38681 + - containerPort: 38682 + resources: + limits: + cpu: "4" + memory: 10G + securityContext: + privileged: true + - name: jetstream + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + ports: + - containerPort: 9000 + env: + - name: XCLOUD_ENVIRONMENT + value: GCP + - name: JAX_PLATFORMS + value: proxy + - name: JAX_BACKEND_TARGET + value: grpc://pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38681 + # value: grpc://localhost:38681 + command: + - bash + - -c + - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap + _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py + MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 + load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items + max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false + model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 + ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) + & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; + EXIT_CODE=$? 
echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep + infinity;' + - name: tester + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + env: null + command: + - bash + - -c + - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap + _sigterm SIGTERM; for i in {1..5}; do echo Sending request $i; time python3 + JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 + --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; + echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne + 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: + $(date); sleep infinity;' + securityContext: + privileged: true + - name: worker # Part of the name of the child Jobs () + replicas: 1 # Number of slices + template: + spec: + parallelism: 2 # Must be set to number of nodes in each node pool + completions: 2 # Must be set to number of nodes in each node pool + backoffLimit: 0 # Must be set to 0. Fail the job when any pod fails. 
+ template: + spec: + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice + cloud.google.com/gke-tpu-topology: 2x2x2 + volumes: + - name: shared-tmp + hostPath: + path: /tmp + type: DirectoryOrCreate + containers: + - name: pathways-worker + securityContext: + privileged: true + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest + imagePullPolicy: Always # Sometimes k8s was reusing the old image + env: + - name: TPU_MIN_LOG_LEVEL + value: "0" + - name: TF_CPP_MIN_LOG_LEVEL + value: "0" + - name: XCLOUD_ENVIRONMENT + value: GCP + args: + - --alsologtostderr + - --pathways_server_port=38679 # changed to not match rm port + - --pathways_resource_manager=pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38677 + - --pathways_persistent_compilation_cache=false + - --pathways_compilation_mode=compile_at_worker + - --xla_tpu_enable_data_parallel_all_reduce_opt=true + - --xla_tpu_data_parallel_opt_different_sized_ops=true + - --xla_tpu_enable_async_collective_fusion=true + - --xla_tpu_enable_async_collective_fusion_fuse_all_gather=true + - --xla_tpu_enable_async_collective_fusion_multiple_steps=true + - --xla_tpu_overlap_compute_collective_tc=true + - --xla_enable_async_all_gather=true + - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp + ports: + - containerPort: 38679 + - containerPort: 38680 + - containerPort: 8471 + - containerPort: 8080 + resources: + limits: + google.com/tpu: 4 # Number of TPU chips per worker diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index a8ed09e8..fbbbd5d2 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -6,7 +6,7 @@ metadata: app.kubernetes.io/managed-by: kustomize name: pathwaysapi-sample spec: - workloadName: "roshani-in-8" + workloadName: "roshani-in-20" pathwaysWorkerNodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice 
cloud.google.com/gke-tpu-topology: 2x2x2 @@ -15,4 +15,5 @@ spec: cloud.google.com/gke-tpu-topology: 2x2x2 numSlices: 1 pathwaysDir: "gs://cloud-pathways-staging/tmp" - # add a spec, to pass as a workload + + diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 6a8453e2..b95d6651 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -20,13 +20,13 @@ import ( "context" "fmt" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -35,11 +35,6 @@ import ( jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" - // jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" - // jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" - // leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1" - // lwsclient "sigs.k8s.io/lws/client-go/clientset/versioned" - pathwaysapi "pathways-api/api/v1" ) @@ -62,29 +57,38 @@ type PathwaysAPIReconciler struct { // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update -// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch +// 
+kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets/status,verbs=get;update;patch func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - - // TODO(user): your logic here pw := &pathwaysapi.PathwaysAPI{} + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) if err := r.Get(ctx, types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { - // log.Error(err, "unable to fetch Pathways ") + log.Info("Unable to fetch Pathways ") return ctrl.Result{}, client.IgnoreNotFound(err) } + if err := r.createJobSet(ctx, pw); err != nil { + log.Error(err, "Roshani, failed to create JobSet \n") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI) error { log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) pwWorkloadName := pw.Spec.WorkloadName ctx = ctrl.LoggerInto(ctx, log) - log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pwWorkloadName, " NumSlices ", pw.Spec.NumSlices) + log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pwWorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) kubeconfig := ctrl.GetConfigOrDie() log.Info("Roshani, config established...") + // Some predefined variables truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate + replicatedJobName := &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}} + jobsetName := &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}} RMContainerSpec := corev1.Container{ Name: "pathways-rm", @@ -102,12 +106,13 @@ func (r 
*PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) "--pathways_expected_instances=tpuv4:2x2x2", }, Env: []corev1.EnvVar{ - {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, - {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + {Name: "REPLICATED_JOB_NAME", ValueFrom: replicatedJobName}, + {Name: "JOBSET_NAME", ValueFrom: jobsetName}, {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, } ProxyContainerSpec := corev1.Container{ @@ -123,15 +128,15 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), "--pathways_plaque_network=gcp", }, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, } - fmt.Printf("Replicas: %d \n", pw.Spec.NumSlices) - // // Pathways Spec + JobSet for batch inference ------ client := jobsetclient.NewForConfigOrDie(kubeconfig) log.Info("Roshani, client built for JobSet...") - js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, 
&jobsetv1alpha2.JobSet{ + + mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pwWorkloadName, }, @@ -211,10 +216,9 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) Containers: []corev1.Container{ RMContainerSpec, ProxyContainerSpec, - { Name: "jetstream", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Env: []corev1.EnvVar{ @@ -233,7 +237,7 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) { Name: "tester", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, @@ -258,7 +262,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) Parallelism: ptr.To(int32(2)), Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ - TerminationGracePeriodSeconds: ptr.To(int64(30)), Containers: []corev1.Container{ { Name: "pathways-worker", @@ -280,6 +283,11 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) "--xla_enable_async_all_gather=true", fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), }, + Env: []corev1.EnvVar{ + {Name: 
"TPU_MIN_LOG_LEVEL", Value: "0"}, + {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, + {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, + }, Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, VolumeMounts: []corev1.VolumeMount{ { @@ -287,20 +295,17 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) MountPath: "/tmp", }, }, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, }, // end Pathways worker container }, NodeSelector: pw.Spec.PathwaysWorkerNodeSelector, - // NodeSelector: map[string]string{ - // "cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", - // "cloud.google.com/gke-tpu-topology": "2x2x2"}, - // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, - Tolerations: []corev1.Toleration{ - { - Key: "google.com/tpu", - Operator: "Exists", - Effect: "NoSchedule", - }, - }, + // Tolerations: []corev1.Toleration{ + // { + // Key: "google.com/tpu", + // Operator: "Exists", + // Effect: "NoSchedule", + // }, + // }, //unschedulable without this and resources Volumes: []corev1.Volume{ { Name: "shared-tmp", @@ -319,21 +324,32 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) }, // end worker replicated job }, }, - }, metav1.CreateOptions{}) + } + + // Set Pathways controller as the owner of the JobSet for garbage collection. 
+ if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { + log.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) // - clearly not working + // return err + } else { + log.Info("Roshani, successfully set Pathways as owner of JobSet.") + } + + js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) if err != nil { - panic(err) + log.Info("Roshani, failed to create JobSet: ", "JobSet name", js.Name) + return err + } else { + log.Info("Roshani, successfully created JobSet: ", "JobSet name", js.Name) } - log.Info("Roshani, created JobSet...") - fmt.Printf("Roshani successfully created JobSet: %s\n", js.Name) - return ctrl.Result{}, nil + return nil } // SetupWithManager sets up the controller with the Manager. func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&pathwaysapi.PathwaysAPI{}). - // For JobSet and LWS + // Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet Complete(r) } diff --git a/internal/utils/container_configurations.go b/internal/utils/container_configurations.go index c6fbc581..2809f306 100644 --- a/internal/utils/container_configurations.go +++ b/internal/utils/container_configurations.go @@ -6,3 +6,4 @@ package utils // ) // func (rm *corev1.Container) ResourceManagerContainer(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +// } diff --git a/internal/utils/extra_prototype.go b/internal/utils/extra_prototype.go index b52e8b8b..d5260fb6 100644 --- a/internal/utils/extra_prototype.go +++ b/internal/utils/extra_prototype.go @@ -41,3 +41,8 @@ package utils // }, // Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, // }, // end pathways-proxy + +// NodeSelector: map[string]string{ +// "cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", +// "cloud.google.com/gke-tpu-topology": "2x2x2"}, +// NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, From 2893b0e0fb812f2a0617a7fc61d2727df20f43b9 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 1 Oct 2024 21:26:04 +0000 Subject: [PATCH 10/32] Adding utils - moved RM, Proxy container specs and affinity to utils. 
--- ...ways-api.pathways.domain_pathwaysapis.yaml | 9 -- internal/controller/pathwaysapi_controller.go | 126 +++--------------- internal/utils/container_configurations.go | 9 -- internal/utils/jobset_configurations.go | 103 ++++++++++++++ 4 files changed, 124 insertions(+), 123 deletions(-) delete mode 100644 internal/utils/container_configurations.go create mode 100644 internal/utils/jobset_configurations.go diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 9ca4d0fb..f4f31945 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -37,15 +37,6 @@ spec: metadata: type: object spec: - description: |- - PathwaysCluster creates a Pathways workload. It sets up the TPU - workers needed for training or inference, along with Pathways - resources such as the Pathways Resource Manager(RM) and Proxy - server at the specifiec controller node location. It provides - an option to deploy a user workload and other containers within - a Pod. If this pod is not provided, then the workload is assumed - to be running in headless mode and the user can connect to Proxy, - to run their workloads. properties: maxRestarts: description: Maximum number of times the JobSet is restarted. 
diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index b95d6651..d1abbed5 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -36,6 +36,7 @@ import ( jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" pathwaysapi "pathways-api/api/v1" + utils "pathways-api/internal/utils" ) // PathwaysAPIReconciler reconciles a PathwaysAPI object @@ -62,24 +63,32 @@ type PathwaysAPIReconciler struct { func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pw := &pathwaysapi.PathwaysAPI{} log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) + + // 1. Fetch the object if err := r.Get(ctx, types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { log.Info("Unable to fetch Pathways ") return ctrl.Result{}, client.IgnoreNotFound(err) } + + // 2. Process the object + + // 3. Update the cluster - create update and delete other resources if err := r.createJobSet(ctx, pw); err != nil { log.Error(err, "Roshani, failed to create JobSet \n") return ctrl.Result{}, err } + + //4. Update the object's status using Conditions + + //5. 
Return a result return ctrl.Result{}, nil } func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI) error { log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) - pwWorkloadName := pw.Spec.WorkloadName - ctx = ctrl.LoggerInto(ctx, log) - log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pwWorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) + log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pw.Spec.WorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) kubeconfig := ctrl.GetConfigOrDie() log.Info("Roshani, config established...") @@ -87,50 +96,10 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap // Some predefined variables truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate - replicatedJobName := &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}} - jobsetName := &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}} - RMContainerSpec := corev1.Container{ - Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38677", - "--pathways_server_provides_devices=false", - "--pathways_device_type=NONE", - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_expected_instances=tpuv4:2x2x2", - }, - Env: []corev1.EnvVar{ - {Name: "REPLICATED_JOB_NAME", ValueFrom: replicatedJobName}, - {Name: "JOBSET_NAME", ValueFrom: jobsetName}, - {Name: "HOST_ADDRESS", Value: 
fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, - {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, - } - - ProxyContainerSpec := corev1.Container{ - Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--v=0", - fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), - "--pathways_ifrt_proxy_server_port=38681", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_plaque_network=gcp", - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, - } + RMContainerSpec, _ := utils.MakeResourceManagerContainer(pw) + ProxyContainerSpec, _ := utils.MakeProxyContainer(pw) + affinitySpec, _ := utils.MakePodAffinityRules(pw) // // Pathways Spec + JobSet for batch inference ------ client := jobsetclient.NewForConfigOrDie(kubeconfig) @@ -138,7 +107,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ - Name: pwWorkloadName, + Name: pw.Spec.WorkloadName, }, Spec: jobsetv1alpha2.JobSetSpec{ FailurePolicy: &jobsetv1alpha2.FailurePolicy{ @@ -155,45 +124,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Parallelism: ptr.To(int32(1)), Template: 
corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ - // TerminationGracePeriodSeconds: ptr.To(int64(30)), - Affinity: &corev1.Affinity{ - PodAffinity: &corev1.PodAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "jobset.sigs.k8s.io/jobset-name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{pwWorkloadName}, - }, - }, - }, - TopologyKey: "cloud.google.com/gke-nodepool", - }, - }, - }, // end PodAffinity - PodAntiAffinity: &corev1.PodAntiAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "jobset.sigs.k8s.io/jobset-name", - Operator: metav1.LabelSelectorOpNotIn, - Values: []string{pwWorkloadName}, - }, - { - Key: "job-name", - Operator: metav1.LabelSelectorOpExists, - }, - }, - }, - TopologyKey: "cloud.google.com/gke-nodepool", - }, - }, - }, // end PodAntiAffinity - }, // end Affinity + Affinity: affinitySpec, NodeSelector: pw.Spec.PathwaysControllerNodeSelector, Tolerations: []corev1.Toleration{ { @@ -214,8 +145,8 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap }, }, // end Volumes Containers: []corev1.Container{ - RMContainerSpec, - ProxyContainerSpec, + *RMContainerSpec, + *ProxyContainerSpec, { Name: "jetstream", Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable @@ -224,15 +155,11 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Env: []corev1.EnvVar{ {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, {Name: "JAX_PLATFORMS", Value: "proxy"}, - {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pwWorkloadName, "leader", pwWorkloadName)}, + {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", 
pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, }, Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, Command: []string{"bash", "-c", "echo Start ; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false model_name='llama2-70b' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2); echo End; sleep infinity;"}, - // Resources: corev1.ResourceRequirements{ - // Limits: corev1.ResourceList{"google.com/tpu": resource.Quantity{i: 4}}, - // }, - // Resources: corev1.ResourceRequirements{Limits: {map[corev1.ResourceName]{cpu: "24", memory: 100G,},},}, }, // end jetstream { @@ -242,7 +169,6 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, }, // end tester - }, // end leader []containers }, // end PodSpec }, @@ -253,9 +179,6 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Name: "worker", Replicas: int32(pw.Spec.NumSlices), Template: batchv1.JobTemplateSpec{ - // ObjectMeta: metav1.ObjectMeta{ - // 
Annotations: map[string]string{"alpha.jobset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool"}, - // }, Spec: batchv1.JobSpec{ BackoffLimit: ptr.To(int32(0)), Completions: ptr.To(int32(2)), @@ -271,7 +194,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Args: []string{ "--alsologtostderr", "--pathways_server_port=38679", - fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), + fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), "--pathways_persistent_compilation_cache=false", "--pathways_compilation_mode=compile_at_worker", "--xla_tpu_enable_data_parallel_all_reduce_opt=true", @@ -299,13 +222,6 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap }, // end Pathways worker container }, NodeSelector: pw.Spec.PathwaysWorkerNodeSelector, - // Tolerations: []corev1.Toleration{ - // { - // Key: "google.com/tpu", - // Operator: "Exists", - // Effect: "NoSchedule", - // }, - // }, //unschedulable without this and resources Volumes: []corev1.Volume{ { Name: "shared-tmp", diff --git a/internal/utils/container_configurations.go b/internal/utils/container_configurations.go deleted file mode 100644 index 2809f306..00000000 --- a/internal/utils/container_configurations.go +++ /dev/null @@ -1,9 +0,0 @@ -package utils - -// import ( -// corev1 "k8s.io/api/core/v1" -// metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -// ) - -// func (rm *corev1.Container) ResourceManagerContainer(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { -// } diff --git a/internal/utils/jobset_configurations.go b/internal/utils/jobset_configurations.go new file mode 100644 index 00000000..490580b5 --- /dev/null +++ b/internal/utils/jobset_configurations.go @@ -0,0 +1,103 @@ +package utils + +import ( + "fmt" + + pathwaysapi "pathways-api/api/v1" + + corev1 "k8s.io/api/core/v1" + 
"k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { + truth := true + rmContainerSpec := corev1.Container{ + Name: "pathways-rm", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38677", + "--pathways_server_provides_devices=false", + "--pathways_device_type=NONE", + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_expected_instances=tpuv4:2x2x2", // CHANGE + }, + Env: []corev1.EnvVar{ + {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, + {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, + } + return &rmContainerSpec, nil +} + +func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { + truth := true + proxyContainerSpec := corev1.Container{ + Name: "pathways-proxy", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + ImagePullPolicy: "Always", + SecurityContext: 
&corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--v=0", + fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), + "--pathways_ifrt_proxy_server_port=38681", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_plaque_network=gcp", + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, + } + return &proxyContainerSpec, nil +} + +func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { + affinity := corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{pw.Spec.WorkloadName}, + }, + }, + }, + TopologyKey: "cloud.google.com/gke-nodepool", + }, + }, + }, // end PodAffinity + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{pw.Spec.WorkloadName}, + }, + { + Key: "job-name", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "cloud.google.com/gke-nodepool", + }, + }, + }, // end PodAntiAffinity + } // end Affinity + return &affinity, nil +} From cf438630ae6798f98414617e8357d1ed14dc13d1 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 7 Oct 2024 19:23:47 +0000 Subject: [PATCH 11/32] Added license automatically. 
Controller code, API structure. --- Dockerfile | 14 ++ api/v1/pathwaysapi_types.go | 35 ++- ...ways-api.pathways.domain_pathwaysapis.yaml | 46 +++- config/crd/kustomization.yaml | 16 +- config/crd/kustomizeconfig.yaml | 14 ++ config/default/kustomization.yaml | 14 ++ config/default/manager_metrics_patch.yaml | 14 ++ config/default/metrics_service.yaml | 14 ++ config/manager/kustomization.yaml | 14 ++ config/manager/manager.yaml | 14 ++ config/prometheus/kustomization.yaml | 14 ++ config/prometheus/monitor.yaml | 14 ++ config/rbac/kustomization.yaml | 14 ++ config/rbac/leader_election_role.yaml | 14 ++ config/rbac/leader_election_role_binding.yaml | 14 ++ config/rbac/metrics_auth_role.yaml | 14 ++ config/rbac/metrics_auth_role_binding.yaml | 14 ++ config/rbac/metrics_reader_role.yaml | 14 ++ config/rbac/pathwaysapi_editor_role.yaml | 14 ++ config/rbac/pathwaysapi_viewer_role.yaml | 14 ++ config/rbac/role.yaml | 14 ++ config/rbac/role_binding.yaml | 14 ++ config/rbac/service_account.yaml | 14 ++ config/samples/jobset_example.yaml | 14 ++ config/samples/kustomization.yaml | 14 ++ .../samples/pathways-api_v1_pathwaysapi.yaml | 19 +- go.mod | 1 - internal/controller/pathwaysapi_controller.go | 226 ++++++++++++++++-- internal/utils/extra_prototype.go | 48 ---- internal/utils/jobset_configurations.go | 103 -------- 30 files changed, 598 insertions(+), 204 deletions(-) delete mode 100644 internal/utils/extra_prototype.go delete mode 100644 internal/utils/jobset_configurations.go diff --git a/Dockerfile b/Dockerfile index a48973ee..b66a974e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Build the manager binary FROM golang:1.22 AS builder ARG TARGETOS diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 51d08970..7b06af3c 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -27,6 +27,7 @@ import ( // +kubebuilder:object:root=true // +kubebuilder:subresource:status +//+kubebuilder:resource:scope=Namespaced // PathwaysAPI is the Schema for the pathwaysapis API type PathwaysAPI struct { @@ -46,7 +47,7 @@ type PathwaysAPIList struct { Items []PathwaysAPI `json:"items"` } -// PathwaysCluster creates a Pathways workload. It sets up the TPU +// PathwaysJob creates a Pathways workload. It sets up the TPU // workers needed for training or inference, along with Pathways // resources such as the Pathways Resource Manager(RM) and Proxy // server at the specifiec controller node location. It provides @@ -62,6 +63,13 @@ type PathwaysAPISpec struct { // WorkloadName is the identifier for the Pathways workload deployment. WorkloadName string `json:"workloadName,omitempty"` + // ColocationPolicy defines whether the user job and the Pathways resources (RM, proxy) + // must be colocated on TPUs with the Pathways workers or not. + // Users may opt for best-effort placement where scheduler places the RM and proxy + // on the CPU nodepools by default. + // Default is best-effort. + ColocationPolicy ColocationPolicy `json:"colocationPolicy,omitempty"` + // PathwaysWorkerNodeSelector is used to specify the nodeSelector for // Pathways TPU workers (accelerator type and topology). 
PathwaysWorkerNodeSelector map[string]string `json:"pathwaysWorkerNodeSelector,omitempty"` @@ -76,12 +84,13 @@ type PathwaysAPISpec struct { // Number of TPU slices requested for the Pathways workers. NumSlices int32 `json:"numSlices,omitempty"` - // PathwaysDir is the GCS location at which Pathways artifacts - // can be stored. + // PathwaysDir is a persistent location like GCS at which temporary + // Pathways artifacts can be stored like HBM state during interruptions. + // Currently, Pathways supports a precreated GCS directory only. PathwaysDir string `json:"pathwaysDir,omitempty"` - // PathwaysClientVersion is the version of the Pathways client. - PathwaysClientVersion string `json:"pathwaysClientVersion,omitempty"` + // PathwaysVersion is the version of the Pathways client. + PathwaysVersion string `json:"pathwaysVersion,omitempty"` // UserPodTemplate accepts a pod composed of user's workload // (and other) containers. @@ -95,15 +104,21 @@ type PathwaysAPIStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file - // +optional - Conditions []metav1.Condition `json:"conditions,omitempty"` - // Track the state of the Pathways workload, acceptable values are - - // Suspended, Completed, Failed + // Running, Suspended, Completed, Failed. + // Contains a human readable message to provide additional details to the user.
// +optional - WorkloadState string `json:"workloadState,omitempty"` + Conditions []metav1.Condition `json:"conditions,omitempty"` } +// +kubebuilder:validation:Enum=colocate;best-effort +type ColocationPolicy string + +const ( + Colocate ColocationPolicy = "colocate" + BestEffort ColocationPolicy = "best-effort" +) + func init() { SchemeBuilder.Register(&PathwaysAPI{}, &PathwaysAPIList{}) } diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index f4f31945..aafec19a 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -38,6 +52,17 @@ spec: type: object spec: properties: + colocationPolicy: + description: |- + ColocationPolicy defines whether the user job and the Pathways resources (RM, proxy) + must be colocated on TPUs with the Pathways workers or not. + Users may opt for best-effort placement where scheduler places the RM and proxy + on the CPU nodepools by default. + Default is best-effort. + enum: + - colocate + - best-effort + type: string maxRestarts: description: Maximum number of times the JobSet is restarted. 
format: int32 @@ -46,10 +71,6 @@ spec: description: Number of TPU slices requested for the Pathways workers. format: int32 type: integer - pathwaysClientVersion: - description: PathwaysClientVersion is the version of the Pathways - client. - type: string pathwaysControllerNodeSelector: additionalProperties: type: string @@ -59,8 +80,12 @@ spec: type: object pathwaysDir: description: |- - PathwaysDir is the GCS location at which Pathways artifacts - can be stored. + PathwaysDir is a persistent location like GCS at which temporary + Pathways artifacts can be stored like HBM state during interruptions. + Currently, Pathways supports a precreated GCS directory only. + type: string + pathwaysVersion: + description: PathwaysVersion is the version of the Pathways client. type: string pathwaysWorkerNodeSelector: additionalProperties: @@ -7998,6 +8023,10 @@ spec: description: PathwaysAPIStatus defines the observed state of PathwaysAPI properties: conditions: + description: |- + Track the state of the Pathways workload, acceptable values are - + Running, Suspended, Completed, Failed. + Contains a human readable message to provide additional details to the // user. items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for @@ -8066,11 +8095,6 @@ spec: - type type: object type: array - workloadState: - description: |- - Track the state of the Pathways workload, acceptable values are - - Suspended, Completed, Failed - type: string type: object type: object served: true diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 4bbbd663..6332199b 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # This kustomization.yaml is not intended to be run by itself, # since it depends on service name and namespace that are out of this kustomize package. # It should be run by config/default @@ -5,7 +19,7 @@ resources: - bases/pathways-api.pathways.domain_pathwaysapis.yaml # +kubebuilder:scaffold:crdkustomizeresource -patches: +# patches: # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. # patches here are for enabling the conversion webhook for each CRD # +kubebuilder:scaffold:crdkustomizewebhookpatch diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml index ec5c150a..c0e022ee 100644 --- a/config/crd/kustomizeconfig.yaml +++ b/config/crd/kustomizeconfig.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # This file is for teaching kustomize how to substitute name and namespace reference in CRD nameReference: - kind: Service diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 78841639..19480ca0 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Adds namespace to all resources. namespace: pathways-api-system diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml index 2aaef653..3674d350 100644 --- a/config/default/manager_metrics_patch.yaml +++ b/config/default/manager_metrics_patch.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # This patch adds the args to allow exposing the metrics endpoint using HTTPS - op: add path: /spec/template/spec/containers/0/args/0 diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml index 9b4019a7..f4ec4021 100644 --- a/config/default/metrics_service.yaml +++ b/config/default/metrics_service.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: v1 kind: Service metadata: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 9209b790..771d8487 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ resources: - manager.yaml apiVersion: kustomize.config.k8s.io/v1beta1 diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 3eac4556..0d5b799e 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: v1 kind: Namespace metadata: diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml index ed137168..302b7006 100644 --- a/config/prometheus/kustomization.yaml +++ b/config/prometheus/kustomization.yaml @@ -1,2 +1,16 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ resources: - monitor.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 81f32879..9259d148 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Prometheus Monitor Service (Metrics) apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 7f4a0a98..f57be9aa 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + resources: # All RBAC will be applied under this service account in # the deployment namespace. 
You may comment out this resource diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index 0e759313..e08881bc 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # permissions to do leader election. apiVersion: rbac.authorization.k8s.io/v1 kind: Role diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index f59fe83d..0c9103d3 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml index 32d2e4ec..fa14995c 100644 --- a/config/rbac/metrics_auth_role.yaml +++ b/config/rbac/metrics_auth_role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml index e775d67f..c46accd4 100644 --- a/config/rbac/metrics_auth_role_binding.yaml +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml index 51a75db4..4caa96a1 100644 --- a/config/rbac/metrics_reader_role.yaml +++ b/config/rbac/metrics_reader_role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/config/rbac/pathwaysapi_editor_role.yaml b/config/rbac/pathwaysapi_editor_role.yaml index a480dcd3..40f7aeea 100644 --- a/config/rbac/pathwaysapi_editor_role.yaml +++ b/config/rbac/pathwaysapi_editor_role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # permissions for end users to edit pathwaysapis. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/config/rbac/pathwaysapi_viewer_role.yaml b/config/rbac/pathwaysapi_viewer_role.yaml index b1e6c442..efb4fc4f 100644 --- a/config/rbac/pathwaysapi_viewer_role.yaml +++ b/config/rbac/pathwaysapi_viewer_role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # permissions for end users to view pathwaysapis. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index db9afaff..8033ac7b 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index afe22ac5..8afb66a7 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 706311da..85bb665d 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ apiVersion: v1 kind: ServiceAccount metadata: diff --git a/config/samples/jobset_example.yaml b/config/samples/jobset_example.yaml index 6edd91b7..2ffc2488 100644 --- a/config/samples/jobset_example.yaml +++ b/config/samples/jobset_example.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: jobset.x-k8s.io/v1alpha2 kind: JobSet metadata: diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 1c602273..bd84c32d 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ ## Append samples of your project ## resources: - pathways-api_v1_pathwaysapi.yaml diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index fbbbd5d2..b9752974 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -1,12 +1,23 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: pathways-api.pathways.domain/v1 kind: PathwaysAPI metadata: - labels: - app.kubernetes.io/name: pathways-api - app.kubernetes.io/managed-by: kustomize name: pathwaysapi-sample spec: - workloadName: "roshani-in-20" + workloadName: "roshani-in-27" pathwaysWorkerNodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice cloud.google.com/gke-tpu-topology: 2x2x2 diff --git a/go.mod b/go.mod index 024202e6..0e1bb303 100644 --- a/go.mod +++ b/go.mod @@ -96,7 +96,6 @@ require ( sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect sigs.k8s.io/jobset v0.5.2 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect - sigs.k8s.io/lws v0.3.0 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index d1abbed5..725f0eb8 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -22,7 +22,6 @@ import ( 
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" @@ -36,7 +35,6 @@ import ( jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" pathwaysapi "pathways-api/api/v1" - utils "pathways-api/internal/utils" ) // PathwaysAPIReconciler reconciles a PathwaysAPI object @@ -55,6 +53,7 @@ type PathwaysAPIReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile +// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch // +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update @@ -63,19 +62,75 @@ type PathwaysAPIReconciler struct { func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pw := &pathwaysapi.PathwaysAPI{} log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log) + + log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pw.Spec.WorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) - // 1. Fetch the object - if err := r.Get(ctx, types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { + // 1. Fetch the Pathways object + // if err := r.Get(ctx, types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { + if err := r.Get(ctx, req.NamespacedName, pw); err != nil { log.Info("Unable to fetch Pathways ") return ctrl.Result{}, client.IgnoreNotFound(err) } // 2. 
Process the object + kubeconfig := ctrl.GetConfigOrDie() + log.Info("Roshani, config established...") + + jobSetClient := jobsetclient.NewForConfigOrDie(kubeconfig) + log.Info("Roshani, client built for JobSet...") + + // 2.1 Figure out if PathwaysJob is already present and in "Suspended / Completed / Failed states", + // if it is the case, there is nothing to do. + + // JobSet list + var jsList *jobsetv1alpha2.JobSetList + + jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) + if err != nil { + log.Info("Roshani, can't list JobSets: ", "error ", err) + return ctrl.Result{}, err + } else { + log.Info("Roshani, can list JobSets") + for _, job := range jsList.Items { + for _, condition := range job.Status.Conditions { + log.Info("Roshani Jobset condtion", job.ObjectMeta.Name, condition.Type) + } + if job.ObjectMeta.Name == pw.Spec.WorkloadName && + (job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || + job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress)) { + log.Info("Roshani, found JobSet ", "JobSet name", pw.Spec.WorkloadName) + log.Info("Roshani, nothing to reconcile here") + return ctrl.Result{}, nil + // Nothing to reconcile here. + } + } + } + + // Currently leading to race conditions ---. + // var pwList pathwaysapi.PathwaysAPIList + // if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { + // log.Error(err, "Roshani, failed to list Pathways") + // return ctrl.Result{}, err + // } else { + // log.Info("Roshani, successfully listed Pathways") + // for _, job := range pwList.Items { + // log.Info("ROSHANI", "Job name ", job.Spec.WorkloadName, "Pathways workload name ", pw.Spec.WorkloadName) + // if job.Spec.WorkloadName == pw.Spec.WorkloadName { + // log.Info("Roshani, found Pathways, not creating workload: ", "JobSet name", pw.Spec.WorkloadName) + // return ctrl.Result{}, nil + // // Nothing to reconcile here. 
+ // } + // } + // } + // 3. Update the cluster - create update and delete other resources - if err := r.createJobSet(ctx, pw); err != nil { + if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { log.Error(err, "Roshani, failed to create JobSet \n") return ctrl.Result{}, err + } else { + log.Info("Roshani, successfully created JobSet \n") } //4. Update the object's status using Conditions @@ -84,26 +139,49 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } -func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI) error { - log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) - ctx = ctrl.LoggerInto(ctx, log) +// function to listChildJobSets, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L44 - log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pw.Spec.WorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) +// +// function to updatePathwaysJob Status ~~ updateJobSetStatus. Pathways status is same as JobSet Status. This function will mainly update Conditions and Message. +// similar to https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L248 +// JobSet conditions - https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L822 - kubeconfig := ctrl.GetConfigOrDie() - log.Info("Roshani, config established...") +// function to suspendJobSet + +// function to resumeJobSet + +// function to deleteJobSet, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L41 + +// function isJobSetFinished reuse jobSetFinished + +// funtion pathwaysJobFinished (?) 
+ +// function setCondition and updateCondition + +// function setPathwaysJobCompletedCondition + +// function setPathwaysJobFailedCondition + +// function setPathwaysJobSuspendedCondition + +// function setPathwaysJobResumedCondition + +func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI, jobSetClient *jobsetclient.Clientset) error { + // log := ctrl.LoggerFrom(ctx) + log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log2) + + log2.Info("ROSHANI in createJobSet") // Some predefined variables truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate - RMContainerSpec, _ := utils.MakeResourceManagerContainer(pw) - ProxyContainerSpec, _ := utils.MakeProxyContainer(pw) - affinitySpec, _ := utils.MakePodAffinityRules(pw) + RMContainerSpec, _ := MakeResourceManagerContainer(pw) + ProxyContainerSpec, _ := MakeProxyContainer(pw) + affinitySpec, _ := MakePodAffinityRules(pw) // // Pathways Spec + JobSet for batch inference ------ - client := jobsetclient.NewForConfigOrDie(kubeconfig) - log.Info("Roshani, client built for JobSet...") mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ @@ -241,24 +319,26 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap }, }, } + // var lock sync.Mutex // Set Pathways controller as the owner of the JobSet for garbage collection. 
- if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { - log.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) // - clearly not working - // return err - } else { - log.Info("Roshani, successfully set Pathways as owner of JobSet.") - } + // if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { + // // lock.Lock() + // // defer lock.Unlock() + // log2.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) // - clearly not working + // // return err + // } else { + // log2.Info("Roshani, successfully set Pathways as owner of JobSet.") + // } - js, err := client.JobsetV1alpha2().JobSets("default").Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) + js, err := jobSetClient.JobsetV1alpha2().JobSets("default").Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) if err != nil { - log.Info("Roshani, failed to create JobSet: ", "JobSet name", js.Name) + log2.Info("Roshani, failed to create JobSet: ", "JobSet name", js.Name) return err } else { - log.Info("Roshani, successfully created JobSet: ", "JobSet name", js.Name) + log2.Info("Roshani, successfully created JobSet: ", "JobSet name", js.Name) } - return nil } @@ -269,3 +349,97 @@ func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { // Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet Complete(r) } + +// helpers + +func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { + truth := true + rmContainerSpec := corev1.Container{ + Name: "pathways-rm", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--alsologtostderr", + "--pathways_server_port=38677", + "--pathways_server_provides_devices=false", + "--pathways_device_type=NONE", + "--pathways_persistent_compilation_cache=false", + "--pathways_compilation_mode=compile_at_worker", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_expected_instances=tpuv4:2x2x2", // CHANGE + }, + Env: []corev1.EnvVar{ + {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, + {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, + {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, + } + return &rmContainerSpec, nil +} + +func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { + truth := true + proxyContainerSpec := corev1.Container{ + Name: "pathways-proxy", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + 
"--alsologtostderr", + "--v=0", + fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), + "--pathways_ifrt_proxy_server_port=38681", + fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--pathways_plaque_network=gcp", + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, + } + return &proxyContainerSpec, nil +} + +func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { + affinity := corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{pw.Spec.WorkloadName}, + }, + }, + }, + TopologyKey: "cloud.google.com/gke-nodepool", + }, + }, + }, // end PodAffinity + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "jobset.sigs.k8s.io/jobset-name", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{pw.Spec.WorkloadName}, + }, + { + Key: "job-name", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "cloud.google.com/gke-nodepool", + }, + }, + }, // end PodAntiAffinity + } // end Affinity + return &affinity, nil +} diff --git a/internal/utils/extra_prototype.go b/internal/utils/extra_prototype.go deleted file mode 100644 index d5260fb6..00000000 --- a/internal/utils/extra_prototype.go +++ /dev/null @@ -1,48 +0,0 @@ -package utils - -// RM AND PROXY SPEC- - -// 
{ -// Name: "pathways-rm", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Args: []string{ -// "--alsologtostderr", -// "--pathways_server_port=38677", -// "--pathways_server_provides_devices=false", -// "--pathways_device_type=NONE", -// "--pathways_persistent_compilation_cache=false", -// "--pathways_compilation_mode=compile_at_worker", -// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), -// "--pathways_expected_instances=tpuv4:2x2x2", -// }, -// Env: []corev1.EnvVar{ -// {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, -// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, -// {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, -// {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, -// }, -// Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, -// }, // end pathways-rm - -// { -// Name: "pathways-proxy", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Args: []string{ -// "--alsologtostderr", -// "--v=0", -// fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), -// "--pathways_ifrt_proxy_server_port=38681", -// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), -// "--pathways_plaque_network=gcp", -// }, -// Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, -// }, // end pathways-proxy - -// NodeSelector: map[string]string{ -// 
"cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", -// "cloud.google.com/gke-tpu-topology": "2x2x2"}, -// NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, diff --git a/internal/utils/jobset_configurations.go b/internal/utils/jobset_configurations.go deleted file mode 100644 index 490580b5..00000000 --- a/internal/utils/jobset_configurations.go +++ /dev/null @@ -1,103 +0,0 @@ -package utils - -import ( - "fmt" - - pathwaysapi "pathways-api/api/v1" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { - truth := true - rmContainerSpec := corev1.Container{ - Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38677", - "--pathways_server_provides_devices=false", - "--pathways_device_type=NONE", - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_expected_instances=tpuv4:2x2x2", // CHANGE - }, - Env: []corev1.EnvVar{ - {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, - {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, - {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, 
{ContainerPort: 38678}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, - } - return &rmContainerSpec, nil -} - -func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { - truth := true - proxyContainerSpec := corev1.Container{ - Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--alsologtostderr", - "--v=0", - fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), - "--pathways_ifrt_proxy_server_port=38681", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_plaque_network=gcp", - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, - } - return &proxyContainerSpec, nil -} - -func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { - affinity := corev1.Affinity{ - PodAffinity: &corev1.PodAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "jobset.sigs.k8s.io/jobset-name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{pw.Spec.WorkloadName}, - }, - }, - }, - TopologyKey: "cloud.google.com/gke-nodepool", - }, - }, - }, // end PodAffinity - PodAntiAffinity: &corev1.PodAntiAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: 
[]metav1.LabelSelectorRequirement{ - { - Key: "jobset.sigs.k8s.io/jobset-name", - Operator: metav1.LabelSelectorOpNotIn, - Values: []string{pw.Spec.WorkloadName}, - }, - { - Key: "job-name", - Operator: metav1.LabelSelectorOpExists, - }, - }, - }, - TopologyKey: "cloud.google.com/gke-nodepool", - }, - }, - }, // end PodAntiAffinity - } // end Affinity - return &affinity, nil -} From d642cb10db39e7c06550b1ca10279d8f3233d21c Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 7 Oct 2024 21:02:41 +0000 Subject: [PATCH 12/32] Some extras. --- .dockerignore | 3 ++ .gitignore | 27 ++++++++++++ .golangci.yml | 61 +++++++++++++++++++++++++++ pkg/utils/extra_prototype.go | 80 ++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 .golangci.yml create mode 100644 pkg/utils/extra_prototype.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..a3aab7af --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file +# Ignore build and test binaries. 
+bin/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..ada68ff0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +bin/* +Dockerfile.cross + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Go workspace file +go.work + +# Kubernetes Generated files - skip generated files, except for vendored files +!vendor/**/zz_generated.* + +# editor and IDE paraphernalia +.idea +.vscode +*.swp +*.swo +*~ diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 00000000..1a174953 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,61 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +run: + timeout: 5m + allow-parallel-runners: true + +issues: + # don't skip warning about doc comments + # don't exclude the default set of lint + exclude-use-default: false + # restore some of the defaults + # (fill in the rest as needed) + exclude-rules: + - path: "api/*" + linters: + - lll + - path: "internal/*" + linters: + - dupl + - lll +linters: + disable-all: true + enable: + - dupl + - errcheck + - exportloopref + - ginkgolinter + - goconst + - gocyclo + - gofmt + - goimports + - gosimple + - govet + - ineffassign + - lll + - misspell + - nakedret + - prealloc + - revive + - staticcheck + - typecheck + - unconvert + - unparam + - unused + +linters-settings: + revive: + rules: + - name: comment-spacings diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go new file mode 100644 index 00000000..77c51726 --- /dev/null +++ b/pkg/utils/extra_prototype.go @@ -0,0 +1,80 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +// RM AND PROXY SPEC- + +// { +// Name: "pathways-rm", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Args: []string{ +// "--alsologtostderr", +// "--pathways_server_port=38677", +// "--pathways_server_provides_devices=false", +// "--pathways_device_type=NONE", +// "--pathways_persistent_compilation_cache=false", +// "--pathways_compilation_mode=compile_at_worker", +// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), +// "--pathways_expected_instances=tpuv4:2x2x2", +// }, +// Env: []corev1.EnvVar{ +// {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, +// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, +// {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, +// {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, +// }, +// Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, +// }, // end pathways-rm + +// { +// Name: "pathways-proxy", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Args: []string{ +// "--alsologtostderr", +// "--v=0", +// fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), +// "--pathways_ifrt_proxy_server_port=38681", +// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), +// "--pathways_plaque_network=gcp", +// }, +// Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, +// }, // end pathways-proxy + +// NodeSelector: 
map[string]string{ +// "cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", +// "cloud.google.com/gke-tpu-topology": "2x2x2"}, +// NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, + +// List JobSets using client + +// jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) + +// if err != nil { +// log.Info("Roshani, can list JobSets: ") +// for _, js := range jsList.Items { +// if js.ObjectMeta.Name == pw.Spec.WorkloadName { +// log.Info("Roshani, found JobSet: ", "JobSet name", pw.Spec.WorkloadName) +// return ctrl.Result{}, nil +// // Nothing to reconcile here. +// } +// } +// } else { +// log.Info("Roshani, error listing JobSets: ", "error ", err) +// return ctrl.Result{}, err +// } From cc74c7105bb1b9be4085c6faae4e77b3d49af001 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 7 Oct 2024 23:30:53 +0000 Subject: [PATCH 13/32] Set Pathways as the owner of JobSet. 
--- api/v1/pathwaysapi_types.go | 5 +- ...ways-api.pathways.domain_pathwaysapis.yaml | 4 - .../samples/pathways-api_v1_pathwaysapi.yaml | 3 +- internal/controller/pathwaysapi_controller.go | 77 ++++--------------- pkg/utils/extra_prototype.go | 44 +++++++++++ 5 files changed, 62 insertions(+), 71 deletions(-) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index 7b06af3c..a6be0fad 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -27,7 +27,7 @@ import ( // +kubebuilder:object:root=true // +kubebuilder:subresource:status -//+kubebuilder:resource:scope=Namespaced +// +kubebuilder:resource:scope=Namespaced // PathwaysAPI is the Schema for the pathwaysapis API type PathwaysAPI struct { @@ -60,9 +60,6 @@ type PathwaysAPISpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // WorkloadName is the identifier for the Pathways workload deployment. - WorkloadName string `json:"workloadName,omitempty"` - // ColocationPolicy defines whether the user job and the Pathways resources (RM, proxy) // must be colocated on TPUs with the Pathways workers or not. // Users may opt for best-effort placement where scheduler places the RM and proxy diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index aafec19a..79f3fded 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -8014,10 +8014,6 @@ spec: - containers type: object type: object - workloadName: - description: WorkloadName is the identifier for the Pathways workload - deployment. 
- type: string type: object status: description: PathwaysAPIStatus defines the observed state of PathwaysAPI diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index b9752974..51e32138 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -15,9 +15,8 @@ apiVersion: pathways-api.pathways.domain/v1 kind: PathwaysAPI metadata: - name: pathwaysapi-sample + name: pathways-1 spec: - workloadName: "roshani-in-27" pathwaysWorkerNodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice cloud.google.com/gke-tpu-topology: 2x2x2 diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index 725f0eb8..efe931ce 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -64,10 +64,9 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log) - log.Info("ROSHANI CONTROLLER WORKING...", "WorkloadName ", pw.Spec.WorkloadName, " NumSlices ", pw.Spec.NumSlices, "WorkerNodeSelector", pw.Spec.PathwaysWorkerNodeSelector) + log.Info("ROSHANI CONTROLLER WORKING...", "req.NamespacedName", req.NamespacedName.String(), "req.Namespace", req.Namespace) // 1. Fetch the Pathways object - // if err := r.Get(ctx, types.NamespacedName{Name: req.Name, Namespace: req.Namespace}, pw); err != nil { if err := r.Get(ctx, req.NamespacedName, pw); err != nil { log.Info("Unable to fetch Pathways ") return ctrl.Result{}, client.IgnoreNotFound(err) @@ -84,47 +83,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) // 2.1 Figure out if PathwaysJob is already present and in "Suspended / Completed / Failed states", // if it is the case, there is nothing to do. 
- // JobSet list - var jsList *jobsetv1alpha2.JobSetList - - jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) - if err != nil { - log.Info("Roshani, can't list JobSets: ", "error ", err) - return ctrl.Result{}, err - } else { - log.Info("Roshani, can list JobSets") - for _, job := range jsList.Items { - for _, condition := range job.Status.Conditions { - log.Info("Roshani Jobset condtion", job.ObjectMeta.Name, condition.Type) - } - if job.ObjectMeta.Name == pw.Spec.WorkloadName && - (job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || - job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress)) { - log.Info("Roshani, found JobSet ", "JobSet name", pw.Spec.WorkloadName) - log.Info("Roshani, nothing to reconcile here") - return ctrl.Result{}, nil - // Nothing to reconcile here. - } - } - } - - // Currently leading to race conditions ---. - // var pwList pathwaysapi.PathwaysAPIList - // if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { - // log.Error(err, "Roshani, failed to list Pathways") - // return ctrl.Result{}, err - // } else { - // log.Info("Roshani, successfully listed Pathways") - // for _, job := range pwList.Items { - // log.Info("ROSHANI", "Job name ", job.Spec.WorkloadName, "Pathways workload name ", pw.Spec.WorkloadName) - // if job.Spec.WorkloadName == pw.Spec.WorkloadName { - // log.Info("Roshani, found Pathways, not creating workload: ", "JobSet name", pw.Spec.WorkloadName) - // return ctrl.Result{}, nil - // // Nothing to reconcile here. - // } - // } - // } - // 3. 
Update the cluster - create update and delete other resources if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { log.Error(err, "Roshani, failed to create JobSet \n") @@ -167,11 +125,10 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) // function setPathwaysJobResumedCondition func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI, jobSetClient *jobsetclient.Clientset) error { - // log := ctrl.LoggerFrom(ctx) log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log2) - log2.Info("ROSHANI in createJobSet") + log2.Info("ROSHANI in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) // Some predefined variables truth := true @@ -185,7 +142,8 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ - Name: pw.Spec.WorkloadName, + Name: pw.GetName(), + Namespace: pw.GetNamespace(), }, Spec: jobsetv1alpha2.JobSetSpec{ FailurePolicy: &jobsetv1alpha2.FailurePolicy{ @@ -233,7 +191,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Env: []corev1.EnvVar{ {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, {Name: "JAX_PLATFORMS", Value: "proxy"}, - {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, + {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pw.GetName(), "leader", pw.GetName())}, {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, }, Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, @@ -272,7 +230,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap Args: []string{ "--alsologtostderr", "--pathways_server_port=38679", - 
fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), + fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), "--pathways_persistent_compilation_cache=false", "--pathways_compilation_mode=compile_at_worker", "--xla_tpu_enable_data_parallel_all_reduce_opt=true", @@ -322,16 +280,13 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap // var lock sync.Mutex // Set Pathways controller as the owner of the JobSet for garbage collection. - // if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { - // // lock.Lock() - // // defer lock.Unlock() - // log2.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) // - clearly not working - // // return err - // } else { - // log2.Info("Roshani, successfully set Pathways as owner of JobSet.") - // } + if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { + log2.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) + } else { + log2.Info("Roshani, successfully set Pathways as owner of JobSet.") + } - js, err := jobSetClient.JobsetV1alpha2().JobSets("default").Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) + js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) if err != nil { log2.Info("Roshani, failed to create JobSet: ", "JobSet name", js.Name) @@ -372,7 +327,7 @@ func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Containe Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: 
"metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName)}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.GetName(), "leader", pw.GetName())}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, }, Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, @@ -391,7 +346,7 @@ func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) Args: []string{ "--alsologtostderr", "--v=0", - fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.Spec.WorkloadName, "leader", pw.Spec.WorkloadName), + fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), "--pathways_ifrt_proxy_server_port=38681", fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), "--pathways_plaque_network=gcp", @@ -412,7 +367,7 @@ func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { Key: "jobset.sigs.k8s.io/jobset-name", Operator: metav1.LabelSelectorOpIn, - Values: []string{pw.Spec.WorkloadName}, + Values: []string{pw.GetName()}, }, }, }, @@ -428,7 +383,7 @@ func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { Key: "jobset.sigs.k8s.io/jobset-name", Operator: metav1.LabelSelectorOpNotIn, - Values: []string{pw.Spec.WorkloadName}, + Values: []string{pw.GetName()}, }, { Key: "job-name", diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 77c51726..6cc486d8 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -61,6 +61,8 @@ package utils // "cloud.google.com/gke-tpu-topology": "2x2x2"}, // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, +//----------------LIST---------------- + // List JobSets using client // jsList, err := 
jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) @@ -78,3 +80,45 @@ package utils // log.Info("Roshani, error listing JobSets: ", "error ", err) // return ctrl.Result{}, err // } + +// +// // JobSet list +// var jsList *jobsetv1alpha2.JobSetList + +// jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) +// if err != nil { +// log.Info("Roshani, can't list JobSets: ", "error ", err) +// return ctrl.Result{}, err +// } else { +// log.Info("Roshani, can list JobSets") +// for _, job := range jsList.Items { +// for _, condition := range job.Status.Conditions { +// log.Info("Roshani Jobset condtion", job.ObjectMeta.Name, condition.Type) +// } +// if job.ObjectMeta.Name == pw.GetName() && +// (job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || +// job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress)) { +// log.Info("Roshani, found JobSet ", "JobSet name", pw.GetName()) +// log.Info("Roshani, nothing to reconcile here") +// return ctrl.Result{}, nil +// // Nothing to reconcile here. +// } +// } +// } + +// Currently leading to race conditions ---. +// var pwList pathwaysapi.PathwaysAPIList +// if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { +// log.Error(err, "Roshani, failed to list Pathways") +// return ctrl.Result{}, err +// } else { +// log.Info("Roshani, successfully listed Pathways") +// for _, job := range pwList.Items { +// log.Info("ROSHANI", "Job name ", job.Spec.WorkloadName, "Pathways workload name ", pw.GetName()) +// if job.Spec.WorkloadName == pw.GetName() { +// log.Info("Roshani, found Pathways, not creating workload: ", "JobSet name", pw.GetName()) +// return ctrl.Result{}, nil +// // Nothing to reconcile here. 
+// } +// } +// } From 084dace5da35b476033db977cb8d60b86917bcb7 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 7 Oct 2024 23:43:00 +0000 Subject: [PATCH 14/32] Renamed PathwaysAPI to PathwaysJob --- Makefile | 6 +-- PROJECT | 10 ++--- api/v1/groupversion_info.go | 6 +-- api/v1/pathwaysapi_types.go | 22 +++++------ api/v1/zz_generated.deepcopy.go | 38 +++++++++---------- cmd/main.go | 8 ++-- ...ways-api.pathways.domain_pathwaysapis.yaml | 28 ++++---------- config/crd/kustomization.yaml | 2 +- config/default/kustomization.yaml | 4 +- config/default/metrics_service.yaml | 2 +- config/manager/kustomization.yaml | 2 +- config/manager/manager.yaml | 4 +- config/prometheus/monitor.yaml | 2 +- config/rbac/leader_election_role.yaml | 2 +- config/rbac/leader_election_role_binding.yaml | 2 +- config/rbac/pathwaysapi_editor_role.yaml | 14 +++---- config/rbac/pathwaysapi_viewer_role.yaml | 14 +++---- config/rbac/role.yaml | 12 +++--- config/rbac/role_binding.yaml | 2 +- config/rbac/service_account.yaml | 2 +- .../samples/pathways-api_v1_pathwaysapi.yaml | 4 +- go.mod | 2 +- internal/controller/pathwaysapi_controller.go | 30 +++++++-------- .../controller/pathwaysapi_controller_test.go | 16 ++++---- internal/controller/suite_test.go | 4 +- pkg/utils/extra_prototype.go | 2 +- test/e2e/e2e_suite_test.go | 2 +- test/e2e/e2e_test.go | 6 +-- 28 files changed, 116 insertions(+), 132 deletions(-) diff --git a/Makefile b/Makefile index be17ef09..a1e45739 100644 --- a/Makefile +++ b/Makefile @@ -108,10 +108,10 @@ PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le docker-buildx: ## Build and push docker image for the manager for cross-platform support # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross - - $(CONTAINER_TOOL) buildx create --name 
pathways-api-builder - $(CONTAINER_TOOL) buildx use pathways-api-builder + - $(CONTAINER_TOOL) buildx create --name pathways-job-builder + $(CONTAINER_TOOL) buildx use pathways-job-builder - $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross . - - $(CONTAINER_TOOL) buildx rm pathways-api-builder + - $(CONTAINER_TOOL) buildx rm pathways-job-builder rm Dockerfile.cross .PHONY: build-installer diff --git a/PROJECT b/PROJECT index ba0223b2..f70a3dcc 100644 --- a/PROJECT +++ b/PROJECT @@ -5,16 +5,16 @@ domain: pathways.domain layout: - go.kubebuilder.io/v4 -projectName: pathways-api -repo: pathways-api +projectName: pathways-job +repo: pathways-job resources: - api: crdVersion: v1 namespaced: true controller: true domain: pathways.domain - group: pathways-api - kind: PathwaysAPI - path: pathways-api/api/v1 + group: pathways-job + kind: PathwaysJob + path: pathways-job/api/v1 version: v1 version: "3" diff --git a/api/v1/groupversion_info.go b/api/v1/groupversion_info.go index ac0fdacf..f64254e1 100644 --- a/api/v1/groupversion_info.go +++ b/api/v1/groupversion_info.go @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -// Package v1 contains API Schema definitions for the pathways-api v1 API group +// Package v1 contains API Schema definitions for the pathways-job v1 API group // +kubebuilder:object:generate=true -// +groupName=pathways-api.pathways.domain +// +groupName=pathways-job.pathways.domain package v1 import ( @@ -26,7 +26,7 @@ import ( var ( // GroupVersion is group version used to register these objects - GroupVersion = schema.GroupVersion{Group: "pathways-api.pathways.domain", Version: "v1"} + GroupVersion = schema.GroupVersion{Group: "pathways-job.pathways.domain", Version: "v1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysapi_types.go index a6be0fad..15f72bd1 100644 --- a/api/v1/pathwaysapi_types.go +++ b/api/v1/pathwaysapi_types.go @@ -29,22 +29,22 @@ import ( // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Namespaced -// PathwaysAPI is the Schema for the pathwaysapis API -type PathwaysAPI struct { +// PathwaysJob is the Schema for the PathwaysJobs API +type PathwaysJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` - Spec PathwaysAPISpec `json:"spec,omitempty"` - Status PathwaysAPIStatus `json:"status,omitempty"` + Spec PathwaysJobSpec `json:"spec,omitempty"` + Status PathwaysJobStatus `json:"status,omitempty"` } // +kubebuilder:object:root=true -// PathwaysAPIList contains a list of PathwaysAPI -type PathwaysAPIList struct { +// PathwaysJobList contains a list of PathwaysJob +type PathwaysJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` - Items []PathwaysAPI `json:"items"` + Items []PathwaysJob `json:"items"` } // PathwaysJob creates a Pathways workload. It sets up the TPU @@ -56,7 +56,7 @@ type PathwaysAPIList struct { // to be running in headless mode and the user can connect to Proxy, // to run their workloads. 
-type PathwaysAPISpec struct { +type PathwaysJobSpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file @@ -96,8 +96,8 @@ type PathwaysAPISpec struct { UserPodTemplate *corev1.PodTemplateSpec `json:"template" protobuf:"bytes,6,opt,name=template"` } -// PathwaysAPIStatus defines the observed state of PathwaysAPI -type PathwaysAPIStatus struct { +// PathwaysJobStatus defines the observed state of PathwaysJob +type PathwaysJobStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file @@ -117,5 +117,5 @@ const ( ) func init() { - SchemeBuilder.Register(&PathwaysAPI{}, &PathwaysAPIList{}) + SchemeBuilder.Register(&PathwaysJob{}, &PathwaysJobList{}) } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 5f426282..c212d84e 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -27,7 +27,7 @@ import ( ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PathwaysAPI) DeepCopyInto(out *PathwaysAPI) { +func (in *PathwaysJob) DeepCopyInto(out *PathwaysJob) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) @@ -35,18 +35,18 @@ func (in *PathwaysAPI) DeepCopyInto(out *PathwaysAPI) { in.Status.DeepCopyInto(&out.Status) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPI. -func (in *PathwaysAPI) DeepCopy() *PathwaysAPI { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysJob. 
+func (in *PathwaysJob) DeepCopy() *PathwaysJob { if in == nil { return nil } - out := new(PathwaysAPI) + out := new(PathwaysJob) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *PathwaysAPI) DeepCopyObject() runtime.Object { +func (in *PathwaysJob) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -54,31 +54,31 @@ func (in *PathwaysAPI) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PathwaysAPIList) DeepCopyInto(out *PathwaysAPIList) { +func (in *PathwaysJobList) DeepCopyInto(out *PathwaysJobList) { *out = *in out.TypeMeta = in.TypeMeta in.ListMeta.DeepCopyInto(&out.ListMeta) if in.Items != nil { in, out := &in.Items, &out.Items - *out = make([]PathwaysAPI, len(*in)) + *out = make([]PathwaysJob, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPIList. -func (in *PathwaysAPIList) DeepCopy() *PathwaysAPIList { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysJobList. +func (in *PathwaysJobList) DeepCopy() *PathwaysJobList { if in == nil { return nil } - out := new(PathwaysAPIList) + out := new(PathwaysJobList) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *PathwaysAPIList) DeepCopyObject() runtime.Object { +func (in *PathwaysJobList) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -86,7 +86,7 @@ func (in *PathwaysAPIList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *PathwaysAPISpec) DeepCopyInto(out *PathwaysAPISpec) { +func (in *PathwaysJobSpec) DeepCopyInto(out *PathwaysJobSpec) { *out = *in if in.PathwaysWorkerNodeSelector != nil { in, out := &in.PathwaysWorkerNodeSelector, &out.PathwaysWorkerNodeSelector @@ -109,18 +109,18 @@ func (in *PathwaysAPISpec) DeepCopyInto(out *PathwaysAPISpec) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPISpec. -func (in *PathwaysAPISpec) DeepCopy() *PathwaysAPISpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysJobSpec. +func (in *PathwaysJobSpec) DeepCopy() *PathwaysJobSpec { if in == nil { return nil } - out := new(PathwaysAPISpec) + out := new(PathwaysJobSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PathwaysAPIStatus) DeepCopyInto(out *PathwaysAPIStatus) { +func (in *PathwaysJobStatus) DeepCopyInto(out *PathwaysJobStatus) { *out = *in if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions @@ -131,12 +131,12 @@ func (in *PathwaysAPIStatus) DeepCopyInto(out *PathwaysAPIStatus) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysAPIStatus. -func (in *PathwaysAPIStatus) DeepCopy() *PathwaysAPIStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysJobStatus. 
+func (in *PathwaysJobStatus) DeepCopy() *PathwaysJobStatus { if in == nil { return nil } - out := new(PathwaysAPIStatus) + out := new(PathwaysJobStatus) in.DeepCopyInto(out) return out } diff --git a/cmd/main.go b/cmd/main.go index ddd338e0..96880fde 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -35,8 +35,8 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" - pathwaysapiv1 "pathways-api/api/v1" - "pathways-api/internal/controller" + pathwaysjobv1 "pathways-job/api/v1" + "pathways-job/internal/controller" // +kubebuilder:scaffold:imports ) @@ -48,7 +48,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(pathwaysapiv1.AddToScheme(scheme)) + utilruntime.Must(pathwaysjobv1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -148,7 +148,7 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "PathwaysAPI") + setupLog.Error(err, "unable to create controller", "controller", "PathwaysJob") os.Exit(1) } // +kubebuilder:scaffold:builder diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml index 79f3fded..71f8b303 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml @@ -1,37 +1,23 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.15.0 - name: pathwaysapis.pathways-api.pathways.domain + name: pathwaysjobs.pathways-job.pathways.domain spec: - group: pathways-api.pathways.domain + group: pathways-job.pathways.domain names: - kind: PathwaysAPI + kind: PathwaysJob listKind: PathwaysAPIList - plural: pathwaysapis - singular: pathwaysapi + plural: pathwaysjobs + singular: pathwaysjob scope: Namespaced versions: - name: v1 schema: openAPIV3Schema: - description: PathwaysAPI is the Schema for the pathwaysapis API + description: PathwaysJob is the Schema for the pathwaysjobs API properties: apiVersion: description: |- @@ -8016,7 +8002,7 @@ spec: type: object type: object status: - description: PathwaysAPIStatus defines the observed state of PathwaysAPI + description: PathwaysAPIStatus defines the observed state of PathwaysJob properties: conditions: description: |- diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 6332199b..9a48289e 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -16,7 +16,7 @@ # since it depends on service name and namespace that are out of this kustomize package. # It should be run by config/default resources: -- bases/pathways-api.pathways.domain_pathwaysapis.yaml +- bases/pathways-job.pathways.domain_pathwaysapis.yaml # +kubebuilder:scaffold:crdkustomizeresource # patches: diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 19480ca0..53182412 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -13,14 +13,14 @@ # limitations under the License. # Adds namespace to all resources. 
-namespace: pathways-api-system +namespace: pathways-job-system # Value of this field is prepended to the # names of all resources, e.g. a deployment named # "wordpress" becomes "alices-wordpress". # Note that it should also match with the prefix (text before '-') of the namespace # field above. -namePrefix: pathways-api- +namePrefix: pathways-job- # Labels to add to all resources and selectors. #labels: diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml index f4ec4021..2249f2ac 100644 --- a/config/default/metrics_service.yaml +++ b/config/default/metrics_service.yaml @@ -17,7 +17,7 @@ kind: Service metadata: labels: control-plane: controller-manager - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-service namespace: system diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 771d8487..118bfe52 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,5 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysapi + newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysjob newTag: latest diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 0d5b799e..d988312c 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -17,7 +17,7 @@ kind: Namespace metadata: labels: control-plane: controller-manager - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: system --- @@ -28,7 +28,7 @@ metadata: namespace: system labels: control-plane: controller-manager - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize spec: selector: diff --git a/config/prometheus/monitor.yaml 
b/config/prometheus/monitor.yaml index 9259d148..662d0f5d 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -18,7 +18,7 @@ kind: ServiceMonitor metadata: labels: control-plane: controller-manager - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-monitor namespace: system diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index e08881bc..1a285657 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -17,7 +17,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: labels: - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: leader-election-role rules: diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index 0c9103d3..f208dc1c 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -16,7 +16,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: labels: - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: leader-election-rolebinding roleRef: diff --git a/config/rbac/pathwaysapi_editor_role.yaml b/config/rbac/pathwaysapi_editor_role.yaml index 40f7aeea..5d375d90 100644 --- a/config/rbac/pathwaysapi_editor_role.yaml +++ b/config/rbac/pathwaysapi_editor_role.yaml @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -# permissions for end users to edit pathwaysapis. +# permissions for end users to edit pathwaysjobs. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize - name: pathwaysapi-editor-role + name: pathwaysjob-editor-role rules: - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis + - pathwaysjobs verbs: - create - delete @@ -34,8 +34,8 @@ rules: - update - watch - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis/status + - pathwaysjobs/status verbs: - get diff --git a/config/rbac/pathwaysapi_viewer_role.yaml b/config/rbac/pathwaysapi_viewer_role.yaml index efb4fc4f..31172d9a 100644 --- a/config/rbac/pathwaysapi_viewer_role.yaml +++ b/config/rbac/pathwaysapi_viewer_role.yaml @@ -12,26 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -# permissions for end users to view pathwaysapis. +# permissions for end users to view pathwaysjobs. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize - name: pathwaysapi-viewer-role + name: pathwaysjob-viewer-role rules: - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis + - pathwaysjobs verbs: - get - list - watch - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis/status + - pathwaysjobs/status verbs: - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 8033ac7b..28333745 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -19,9 +19,9 @@ metadata: name: manager-role rules: - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis + - pathwaysjobs verbs: - create - delete @@ -31,15 +31,15 @@ rules: - update - watch - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis/finalizers + - pathwaysjobs/finalizers verbs: - update - apiGroups: - - pathways-api.pathways.domain + - pathways-job.pathways.domain resources: - - pathwaysapis/status + - pathwaysjobs/status verbs: - get - patch diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index 8afb66a7..829048b6 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -16,7 +16,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: - app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: manager-rolebinding roleRef: diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 85bb665d..edc98059 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -16,7 +16,7 @@ apiVersion: v1 kind: ServiceAccount metadata: labels: - 
app.kubernetes.io/name: pathways-api + app.kubernetes.io/name: pathways-job app.kubernetes.io/managed-by: kustomize name: controller-manager namespace: system diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-api_v1_pathwaysapi.yaml index 51e32138..fd14811c 100644 --- a/config/samples/pathways-api_v1_pathwaysapi.yaml +++ b/config/samples/pathways-api_v1_pathwaysapi.yaml @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -apiVersion: pathways-api.pathways.domain/v1 -kind: PathwaysAPI +apiVersion: pathways-job.pathways.domain/v1 +kind: PathwaysJob metadata: name: pathways-1 spec: diff --git a/go.mod b/go.mod index 0e1bb303..9aa338ab 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module pathways-api +module pathways-job go 1.22.0 diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysapi_controller.go index efe931ce..4449be9e 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysapi_controller.go @@ -34,10 +34,10 @@ import ( jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" jobsetclient "sigs.k8s.io/jobset/client-go/clientset/versioned" - pathwaysapi "pathways-api/api/v1" + pathwaysjob "pathways-job/api/v1" ) -// PathwaysAPIReconciler reconciles a PathwaysAPI object +// PathwaysAPIReconciler reconciles a PathwaysJob object type PathwaysAPIReconciler struct { client.Client Scheme *runtime.Scheme @@ -46,7 +46,7 @@ type PathwaysAPIReconciler struct { // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
// TODO(user): Modify the Reconcile function to compare the state specified by -// the PathwaysAPI object against the actual cluster state, and then +// the PathwaysJob object against the actual cluster state, and then // perform operations to make the cluster state reflect the state specified by // the user. // @@ -54,14 +54,14 @@ type PathwaysAPIReconciler struct { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile // +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch -// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=pathways-api.pathways.domain,resources=pathwaysapis/finalizers,verbs=update +// +kubebuilder:rbac:groups=pathways-job.pathways.domain,resources=pathwaysjobs,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=pathways-job.pathways.domain,resources=pathwaysjobs/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=pathways-job.pathways.domain,resources=pathwaysjobs/finalizers,verbs=update // +kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets/status,verbs=get;update;patch func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - pw := &pathwaysapi.PathwaysAPI{} - log := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) + pw := &pathwaysjob.PathwaysJob{} + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log) log.Info("ROSHANI CONTROLLER WORKING...", "req.NamespacedName", req.NamespacedName.String(), "req.Namespace", req.Namespace) @@ -87,8 +87,6 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) if err 
:= r.createJobSet(ctx, pw, jobSetClient); err != nil { log.Error(err, "Roshani, failed to create JobSet \n") return ctrl.Result{}, err - } else { - log.Info("Roshani, successfully created JobSet \n") } //4. Update the object's status using Conditions @@ -124,8 +122,8 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) // function setPathwaysJobResumedCondition -func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysapi.PathwaysAPI, jobSetClient *jobsetclient.Clientset) error { - log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysapi", klog.KObj(pw)) +func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { + log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log2) log2.Info("ROSHANI in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) @@ -300,14 +298,14 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysap // SetupWithManager sets up the controller with the Manager. func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&pathwaysapi.PathwaysAPI{}). + For(&pathwaysjob.PathwaysJob{}). // Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet Complete(r) } // helpers -func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { +func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) { truth := true rmContainerSpec := corev1.Container{ Name: "pathways-rm", @@ -336,7 +334,7 @@ func MakeResourceManagerContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Containe return &rmContainerSpec, nil } -func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) { +func MakeProxyContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) { truth := true proxyContainerSpec := corev1.Container{ Name: "pathways-proxy", @@ -357,7 +355,7 @@ func MakeProxyContainer(pw *pathwaysapi.PathwaysAPI) (*corev1.Container, error) return &proxyContainerSpec, nil } -func MakePodAffinityRules(pw *pathwaysapi.PathwaysAPI) (*corev1.Affinity, error) { +func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) { affinity := corev1.Affinity{ PodAffinity: &corev1.PodAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ diff --git a/internal/controller/pathwaysapi_controller_test.go b/internal/controller/pathwaysapi_controller_test.go index 819dee8a..53ae94f4 100644 --- a/internal/controller/pathwaysapi_controller_test.go +++ b/internal/controller/pathwaysapi_controller_test.go @@ -27,10 +27,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - pathwaysapiv1 "pathways-api/api/v1" + pathwaysjobv1 "pathways-job/api/v1" ) -var _ = Describe("PathwaysAPI Controller", func() { +var _ = Describe("PathwaysJob Controller", func() { Context("When reconciling a resource", func() { const resourceName = "test-resource" @@ -40,13 +40,13 @@ var _ = Describe("PathwaysAPI Controller", func() { Name: resourceName, Namespace: "default", // TODO(user):Modify as needed } - pathwaysapi := &pathwaysapiv1.PathwaysAPI{} + pathwaysjob := &pathwaysjobv1.PathwaysJob{} BeforeEach(func() { - By("creating the 
custom resource for the Kind PathwaysAPI") - err := k8sClient.Get(ctx, typeNamespacedName, pathwaysapi) + By("creating the custom resource for the Kind PathwaysJob") + err := k8sClient.Get(ctx, typeNamespacedName, pathwaysjob) if err != nil && errors.IsNotFound(err) { - resource := &pathwaysapiv1.PathwaysAPI{ + resource := &pathwaysjobv1.PathwaysJob{ ObjectMeta: metav1.ObjectMeta{ Name: resourceName, Namespace: "default", @@ -59,11 +59,11 @@ var _ = Describe("PathwaysAPI Controller", func() { AfterEach(func() { // TODO(user): Cleanup logic after each test, like removing the resource instance. - resource := &pathwaysapiv1.PathwaysAPI{} + resource := &pathwaysjobv1.PathwaysJob{} err := k8sClient.Get(ctx, typeNamespacedName, resource) Expect(err).NotTo(HaveOccurred()) - By("Cleanup the specific resource instance PathwaysAPI") + By("Cleanup the specific resource instance PathwaysJob") Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) }) It("should successfully reconcile the resource", func() { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index fb44145c..28d47d84 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -32,7 +32,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - pathwaysapiv1 "pathways-api/api/v1" + pathwaysjobv1 "pathways-job/api/v1" // +kubebuilder:scaffold:imports ) @@ -72,7 +72,7 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = pathwaysapiv1.AddToScheme(scheme.Scheme) + err = pathwaysjobv1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:scheme diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 6cc486d8..6c32dc85 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -107,7 +107,7 @@ package utils // } // Currently leading to race conditions ---. 
-// var pwList pathwaysapi.PathwaysAPIList +// var pwList pathwaysjob.PathwaysAPIList // if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { // log.Error(err, "Roshani, failed to list Pathways") // return ctrl.Result{}, err diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 140dce13..98b15632 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -27,6 +27,6 @@ import ( // Run e2e tests using the Ginkgo runner. func TestE2E(t *testing.T) { RegisterFailHandler(Fail) - _, _ = fmt.Fprintf(GinkgoWriter, "Starting pathways-api suite\n") + _, _ = fmt.Fprintf(GinkgoWriter, "Starting pathways-job suite\n") RunSpecs(t, "e2e suite") } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index d519f2e4..7f3e4eee 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -24,10 +24,10 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "pathways-api/test/utils" + "pathways-job/test/utils" ) -const namespace = "pathways-api-system" +const namespace = "pathways-job-system" var _ = Describe("controller", Ordered, func() { BeforeAll(func() { @@ -60,7 +60,7 @@ var _ = Describe("controller", Ordered, func() { var err error // projectimage stores the name of the image used in the example - var projectimage = "example.com/pathways-api:v0.0.1" + var projectimage = "example.com/pathways-job:v0.0.1" By("building the manager(Operator) image") cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) From 8883b32635e007666fdf44966362d8490500a17b Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 8 Oct 2024 18:55:02 +0000 Subject: [PATCH 15/32] Renamed PathwaysAPI to PathwaysJob 2, changed RBAC references. 
--- ...hwaysapi_types.go => pathwaysjob_types.go} | 0 cmd/main.go | 2 +- ...ays-job.pathways.domain_pathwaysjobs.yaml} | 20 ++++++++++++++++--- config/crd/kustomization.yaml | 4 ++-- config/rbac/kustomization.yaml | 4 ++-- ...role.yaml => pathwaysjob_editor_role.yaml} | 0 ...role.yaml => pathwaysjob_viewer_role.yaml} | 0 config/samples/kustomization.yaml | 2 +- ....yaml => pathways-job_v1_pathwaysjob.yaml} | 0 ...ontroller.go => pathwaysjob_controller.go} | 10 +++++----- ...test.go => pathwaysjob_controller_test.go} | 2 +- pkg/utils/extra_prototype.go | 2 +- 12 files changed, 30 insertions(+), 16 deletions(-) rename api/v1/{pathwaysapi_types.go => pathwaysjob_types.go} (100%) rename config/crd/bases/{pathways-api.pathways.domain_pathwaysapis.yaml => pathways-job.pathways.domain_pathwaysjobs.yaml} (99%) rename config/rbac/{pathwaysapi_editor_role.yaml => pathwaysjob_editor_role.yaml} (100%) rename config/rbac/{pathwaysapi_viewer_role.yaml => pathwaysjob_viewer_role.yaml} (100%) rename config/samples/{pathways-api_v1_pathwaysapi.yaml => pathways-job_v1_pathwaysjob.yaml} (100%) rename internal/controller/{pathwaysapi_controller.go => pathwaysjob_controller.go} (98%) rename internal/controller/{pathwaysapi_controller_test.go => pathwaysjob_controller_test.go} (98%) diff --git a/api/v1/pathwaysapi_types.go b/api/v1/pathwaysjob_types.go similarity index 100% rename from api/v1/pathwaysapi_types.go rename to api/v1/pathwaysjob_types.go diff --git a/cmd/main.go b/cmd/main.go index 96880fde..ff0753ec 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -144,7 +144,7 @@ func main() { os.Exit(1) } - if err = (&controller.PathwaysAPIReconciler{ + if err = (&controller.PathwaysJobReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { diff --git a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml similarity index 99% rename from 
config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml rename to config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 71f8b303..b0f7ad3f 100644 --- a/config/crd/bases/pathways-api.pathways.domain_pathwaysapis.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -9,7 +23,7 @@ spec: group: pathways-job.pathways.domain names: kind: PathwaysJob - listKind: PathwaysAPIList + listKind: PathwaysJobList plural: pathwaysjobs singular: pathwaysjob scope: Namespaced @@ -17,7 +31,7 @@ spec: - name: v1 schema: openAPIV3Schema: - description: PathwaysJob is the Schema for the pathwaysjobs API + description: PathwaysJob is the Schema for the PathwaysJobs API properties: apiVersion: description: |- @@ -8002,7 +8016,7 @@ spec: type: object type: object status: - description: PathwaysAPIStatus defines the observed state of PathwaysJob + description: PathwaysJobStatus defines the observed state of PathwaysJob properties: conditions: description: |- diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 9a48289e..56361cb7 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -16,7 +16,7 @@ # since it depends on service name and namespace that are out of this kustomize package. 
# It should be run by config/default resources: -- bases/pathways-job.pathways.domain_pathwaysapis.yaml +- bases/pathways-job.pathways.domain_pathwaysjobs.yaml # +kubebuilder:scaffold:crdkustomizeresource # patches: @@ -26,7 +26,7 @@ resources: # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. # patches here are for enabling the CA injection for each CRD -#- path: patches/cainjection_in_pathwaysapis.yaml +#- path: patches/cainjection_in_pathwaysjobs.yaml # +kubebuilder:scaffold:crdkustomizecainjectionpatch # [WEBHOOK] To enable webhook, uncomment the following section diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index f57be9aa..7dc72fe5 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -36,6 +36,6 @@ resources: # default, aiding admins in cluster management. Those roles are # not used by the Project itself. You can comment the following lines # if you do not want those helpers be installed with your Project. 
-- pathwaysapi_editor_role.yaml -- pathwaysapi_viewer_role.yaml +- pathwaysjob_editor_role.yaml +- pathwaysjob_viewer_role.yaml diff --git a/config/rbac/pathwaysapi_editor_role.yaml b/config/rbac/pathwaysjob_editor_role.yaml similarity index 100% rename from config/rbac/pathwaysapi_editor_role.yaml rename to config/rbac/pathwaysjob_editor_role.yaml diff --git a/config/rbac/pathwaysapi_viewer_role.yaml b/config/rbac/pathwaysjob_viewer_role.yaml similarity index 100% rename from config/rbac/pathwaysapi_viewer_role.yaml rename to config/rbac/pathwaysjob_viewer_role.yaml diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index bd84c32d..07b2b0e8 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -14,5 +14,5 @@ ## Append samples of your project ## resources: -- pathways-api_v1_pathwaysapi.yaml +- pathways-job_v1_pathwaysjob.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/pathways-api_v1_pathwaysapi.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml similarity index 100% rename from config/samples/pathways-api_v1_pathwaysapi.yaml rename to config/samples/pathways-job_v1_pathwaysjob.yaml diff --git a/internal/controller/pathwaysapi_controller.go b/internal/controller/pathwaysjob_controller.go similarity index 98% rename from internal/controller/pathwaysapi_controller.go rename to internal/controller/pathwaysjob_controller.go index 4449be9e..cfd72082 100644 --- a/internal/controller/pathwaysapi_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -37,8 +37,8 @@ import ( pathwaysjob "pathways-job/api/v1" ) -// PathwaysAPIReconciler reconciles a PathwaysJob object -type PathwaysAPIReconciler struct { +// PathwaysJobReconciler reconciles a PathwaysJob object +type PathwaysJobReconciler struct { client.Client Scheme *runtime.Scheme } @@ -59,7 +59,7 @@ type PathwaysAPIReconciler struct { //
+kubebuilder:rbac:groups=pathways-job.pathways.domain,resources=pathwaysjobs/finalizers,verbs=update // +kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=jobset.x-k8s.io,resources=jobsets/status,verbs=get;update;patch -func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pw := &pathwaysjob.PathwaysJob{} log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log) @@ -122,7 +122,7 @@ func (r *PathwaysAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) // function setPathwaysJobResumedCondition -func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { +func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log2) @@ -296,7 +296,7 @@ func (r *PathwaysAPIReconciler) createJobSet(ctx context.Context, pw *pathwaysjo } // SetupWithManager sets up the controller with the Manager. -func (r *PathwaysAPIReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&pathwaysjob.PathwaysJob{}). // Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet diff --git a/internal/controller/pathwaysapi_controller_test.go b/internal/controller/pathwaysjob_controller_test.go similarity index 98% rename from internal/controller/pathwaysapi_controller_test.go rename to internal/controller/pathwaysjob_controller_test.go index 53ae94f4..1e877a61 100644 --- a/internal/controller/pathwaysapi_controller_test.go +++ b/internal/controller/pathwaysjob_controller_test.go @@ -68,7 +68,7 @@ var _ = Describe("PathwaysJob Controller", func() { }) It("should successfully reconcile the resource", func() { By("Reconciling the created resource") - controllerReconciler := &PathwaysAPIReconciler{ + controllerReconciler := &PathwaysJobReconciler{ Client: k8sClient, Scheme: k8sClient.Scheme(), } diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 6c32dc85..682dc49d 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -107,7 +107,7 @@ package utils // } // Currently leading to race conditions ---. -// var pwList pathwaysjob.PathwaysAPIList +// var pwList pathwaysjob.PathwaysJobList // if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { // log.Error(err, "Roshani, failed to list Pathways") // return ctrl.Result{}, err From a3c19b5da8a62410828e45038ea0ce1ec8fef148 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 9 Oct 2024 00:37:44 +0000 Subject: [PATCH 16/32] Introduced WorkerSpec and ColocationPolicy. 
--- api/v1/pathwaysjob_types.go | 44 ++++++----- api/v1/zz_generated.deepcopy.go | 32 ++++---- ...ways-job.pathways.domain_pathwaysjobs.yaml | 75 ++++++++++--------- config/manager/kustomization.yaml | 2 - .../samples/pathways-job_v1_pathwaysjob.yaml | 12 +-- internal/controller/pathwaysjob_controller.go | 18 +++-- 6 files changed, 101 insertions(+), 82 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 15f72bd1..2551bb69 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -18,7 +18,6 @@ package v1 import ( corev1 "k8s.io/api/core/v1" - // corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -60,27 +59,19 @@ type PathwaysJobSpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // ColocationPolicy defines whether the user job and the Pathways resources (RM, proxy) - // must be colocated on TPUs with the Pathways workers or not. - // Users may opt for best-effort placement where scheduler places the RM and proxy - // on the CPU nodepools by default. + // ColocationPolicy defines whether the user job and the Pathways + // resources (RM, proxy) must be colocated on TPUs, with the Pathways + // workers or not. If user chooses to "colocate", then the Pathways RM + // and proxy run together with the user job as a single pod. + // Users may opt for "best-effort" placement where scheduler places the + // RM and proxy (as a single pod) on the CPU nodepools by default. User + // workload will be deployed separately,as a pod. // Default is best-effort. ColocationPolicy ColocationPolicy `json:"colocationPolicy,omitempty"` - // PathwaysWorkerNodeSelector is used to specify the nodeSelector for - // Pathways TPU workers (accelerator type and topology). 
- PathwaysWorkerNodeSelector map[string]string `json:"pathwaysWorkerNodeSelector,omitempty"` - - // PathwaysControllerNodeSelector is used to specify where Pathways resources - // such as RM and proxy should be deployed. - PathwaysControllerNodeSelector map[string]string `json:"pathwaysControllerNodeSelector,omitempty"` - // Maximum number of times the JobSet is restarted. MaxRestarts int32 `json:"maxRestarts,omitempty"` - // Number of TPU slices requested for the Pathways workers. - NumSlices int32 `json:"numSlices,omitempty"` - // PathwaysDir is a persistent location like GCS at which temporary // Pathways artifacts can be stored like HBM state during interruptions. // Currently, Pathways supports a precreated GCS directory only. @@ -89,11 +80,15 @@ type PathwaysJobSpec struct { // PathwaysVersion is the version of the Pathways client. PathwaysVersion string `json:"pathwaysVersion,omitempty"` + // The list of worker types created for the Pathways Job. Currently only + // one type of worker is supported. + Workers []WorkerSpec `json:"workers"` + // UserPodTemplate accepts a pod composed of user's workload // (and other) containers. // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec // +optional - UserPodTemplate *corev1.PodTemplateSpec `json:"template" protobuf:"bytes,6,opt,name=template"` + UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` } // PathwaysJobStatus defines the observed state of PathwaysJob @@ -116,6 +111,21 @@ const ( BestEffort ColocationPolicy = "best-effort" ) +// The WorkerSpec struct takes in the specifications for the +// Pathways workers. 
+type WorkerSpec struct { + // This will translate to a nodeSelector of the form + // cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice + Type string `json:"type"` + + // This will translate to a nodeSelector of the form + // cloud.google.com/gke-tpu-topology:2x2 + Topology string `json:"topology"` + + // Number of TPU slices requested for the Pathways workers. + NumSlices int32 `json:"numSlices"` +} + func init() { SchemeBuilder.Register(&PathwaysJob{}, &PathwaysJobList{}) } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index c212d84e..f1f78ca0 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -88,19 +88,10 @@ func (in *PathwaysJobList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PathwaysJobSpec) DeepCopyInto(out *PathwaysJobSpec) { *out = *in - if in.PathwaysWorkerNodeSelector != nil { - in, out := &in.PathwaysWorkerNodeSelector, &out.PathwaysWorkerNodeSelector - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.PathwaysControllerNodeSelector != nil { - in, out := &in.PathwaysControllerNodeSelector, &out.PathwaysControllerNodeSelector - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } + if in.Workers != nil { + in, out := &in.Workers, &out.Workers + *out = make([]WorkerSpec, len(*in)) + copy(*out, *in) } if in.UserPodTemplate != nil { in, out := &in.UserPodTemplate, &out.UserPodTemplate @@ -140,3 +131,18 @@ func (in *PathwaysJobStatus) DeepCopy() *PathwaysJobStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *WorkerSpec) DeepCopyInto(out *WorkerSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerSpec. +func (in *WorkerSpec) DeepCopy() *WorkerSpec { + if in == nil { + return nil + } + out := new(WorkerSpec) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index b0f7ad3f..5fc0f588 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -1,17 +1,3 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -54,10 +40,13 @@ spec: properties: colocationPolicy: description: |- - ColocationPolicy defines whether the user job and the Pathways resources (RM, proxy) - must be colocated on TPUs with the Pathways workers or not. - Users may opt for best-effort placement where scheduler places the RM and proxy - on the CPU nodepools by default. + ColocationPolicy defines whether the user job and the Pathways + resources (RM, proxy) must be colocated on TPUs, with the Pathways + workers or not. If user chooses to "colocate", then the Pathways RM + and proxy run together with the user job as a single pod. 
+ Users may opt for "best-effort" placement where scheduler places the + RM and proxy (as a single pod) on the CPU nodepools by default. User + workload will be deployed separately,as a pod. Default is best-effort. enum: - colocate @@ -67,17 +56,6 @@ spec: description: Maximum number of times the JobSet is restarted. format: int32 type: integer - numSlices: - description: Number of TPU slices requested for the Pathways workers. - format: int32 - type: integer - pathwaysControllerNodeSelector: - additionalProperties: - type: string - description: |- - PathwaysControllerNodeSelector is used to specify where Pathways resources - such as RM and proxy should be deployed. - type: object pathwaysDir: description: |- PathwaysDir is a persistent location like GCS at which temporary @@ -87,13 +65,6 @@ spec: pathwaysVersion: description: PathwaysVersion is the version of the Pathways client. type: string - pathwaysWorkerNodeSelector: - additionalProperties: - type: string - description: |- - PathwaysWorkerNodeSelector is used to specify the nodeSelector for - Pathways TPU workers (accelerator type and topology). - type: object template: description: |- UserPodTemplate accepts a pod composed of user's workload @@ -8014,6 +7985,38 @@ spec: - containers type: object type: object + workers: + description: |- + The list of worker types created for the Pathways Job. Currently only + one type of worker is supported. + items: + description: |- + The WorkerSpec struct takes in the specifications for the + Pathways workers. + properties: + numSlices: + description: Number of TPU slices requested for the Pathways + workers. 
+ format: int32 + type: integer + topology: + description: |- + This will translate to a nodeSelector of the form + cloud.google.com/gke-tpu-topology:2x2 + type: string + type: + description: |- + This will translate to a nodeSelector of the form + cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice + type: string + required: + - numSlices + - topology + - type + type: object + type: array + required: + - workers type: object status: description: PathwaysJobStatus defines the observed state of PathwaysJob diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 118bfe52..66aadf0c 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,5 +18,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysjob - newTag: latest diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index fd14811c..23583776 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -17,13 +17,9 @@ kind: PathwaysJob metadata: name: pathways-1 spec: - pathwaysWorkerNodeSelector: - cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice - cloud.google.com/gke-tpu-topology: 2x2x2 - pathwaysControllerNodeSelector: - cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice - cloud.google.com/gke-tpu-topology: 2x2x2 - numSlices: 1 + workers: + - type: tpu-v4-podslice + topology: 2x2x2 + numSlices: 1 pathwaysDir: "gs://cloud-pathways-staging/tmp" - diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index cfd72082..f7fa47b6 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -158,8 +158,11 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo Parallelism: 
ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ - Affinity: affinitySpec, - NodeSelector: pw.Spec.PathwaysControllerNodeSelector, + Affinity: affinitySpec, + NodeSelector: map[string]string{ + "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, + }, Tolerations: []corev1.Toleration{ { Key: "google.com/tpu", @@ -211,12 +214,12 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo }, // end replicated Job { Name: "worker", - Replicas: int32(pw.Spec.NumSlices), + Replicas: int32(pw.Spec.Workers[0].NumSlices), Template: batchv1.JobTemplateSpec{ Spec: batchv1.JobSpec{ BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(2)), - Parallelism: ptr.To(int32(2)), + Completions: ptr.To(int32(2)), // remember to update + Parallelism: ptr.To(int32(2)), // remember to update Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -255,7 +258,10 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, }, // end Pathways worker container }, - NodeSelector: pw.Spec.PathwaysWorkerNodeSelector, + NodeSelector: map[string]string{ + "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, + }, Volumes: []corev1.Volume{ { Name: "shared-tmp", From 7e559f86ab83091ae79e790b90099fd8140c8934 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Fri, 11 Oct 2024 17:29:01 +0000 Subject: [PATCH 17/32] Updated Pathways flags. 
--- config/manager/kustomization.yaml | 2 + .../samples/pathways-job_v1_pathwaysjob.yaml | 3 +- internal/controller/pathwaysjob_controller.go | 38 ++++++------------- 3 files changed, 14 insertions(+), 29 deletions(-) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 66aadf0c..118bfe52 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,3 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller + newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysjob + newTag: latest diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index 23583776..aa51db67 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -21,5 +21,4 @@ spec: - type: tpu-v4-podslice topology: 2x2x2 numSlices: 1 - pathwaysDir: "gs://cloud-pathways-staging/tmp" - + pathwaysDir: "gs://cloud-pathways-staging/tmp" \ No newline at end of file diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index f7fa47b6..04ac152b 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -229,19 +229,9 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38679", - fmt.Sprintf("--pathways_resource_manager=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - "--xla_tpu_enable_data_parallel_all_reduce_opt=true", - "--xla_tpu_data_parallel_opt_different_sized_ops=true", - "--xla_tpu_enable_async_collective_fusion=true", - 
"--xla_tpu_enable_async_collective_fusion_fuse_all_gather=true", - "--xla_tpu_enable_async_collective_fusion_multiple_steps=true", - "--xla_tpu_overlap_compute_collective_tc=true", - "--xla_enable_async_all_gather=true", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), + "--server_port=38679", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), + fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, Env: []corev1.EnvVar{ {Name: "TPU_MIN_LOG_LEVEL", Value: "0"}, @@ -319,14 +309,11 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Containe ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--alsologtostderr", - "--pathways_server_port=38677", - "--pathways_server_provides_devices=false", - "--pathways_device_type=NONE", - "--pathways_persistent_compilation_cache=false", - "--pathways_compilation_mode=compile_at_worker", - fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_expected_instances=tpuv4:2x2x2", // CHANGE + "--server_port=38677", + fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), + "--node_type=resource_manager", + fmt.Sprintf("--instance_count=%d", int32(pw.Spec.Workers[0].NumSlices)), + "--instance_type=tpuv4:2x2x2", // Change }, Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, @@ -348,12 +335,9 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--alsologtostderr", - "--v=0", - fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), - "--pathways_ifrt_proxy_server_port=38681", - 
fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), - "--pathways_plaque_network=gcp", + "--server_port=38681", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), + fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, From a783df11e9d603edc66887e895fed21b560dbe6d Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 15 Oct 2024 00:43:51 +0000 Subject: [PATCH 18/32] Added hostNetwork, removed resources, moved userpodspec to YAML. --- .../samples/pathways-job_v1_pathwaysjob.yaml | 46 +++++- internal/controller/pathwaysjob_controller.go | 139 +++++++++--------- pkg/utils/extra_prototype.go | 26 +++- 3 files changed, 136 insertions(+), 75 deletions(-) diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index aa51db67..4ee9f054 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -21,4 +21,48 @@ spec: - type: tpu-v4-podslice topology: 2x2x2 numSlices: 1 - pathwaysDir: "gs://cloud-pathways-staging/tmp" \ No newline at end of file + pathwaysDir: "gs://cloud-pathways-staging/tmp" + template: # UserPodTemplate + spec: + containers: + - name: jetstream + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + ports: + - containerPort: 9000 + env: + - name: XCLOUD_ENVIRONMENT + value: GCP + - name: JAX_PLATFORMS + value: proxy + - name: JAX_BACKEND_TARGET + value: grpc://pathways-1-leader-0-0.pathways-1:38681 + command: + - bash + - -c + - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap + _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py + MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 + load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items + max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false + model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 + ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) + & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; + EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep + infinity;' + - name: tester + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + env: null + command: + - bash + - -c + - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap + _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 + JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 + --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; + echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne + 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: + $(date); sleep infinity;' + securityContext: + privileged: true diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 04ac152b..8a4f881a 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -132,11 +132,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate - RMContainerSpec, _ := MakeResourceManagerContainer(pw) - ProxyContainerSpec, _ := MakeProxyContainer(pw) - affinitySpec, _ := MakePodAffinityRules(pw) - // // Pathways Spec + 
JobSet for batch inference ------ + leaderJob, _ := MakeLeaderJob(pw) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ @@ -148,70 +145,7 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo MaxRestarts: 4, }, ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ - { - Name: "leader", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Affinity: affinitySpec, - NodeSelector: map[string]string{ - "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, - "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, - }, - Tolerations: []corev1.Toleration{ - { - Key: "google.com/tpu", - Operator: "Exists", - Effect: "NoSchedule", - }, - }, - Volumes: []corev1.Volume{ - { - Name: "shared-tmp", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/tmp", - Type: &volumeSourceType, - }, - }, - }, - }, // end Volumes - Containers: []corev1.Container{ - *RMContainerSpec, - *ProxyContainerSpec, - { - Name: "jetstream", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Env: []corev1.EnvVar{ - {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, - {Name: "JAX_PLATFORMS", Value: "proxy"}, - {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pw.GetName(), "leader", pw.GetName())}, - {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - }, - Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, - Command: []string{"bash", "-c", "echo Start ; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py 
MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false model_name='llama2-70b' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2); echo End; sleep infinity;"}, - }, // end jetstream - - { - Name: "tester", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, - }, // end tester - }, // end leader []containers - }, // end PodSpec - }, - }, - }, - }, // end replicated Job + *leaderJob, { Name: "worker", Replicas: int32(pw.Spec.Workers[0].NumSlices), @@ -263,6 +197,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo }, }, }, // end Volumes + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX }, }, }, @@ -299,7 +235,7 @@ func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -// helpers +// Some Pathways helpers func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) { truth := true @@ -321,8 +257,8 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Containe {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", 
pw.GetName(), "leader", pw.GetName())}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, + Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, } return &rmContainerSpec, nil } @@ -339,8 +275,8 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, + Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, } return &proxyContainerSpec, nil } @@ -386,3 +322,60 @@ func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) } // end Affinity return &affinity, nil } + +func GetUserContainerList(pw *pathwaysjob.PathwaysJob) ([]corev1.Container, error) { + containerList := pw.Spec.UserPodTemplate.Spec.Containers + return containerList, nil +} + +func MakeLeaderJob(pw *pathwaysjob.PathwaysJob) (*jobsetv1alpha2.ReplicatedJob, error) { + // truth := true + volumeSourceType := corev1.HostPathDirectoryOrCreate 
+ RMContainerSpec, _ := MakeResourceManagerContainer(pw) + ProxyContainerSpec, _ := MakeProxyContainer(pw) + affinitySpec, _ := MakePodAffinityRules(pw) + userContainerList, _ := GetUserContainerList(pw) + + leaderJob := jobsetv1alpha2.ReplicatedJob{ + Name: "leader", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Affinity: affinitySpec, + NodeSelector: map[string]string{ + "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, + }, + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + Containers: append([]corev1.Container{*RMContainerSpec, *ProxyContainerSpec}, userContainerList...), // end leader []containers + }, // end PodSpec + }, + }, + }, + } // end replicated Job + return &leaderJob, nil +} diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 682dc49d..69cf265e 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -14,7 +14,7 @@ package utils -// RM AND PROXY SPEC- +// ----------------RM AND PROXY SPEC---------------- // { // Name: "pathways-rm", @@ -61,6 +61,30 @@ package utils // "cloud.google.com/gke-tpu-topology": "2x2x2"}, // NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, +// ----------------jetstream and tester containers---------------- +// { 
+// Name: "jetstream", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Env: []corev1.EnvVar{ +// {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, +// {Name: "JAX_PLATFORMS", Value: "proxy"}, +// {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pw.GetName(), "leader", pw.GetName())}, +// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, +// }, +// Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, +// Command: []string{"bash", "-c", "echo Start ; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false model_name='llama2-70b' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2); echo End; sleep infinity;"}, +// }, // end jetstream + +// { +// Name: "tester", +// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable +// ImagePullPolicy: "Always", +// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, +// Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, +// }, // end tester + 
//----------------LIST---------------- // List JobSets using client From c651057fb695167f625ffc104d61a0041b3635fc Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 16 Oct 2024 18:12:12 +0000 Subject: [PATCH 19/32] Reconciliation - List JobSets and Create only if the JobSet does not exist. --- .../samples/pathways-job_v1_pathwaysjob.yaml | 7 +- internal/controller/pathwaysjob_controller.go | 83 +++++++++++++++---- pkg/utils/extra_prototype.go | 1 - 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index 4ee9f054..7a50f08e 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,7 +15,7 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-1 + name: pathways-trial2 spec: workers: - type: tpu-v4-podslice @@ -49,11 +49,10 @@ spec: ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; EXIT_CODE=$? 
echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep - infinity;' + 500;' - name: tester image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest imagePullPolicy: Always - env: null command: - bash - -c @@ -63,6 +62,6 @@ spec: --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: - $(date); sleep infinity;' + $(date); sleep 500;' securityContext: privileged: true diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 8a4f881a..7cf822aa 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -64,31 +64,56 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) ctx = ctrl.LoggerInto(ctx, log) - log.Info("ROSHANI CONTROLLER WORKING...", "req.NamespacedName", req.NamespacedName.String(), "req.Namespace", req.Namespace) + log.Info("PathwaysJob: CONTROLLER WORKING...", "req.NamespacedName", req.NamespacedName.String(), "req.Namespace", req.Namespace) // 1. Fetch the Pathways object if err := r.Get(ctx, req.NamespacedName, pw); err != nil { - log.Info("Unable to fetch Pathways ") + log.Info("PathwaysJob: Unable to fetch Pathways ") return ctrl.Result{}, client.IgnoreNotFound(err) } // 2. Process the object kubeconfig := ctrl.GetConfigOrDie() - log.Info("Roshani, config established...") + // log.Info("PathwaysJob: config established...") jobSetClient := jobsetclient.NewForConfigOrDie(kubeconfig) - log.Info("Roshani, client built for JobSet...") + // log.Info("PathwaysJob: client built for JobSet...") // 2.1 Figure out if PathwaysJob is already present and in "Suspended / Completed / Failed states", // if it is the case, there is nothing to do. - // 3. 
Update the cluster - create update and delete other resources - if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { - log.Error(err, "Roshani, failed to create JobSet \n") + childJobSets, err := r.listChildJobSets(ctx, pw, jobSetClient) + if err != nil { + log.Error(err, "PathwaysJob: failed to list JobSets \n") return ctrl.Result{}, err } + // 2.1.1 List childJobSets + for _, jobset := range childJobSets { + if jobset.GetName() == pw.GetName() { + log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") + for _, c := range jobset.Status.Conditions { + log.Info("PathwaysJob: Condition is ", "Type", c.Type) + } + } else { + // 3. Update the cluster - create update and delete other resources + log.Info("PathwaysJob: creating JobSet \n") + if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { + log.Error(err, "PathwaysJob: failed to create JobSet \n") + return ctrl.Result{}, err + } + } + } + // report status + + // // 3. Update the cluster - create update and delete other resources + // log.Info("PathwaysJob: creating JobSet \n") + // if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { + // log.Error(err, "PathwaysJob: failed to create JobSet \n") + // return ctrl.Result{}, err + // } + //4. Update the object's status using Conditions //5. 
Return a result @@ -122,18 +147,33 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) // function setPathwaysJobResumedCondition +func (r *PathwaysJobReconciler) listChildJobSets(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) ([]jobsetv1alpha2.JobSet, error) { + log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + // ctx = ctrl.LoggerInto(ctx, log3) + log3.Info("PathwaysJob: in listChildJobSets", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + + var jsList *jobsetv1alpha2.JobSetList + jsList, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).List(ctx, metav1.ListOptions{}) + + if err != nil { + log3.Info("PathwaysJob: can't list JobSets: ", "error ", err) + return nil, err + } + return jsList.Items, nil +} + func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) - ctx = ctrl.LoggerInto(ctx, log2) + // ctx = ctrl.LoggerInto(ctx, log2) - log2.Info("ROSHANI in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + log2.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) // Some predefined variables truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate // // Pathways Spec + JobSet for batch inference ------ - leaderJob, _ := MakeLeaderJob(pw) + leaderJob, _ := MakeLeaderJob(ctx, pw) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ @@ -211,24 +251,29 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo // Set Pathways controller as the owner of the JobSet for garbage collection. 
if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { - log2.Info("Roshani, failed to set Pathways as owner of JobSet.", "error ", err) + log2.Info("PathwaysJob: failed to set Pathways as owner of JobSet.", "error ", err) } else { - log2.Info("Roshani, successfully set Pathways as owner of JobSet.") + log2.Info("PathwaysJob: successfully set Pathways as owner of JobSet.") } js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) if err != nil { - log2.Info("Roshani, failed to create JobSet: ", "JobSet name", js.Name) + log2.Info("PathwaysJob: failed to create JobSet: ", "JobSet name", js.Name) return err } else { - log2.Info("Roshani, successfully created JobSet: ", "JobSet name", js.Name) + log2.Info("PathwaysJob: successfully created JobSet: ", "JobSet name", js.Name) } return nil } // SetupWithManager sets up the controller with the Manager. func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { + + // if err := jobsetv1alpha2.AddToScheme(mgr.GetScheme()); err != nil { + // return err + // } + return ctrl.NewControllerManagedBy(mgr). For(&pathwaysjob.PathwaysJob{}). // Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet @@ -328,13 +373,17 @@ func GetUserContainerList(pw *pathwaysjob.PathwaysJob) ([]corev1.Container, erro return containerList, nil } -func MakeLeaderJob(pw *pathwaysjob.PathwaysJob) (*jobsetv1alpha2.ReplicatedJob, error) { +func MakeLeaderJob(ctx context.Context, pw *pathwaysjob.PathwaysJob) (*jobsetv1alpha2.ReplicatedJob, error) { // truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate RMContainerSpec, _ := MakeResourceManagerContainer(pw) ProxyContainerSpec, _ := MakeProxyContainer(pw) affinitySpec, _ := MakePodAffinityRules(pw) - userContainerList, _ := GetUserContainerList(pw) + containerList, _ := GetUserContainerList(pw) + containerList = append(containerList, *RMContainerSpec, *ProxyContainerSpec) + + // log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + // log3.Info("PathwaysJob:...MakeLeaderJob", "Length of container list is", len(containerList)) leaderJob := jobsetv1alpha2.ReplicatedJob{ Name: "leader", @@ -371,7 +420,7 @@ func MakeLeaderJob(pw *pathwaysjob.PathwaysJob) (*jobsetv1alpha2.ReplicatedJob, }, }, }, // end Volumes - Containers: append([]corev1.Container{*RMContainerSpec, *ProxyContainerSpec}, userContainerList...), // end leader []containers + Containers: containerList, // end leader []containers }, // end PodSpec }, }, diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 69cf265e..9a06a30d 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -108,7 +108,6 @@ package utils // // // JobSet list // var jsList *jobsetv1alpha2.JobSetList - // jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) // if err != nil { // log.Info("Roshani, can't list JobSets: ", "error ", err) From 32c84e3402d2a79429662a9c63cb49fda208b6a3 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Thu, 17 Oct 2024 05:34:37 +0000 Subject: [PATCH 20/32] Fix reconciliation logic to avoid multiple calls to createJobSet. 
--- api/v1/pathwaysjob_types.go | 46 +- api/v1/zz_generated.deepcopy.go | 26 +- ...ways-job.pathways.domain_pathwaysjobs.yaml | 13455 ++++++++-------- .../samples/pathways-job_v1_pathwaysjob.yaml | 83 +- internal/controller/pathwaysjob_controller.go | 369 +- pkg/utils/extra_prototype.go | 41 + 6 files changed, 7151 insertions(+), 6869 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 2551bb69..aa95278f 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -59,16 +59,6 @@ type PathwaysJobSpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file - // ColocationPolicy defines whether the user job and the Pathways - // resources (RM, proxy) must be colocated on TPUs, with the Pathways - // workers or not. If user chooses to "colocate", then the Pathways RM - // and proxy run together with the user job as a single pod. - // Users may opt for "best-effort" placement where scheduler places the - // RM and proxy (as a single pod) on the CPU nodepools by default. User - // workload will be deployed separately,as a pod. - // Default is best-effort. - ColocationPolicy ColocationPolicy `json:"colocationPolicy,omitempty"` - // Maximum number of times the JobSet is restarted. MaxRestarts int32 `json:"maxRestarts,omitempty"` @@ -84,11 +74,8 @@ type PathwaysJobSpec struct { // one type of worker is supported. Workers []WorkerSpec `json:"workers"` - // UserPodTemplate accepts a pod composed of user's workload - // (and other) containers. - // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec - // +optional - UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` + // Pathways single-controller specifications and user workload. 
+ Controller *ControllerSpec `json:"controller"` } // PathwaysJobStatus defines the observed state of PathwaysJob @@ -103,15 +90,15 @@ type PathwaysJobStatus struct { Conditions []metav1.Condition `json:"conditions,omitempty"` } -// +kubebuilder:validation:Enum=colocate;best-effort -type ColocationPolicy string +// +kubebuilder:validation:Enum=colocate;default +type DeploymentMode string const ( - Colocate ColocationPolicy = "colocate" - BestEffort ColocationPolicy = "best-effort" + Colocate DeploymentMode = "colocate" + Default DeploymentMode = "default" ) -// The WorkerSpec struct takes in the specifications for the +// The WorkerSpec struct lists the specifications for the // Pathways workers. type WorkerSpec struct { // This will translate to a nodeSelector of the form @@ -126,6 +113,25 @@ type WorkerSpec struct { NumSlices int32 `json:"numSlices"` } +// The ControllerSpec struct lists the specifications for the +// Pathways controller. User workload can also be provided here. +type ControllerSpec struct { + // DeploymentMode defines whether the user job and the Pathways + // resources (RM, proxy) must be colocated on TPUs, with the Pathways + // workers or not. If user chooses to "colocate", then the Pathways RM + // and proxy run together with the user job as a single pod. + // Users may opt for "default" placement where scheduler places the + // RM pod and the proxy pod on the CPU nodepools by default. User + // workload will be deployed separately, as a pod. + DeploymentMode DeploymentMode `json:"deploymentMode,omitempty"` + + // UserPodTemplate accepts a pod composed of user's workload + // (and other) containers. 
+ // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec + // +optional + UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` +} + func init() { SchemeBuilder.Register(&PathwaysJob{}, &PathwaysJobList{}) } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index f1f78ca0..0779538a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -26,6 +26,26 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ControllerSpec) DeepCopyInto(out *ControllerSpec) { + *out = *in + if in.UserPodTemplate != nil { + in, out := &in.UserPodTemplate, &out.UserPodTemplate + *out = new(corev1.PodTemplateSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ControllerSpec. +func (in *ControllerSpec) DeepCopy() *ControllerSpec { + if in == nil { + return nil + } + out := new(ControllerSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PathwaysJob) DeepCopyInto(out *PathwaysJob) { *out = *in @@ -93,9 +113,9 @@ func (in *PathwaysJobSpec) DeepCopyInto(out *PathwaysJobSpec) { *out = make([]WorkerSpec, len(*in)) copy(*out, *in) } - if in.UserPodTemplate != nil { - in, out := &in.UserPodTemplate, &out.UserPodTemplate - *out = new(corev1.PodTemplateSpec) + if in.Controller != nil { + in, out := &in.Controller, &out.Controller + *out = new(ControllerSpec) (*in).DeepCopyInto(*out) } } diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 5fc0f588..47d4c047 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -38,283 +38,456 @@ spec: type: object spec: properties: - colocationPolicy: - description: |- - ColocationPolicy defines whether the user job and the Pathways - resources (RM, proxy) must be colocated on TPUs, with the Pathways - workers or not. If user chooses to "colocate", then the Pathways RM - and proxy run together with the user job as a single pod. - Users may opt for "best-effort" placement where scheduler places the - RM and proxy (as a single pod) on the CPU nodepools by default. User - workload will be deployed separately,as a pod. - Default is best-effort. - enum: - - colocate - - best-effort - type: string - maxRestarts: - description: Maximum number of times the JobSet is restarted. - format: int32 - type: integer - pathwaysDir: - description: |- - PathwaysDir is a persistent location like GCS at which temporary - Pathways artifacts can be stored like HBM state during interruptions. - Currently, Pathways supports a precreated GCS directory only. - type: string - pathwaysVersion: - description: PathwaysVersion is the version of the Pathways client. - type: string - template: - description: |- - UserPodTemplate accepts a pod composed of user's workload - (and other) containers. 
- https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec + controller: + description: Pathways single-controller specifications and user workload. properties: - metadata: + deploymentMode: description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - type: object - spec: + DeploymentMode defines whether the user job and the Pathways + resources (RM, proxy) must be colocated on TPUs, with the Pathways + workers or not. If user chooses to "colocate", then the Pathways RM + and proxy run together with the user job as a single pod. + Users may opt for "default" placement where scheduler places the + RM pod and the proxy pod on the CPU nodepools by default. User + workload will be deployed separately, as a pod. + enum: + - colocate + - default + type: string + template: description: |- - Specification of the desired behavior of the pod. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + UserPodTemplate accepts a pod composed of user's workload + (and other) containers. + https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec properties: - activeDeadlineSeconds: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + type: object + spec: description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints + Specification of the desired behavior of the pod. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status properties: - nodeAffinity: - description: Describes node affinity scheduling rules - for the pod. + activeDeadlineSeconds: + description: |- + Optional duration in seconds the pod may be active on the node relative to + StartTime before the system will actively try to mark it failed and kill associated containers. + Value must be a positive integer. + format: int64 + type: integer + affinity: + description: If specified, the pod's scheduling constraints properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node matches the corresponding matchExpressions; the - node(s) with the highest sum are the most preferred. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. 
The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). properties: - matchExpressions: - description: A list of node selector requirements - by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the - selector applies to. - type: string - operator: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector requirements - by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the - selector applies to. - type: string - operator: + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. 
Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, in the - range 1-100. - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. 
+ A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. 
+ for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) properties: - matchExpressions: - description: A list of node selector requirements - by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the - selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. 
+ Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. 
- This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector requirements - by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the - selector applies to. + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. 
+ properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight type: object - x-kubernetes-map-type: atomic type: array x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules (e.g. - co-locate this pod in the same node, zone, etc. as some - other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. 
- for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the - node(s) with the highest sum are the most preferred. - items: - description: The weights of all of the matched WeightedPodAffinityTerm - fields are added per-node to find the most preferred - node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity term, - associated with the corresponding weight. + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running properties: labelSelector: description: |- @@ -470,215 +643,217 @@ spec: required: - topologyKey type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - When there are multiple elements, the lists of nodes corresponding to each - podAffinityTerm are intersected, i.e. all terms must be satisfied. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. 
+ for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) properties: - matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key - that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming - pod labels will be ignored. The default value is empty. - The same key is forbidden to exist in both matchLabelKeys and labelSelector. - Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming - pod labels will be ignored. The default value is empty. - The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. 
- Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key - that the selector applies to. + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + items: type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. 
+ The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. 
- null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling rules - (e.g. avoid putting this pod in the same node, zone, - etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. 
- for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the - node(s) with the highest sum are the most preferred. - items: - description: The weights of all of the matched WeightedPodAffinityTerm - fields are added per-node to find the most preferred - node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity term, - associated with the corresponding weight. + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running properties: labelSelector: description: |- @@ -834,268 +1009,200 @@ spec: required: - topologyKey type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - When there are multiple elements, the lists of nodes corresponding to each - podAffinityTerm are intersected, i.e. all terms must be satisfied. 
- items: + type: array + x-kubernetes-list-type: atomic + type: object + type: object + automountServiceAccountToken: + description: AutomountServiceAccountToken indicates whether + a service account token should be automatically mounted. + type: boolean + containers: + description: |- + List of containers belonging to the pod. + Containers cannot currently be added or removed. + There must be at least one container in a Pod. + Cannot be updated. + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. 
+ More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. 
+ properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. properties: key: - description: key is the label key - that the selector applies to. + description: The key to select. type: string - operator: + name: + default: "" description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean required: - key - - operator type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. 
The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming - pod labels will be ignored. The default value is empty. - The same key is forbidden to exist in both matchLabelKeys and labelSelector. - Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming - pod labels will be ignored. The default value is empty. - The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. - Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. 
- items: + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to + select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace properties: key: - description: key is the label key - that the selector applies to. + description: The key of the secret + to select from. Must be a valid + secret key. type: string - operator: + name: + default: "" description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
+ Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean required: - key - - operator type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates whether - a service account token should be automatically mounted. - type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you want - to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. - More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. 
- Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. - More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment variable - present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - Escaped references will never be expanded, regardless of whether the variable - exists or not. - Defaults to "". - type: string - valueFrom: - description: Source for the environment variable's - value. Cannot be used if value is not empty. + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. + configMapRef: + description: The ConfigMap to select from properties: - key: - description: The key to select. - type: string name: default: "" description: |- @@ -1109,64 +1216,18 @@ spec: type: string optional: description: Specify whether the ConfigMap - or its key must be defined + must be defined type: boolean - required: - - key type: object x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a + C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from properties: - apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". - type: string - fieldPath: - description: Path of the field to select - in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret in - the pod's namespace - properties: - key: - description: The key of the secret to - select from. Must be a valid secret - key. - type: string name: default: "" description: |- @@ -1180,105 +1241,271 @@ spec: type: string optional: description: Specify whether the Secret - or its key must be defined + must be defined type: boolean - required: - - key type: object x-kubernetes-map-type: atomic type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source of - a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. 
Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the Secret must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. 
HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: description: |- - PostStart is called immediately after a container is created. 
If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: description: Exec specifies the action to take. @@ -1295,6 +1522,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. @@ -1346,23 +1599,27 @@ spec: required: - port type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. - properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. properties: host: description: 'Optional: Host name to connect @@ -1380,18 +1637,91 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer type: object - preStop: + name: description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. Regardless of the outcome of the handler, the - container will eventually terminate within the Pod's termination grace - period (unless delayed by finalizers). Other management of the container blocks until the hook completes - or until the termination grace period is reached. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. 
+ If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: description: Exec specifies the action to take. @@ -1408,6 +1738,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. 
@@ -1459,23 +1815,27 @@ spec: required: - port type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. - properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. properties: host: description: 'Optional: Host name to connect @@ -1493,1307 +1853,1393 @@ spec: required: - port type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. - properties: - command: + terminationGracePeriodSeconds: description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. 
The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer type: object - failureThreshold: + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. 
+ type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: + claims: description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. properties: name: description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. type: string required: - name - - value type: object type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. 
- format: int64 - type: integer - timeoutSeconds: + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - Cannot be updated. - items: - description: ContainerPort represents a network port - in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
+ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ properties: - command: + allowPrivilegeEscalation: description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: + appArmorProfile is the AppArmor options to use by this container. 
If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type type: string - value: - description: The header field value + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. 
Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents resource - resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. 
- properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - Setting the RestartPolicy as "Always" for the init container will have the following effect: - this init container will be continually restarted on - exit until all regular containers have terminated. 
Once all regular - containers have completed, all init containers with restartPolicy "Always" - will be shut down. This lifecycle differs from normal init containers and - is often referred to as a "sidecar" container. Although this init - container still starts in the init container sequence, it does not wait - for the container to complete before proceeding to the next init - container. Instead, the next init container starts immediately after this - init container is started, or after any startupProbe has successfully - completed. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". 
- type: string - type: + type: array + x-kubernetes-list-type: atomic + type: object + privileged: description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. - properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX capabilities - type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX capabilities - type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. 
- May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. 
- type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. type: string - type: + readOnlyRootFilesystem: description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. 
- properties: - gmsaCredentialSpec: + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean - runAsUserName: + runAsUser: description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. 
If set in both SecurityContext and + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. - If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, - when it might take a long time to load data or warm a cache, than during steady-state operation. - This cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 + Note that this field cannot be set when spec.os.name is windows. + format: int64 type: integer - service: + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: - If this is not specified, the default behavior is defined by gRPC. 
- type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. 
+ properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the + name of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. 
- Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). 
+ + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. 
+ format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer type: object - terminationGracePeriodSeconds: + stdin: description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. 
- Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. 
File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the - first client attaches to stdin, and then remains open and accepts data until the client disconnects, - at which time stdin is closed and remains closed until the container is restarted. If this - flag is false, a container processes that reads from stdin will never receive an EOF. - Default is false - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- Cannot be updated. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - The log output is limited to 2048 bytes or 80 lines, whichever is smaller. - Defaults to File. - Cannot be updated. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block devices - to be used by the container. - items: - description: volumeDevice describes a mapping of a - raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside of - the container that the device will be mapped - to. - type: string - name: - description: name must match the name of a persistentVolumeClaim - in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting of a - Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. 
- When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will be + mapped to. + type: string + name: + description: name must match the name of a + persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). 
+ type: string + name: + description: This must match the Name of a + Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. + If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. If this - field is set to Enabled, the mount is made recursively read-only if it is - supported by the container runtime, otherwise the pod will not be started and - an error will be generated to indicate the reason. + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to - None (or be unspecified, which defaults to None). + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). 
- type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. 
+ type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object type: array - x-kubernetes-list-type: atomic - options: + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + dnsConfig: description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. - items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: + Specifies the DNS parameters of a pod. + Parameters specified here will be merged to the generated DNS + configuration based on DNSPolicy. + properties: + nameservers: + description: |- + A list of DNS name server IP addresses. + This will be appended to the base nameservers generated from DNSPolicy. + Duplicated nameservers will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + options: + description: |- + A list of DNS resolver options. + This will be merged with the base options generated from DNSPolicy. + Duplicated entries will be removed. Resolution options given in Options + will override those that appear in the base DNSPolicy. + items: + description: PodDNSConfigOption defines DNS resolver + options of a pod. + properties: + name: + description: Required. + type: string + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + searches: + description: |- + A list of DNS search domains for host-name lookup. + This will be appended to the base search paths generated from DNSPolicy. 
+ Duplicated search paths will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + dnsPolicy: + description: |- + Set DNS policy for the pod. + Defaults to "ClusterFirst". + Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. + DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. + To have DNS options set along with hostNetwork, you have to specify DNS policy + explicitly to 'ClusterFirstWithHostNet'. + type: string + enableServiceLinks: + description: |- + EnableServiceLinks indicates whether information about services should be injected into pod's + environment variables, matching the syntax of Docker links. + Optional: Defaults to true. + type: boolean + ephemeralContainers: + description: |- + List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing + pod to perform user-initiated actions such as debugging. This list cannot be specified when + creating a pod, and it cannot be modified by updating the pod spec. In order to add an + ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. + items: + description: |- + An EphemeralContainer is a temporary container that you may add to an existing Pod for + user-initiated activities such as debugging. Ephemeral containers have no resource or + scheduling guarantees, and they will not be restarted when they exit or when a Pod is + removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the + Pod to exceed its resource allocation. + + + To add an ephemeral container, use the ephemeralcontainers subresource of an existing + Pod. Ephemeral containers may not be removed or restarted. + properties: + args: + description: |- + Arguments to the entrypoint. + The image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. 
If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. 
Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. 
+ type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to + select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a + C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: Lifecycle is not allowed for ephemeral + containers. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. 
HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probes are not allowed for ephemeral + containers. 
+ properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
+ The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object name: - description: Required. - type: string - value: + description: |- + Name of the ephemeral container specified as a DNS_LABEL. + This name must be unique among all containers, init containers and ephemeral containers. type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. 
- type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. - - - To add an ephemeral container, use the ephemeralcontainers subresource of an existing - Pod. Ephemeral containers may not be removed or restarted. - properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. 
- More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. - More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment variable - present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - Escaped references will never be expanded, regardless of whether the variable - exists or not. - Defaults to "". - type: string - valueFrom: - description: Source for the environment variable's - value. Cannot be used if value is not empty. 
+ ports: + description: Ports are not allowed for ephemeral + containers. + items: + description: ContainerPort represents a network + port in a single container. properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the ConfigMap - or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: + containerPort: description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". - type: string - fieldPath: - description: Path of the field to select - in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. 
+ type: string + hostPort: description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret in - the pod's namespace - properties: - key: - description: The key of the secret to - select from. Must be a valid secret - key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the Secret - or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. 
- The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source of - a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer name: - default: "" description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" + protocol: + default: TCP description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. 
Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". type: string - optional: - description: Specify whether the Secret must - be defined - type: boolean + required: + - containerPort type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: Probes are not allowed for ephemeral + containers. properties: exec: description: Exec specifies the action to take. 
@@ -2810,6 +3256,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. @@ -2849,64 +3321,363 @@ spec: - type: integer - type: string description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. 
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. 
+ properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources + already allocated to the pod. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + Restart policy for the container to manage the restart behavior of each + container within a pod. + This may only be set for init containers. You cannot set this field on + ephemeral containers. + type: string + securityContext: + description: |- + Optional: SecurityContext defines the security options the ephemeral container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. type: string required: - - port + - type type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic type: object - tcpSocket: + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. 
There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. 
+ If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + level: + description: Level is SELinux level label + that applies to the container. type: string - port: - anyOf: - - type: integer - - type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. 
+ type: string required: - - port + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the + name of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string type: object type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. 
Regardless of the outcome of the handler, the - container will eventually terminate within the Pod's termination grace - period (unless delayed by finalizers). Other management of the container blocks until the hook completes - or until the termination grace period is reached. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + startupProbe: + description: Probes are not allowed for ephemeral + containers. properties: exec: description: Exec specifies the action to take. @@ -2923,6 +3694,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. @@ -2974,23 +3771,27 @@ spec: required: - port type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. - properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. properties: host: description: 'Optional: Host name to connect @@ -3005,1132 +3806,474 @@ spec: Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral containers. - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. 
- format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. 
Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral containers. - items: - description: ContainerPort represents a network port - in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral containers. - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. 
The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string + required: + - port + type: object + terminationGracePeriodSeconds: description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. 
+ If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: + stdin: description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. 
- items: - description: ContainerResizePolicy represents resource - resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + targetContainerName: description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. + If set, the name of the container from PodSpec that this ephemeral container targets. + The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. + If not set then the ephemeral container uses the namespaces configured in the Pod spec. 
- This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - - This field is immutable. It can only be set for containers. + The container runtime must implement support for this feature. If the runtime does not + support namespace targeting then the result of setting this field is undefined. + type: string + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: volumeDevice describes a mapping + of a raw block device within a container. properties: + devicePath: + description: devicePath is the path inside + of the container that the device will be + mapped to. 
+ type: string name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. + description: name must match the name of a + persistentVolumeClaim in the pod type: string required: + - devicePath - name type: object type: array x-kubernetes-list-map-keys: - - name + - devicePath x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. - This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: + volumeMounts: description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. - properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX capabilities - type + Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. 
+ properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX capabilities - type + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. 
- format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. 
- Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. 
- type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral containers. - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: + name: + description: This must match the Name of a + Volume. type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. 
If stdinOnce is set to true, stdin is opened on container start, is empty until the - first client attaches to stdin, and then remains open and accepts data until the client disconnects, - at which time stdin is closed and remains closed until the container is restarted. If this - flag is false, a container processes that reads from stdin will never receive an EOF. - Default is false - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - - The container runtime must implement support for this feature. If the runtime does not - support namespace targeting then the result of setting this field is undefined. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - Cannot be updated. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - The log output is limited to 2048 bytes or 80 lines, whichever is smaller. - Defaults to File. - Cannot be updated. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block devices - to be used by the container. - items: - description: volumeDevice describes a mapping of a - raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside of - the container that the device will be mapped - to. - type: string - name: - description: name must match the name of a persistentVolumeClaim - in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting of a - Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). 
+ Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. + If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. If this - field is set to Enabled, the mount is made recursively read-only if it is - supported by the container runtime, otherwise the pod will not be started and - an error will be generated to indicate the reason. + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to - None (or be unspecified, which defaults to None). + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. 
- Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. - properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. 
- When set to false, a new userns is created for the pod. Setting false is useful for - mitigating container breakout vulnerabilities even allowing users to run their - containers as root without actually having root privileges on the host. - This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. 
If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. - The resourceRequirements of an init container are taken into account during scheduling - by finding the highest request/limit for each resource type, and then using the max of - of that value or the sum of the normal containers. Limits are applied to init containers - in a similar fashion. - Init containers cannot currently be added or removed. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ - items: - description: A single application container that you want - to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. - More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). 
+ type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + hostAliases: + description: |- + HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts + file if specified. + items: description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless - of whether the variable exists or not. Cannot be updated. - More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: + HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the + pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + ip: + description: IP address of the host file entry. + type: string + required: + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostIPC: + description: |- + Use the host's ipc namespace. + Optional: Default to false. + type: boolean + hostNetwork: + description: |- + Host networking requested for this pod. Use the host's network namespace. + If this option is set, the ports that will be used must be specified. + Default to false. + type: boolean + hostPID: + description: |- + Use the host's pid namespace. + Optional: Default to false. + type: boolean + hostUsers: + description: |- + Use the host's user namespace. + Optional: Default to true. + If set to true or not present, the pod will be run in the host user namespace, useful + for when the pod needs a feature only available to the host user namespace, such as + loading a kernel module with CAP_SYS_MODULE. + When set to false, a new userns is created for the pod. Setting false is useful for + mitigating container breakout vulnerabilities even allowing users to run their + containers as root without actually having root privileges on the host. + This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. + type: boolean + hostname: + description: |- + Specifies the hostname of the Pod + If not specified, the pod's hostname will be set to a system-defined value. + type: string + imagePullSecrets: + description: |- + ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. + If specified, these secrets will be passed to individual puller implementations for them to use. + More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod + items: description: |- - List of environment variables to set in the container. 
- Cannot be updated. - items: - description: EnvVar represents an environment variable - present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + initContainers: + description: |- + List of initialization containers belonging to the pod. + Init containers are executed in order prior to containers being started. If any + init container fails, the pod is considered to have failed and is handled according + to its restartPolicy. The name for an init container or normal container must be + unique among all containers. + Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. + The resourceRequirements of an init container are taken into account during scheduling + by finding the highest request/limit for each resource type, and then using the max of + of that value or the sum of the normal containers. Limits are applied to init containers + in a similar fashion. + Init containers cannot currently be added or removed. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - Escaped references will never be expanded, regardless of whether the variable - exists or not. - Defaults to "". + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". 
Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: type: string - valueFrom: - description: Source for the environment variable's - value. Cannot be used if value is not empty. + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's + value. Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the ConfigMap + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to + select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. 
+ type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from properties: - key: - description: The key to select. 
- type: string name: default: "" description: |- @@ -4144,64 +4287,18 @@ spec: type: string optional: description: Specify whether the ConfigMap - or its key must be defined + must be defined type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". - type: string - fieldPath: - description: Path of the field to select - in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource to select' - type: string - required: - - resource type: object x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret in - the pod's namespace + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be a + C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from properties: - key: - description: The key of the secret to - select from. 
Must be a valid secret - key. - type: string name: default: "" description: |- @@ -4215,105 +4312,271 @@ spec: type: string optional: description: Specify whether the Secret - or its key must be defined + must be defined type: boolean - required: - - key type: object x-kubernetes-map-type: atomic type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source of - a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: Specify whether the Secret must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. 
Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in HTTP + probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. 
+ Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number of + seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to + connect to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: description: Exec specifies the action to take. 
@@ -4330,6 +4593,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. @@ -4381,23 +4670,27 @@ spec: required: - port type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. - properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. 
Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. properties: host: description: 'Optional: Host name to connect @@ -4415,18 +4708,91 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer type: object - preStop: + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. 
+ For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. Regardless of the outcome of the handler, the - container will eventually terminate within the Pod's termination grace - period (unless delayed by finalizers). Other management of the container blocks until the hook completes - or until the termination grace period is reached. 
- More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: description: Exec specifies the action to take. @@ -4443,6 +4809,32 @@ spec: type: array x-kubernetes-list-type: atomic type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. @@ -4494,23 +4886,27 @@ spec: required: - port type: object - sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. - properties: - seconds: - description: Seconds is the number of seconds - to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: description: |- - Deprecated. 
TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. properties: host: description: 'Optional: Host name to connect @@ -4521,1982 +4917,1594 @@ spec: - type: integer - type: string description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. 
spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - Cannot be updated. - items: - description: ContainerPort represents a network port - in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". 
- type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. 
- type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents resource - resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. 
- Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - - This field is immutable. It can only be set for containers. + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. 
+ description: ContainerResizePolicy represents + resource resize policy for the container. properties: - name: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. type: string required: - - name + - resourceName + - restartPolicy type: object type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true + x-kubernetes-list-type: atomic + resources: description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. + Compute Resources required by this container. + Cannot be updated. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - Setting the RestartPolicy as "Always" for the init container will have the following effect: - this init container will be continually restarted on - exit until all regular containers have terminated. Once all regular - containers have completed, all init containers with restartPolicy "Always" - will be shut down. This lifecycle differs from normal init containers and - is often referred to as a "sidecar" container. Although this init - container still starts in the init container sequence, it does not wait - for the container to complete before proceeding to the next init - container. Instead, the next init container starts immediately after this - init container is started, or after any startupProbe has successfully - completed. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. 
- type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: + claims: description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. - properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX capabilities - type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + + This field is immutable. It can only be set for containers. items: - description: Capability represent POSIX capabilities - type - type: string + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. 
+ type: string + required: + - name + type: object type: array - x-kubernetes-list-type: atomic + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. 
+ This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: + securityContext: description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
+ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ properties: - localhostProfile: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. + procMount denotes the type of proc mount to use for the containers. + The default is DefaultProcMount which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. type: string - type: + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. 
+ If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the + name of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). 
+ In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: - gmsaCredentialSpec: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. - If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, - when it might take a long time to load data or warm a cache, than during steady-state operation. - This cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to take. - properties: - command: + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. 
+ format: int64 + type: integer + timeoutSeconds: description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes format: int32 type: integer - service: - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom - header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: + stdin: description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: + Whether this container should allocate a buffer for stdin in the container runtime. 
If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this - value overrides the value provided by the pod spec. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. - Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. - format: int64 - type: integer - timeoutSeconds: + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will be + mapped to. + type: string + name: + description: name must match the name of a + persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. 
- type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the - first client attaches to stdin, and then remains open and accepts data until the client disconnects, - at which time stdin is closed and remains closed until the container is restarted. If this - flag is false, a container processes that reads from stdin will never receive an EOF. - Default is false - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - Cannot be updated. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - The log output is limited to 2048 bytes or 80 lines, whichever is smaller. - Defaults to File. - Cannot be updated. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block devices - to be used by the container. 
- items: - description: volumeDevice describes a mapping of a - raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside of - the container that the device will be mapped - to. - type: string - name: - description: name must match the name of a persistentVolumeClaim - in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting of a - Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. 
+ type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a + Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. + If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. If this - field is set to Enabled, the mount is made recursively read-only if it is - supported by the container runtime, otherwise the pod will not be started and - an error will be generated to indicate the reason. + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to - None (or be unspecified, which defaults to None). 
+ If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. 
+ Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + nodeName: + description: |- + NodeName is a request to schedule this pod onto a specific node. If it is non-empty, + the scheduler simply schedules this pod onto that node, assuming that it fits resource + requirements. + type: string + nodeSelector: + additionalProperties: type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName is a request to schedule this pod onto a specific node. If it is non-empty, - the scheduler simply schedules this pod onto that node, assuming that it fits resource - requirements. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. - More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. + description: |- + NodeSelector is a selector which must be true for the pod to fit on a node. + Selector which must match a node's labels for the pod to be scheduled on that node. + More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + type: object + x-kubernetes-map-type: atomic + os: + description: |- + Specifies the OS of the containers in the pod. + Some pod and container fields are restricted if this is set. 
- If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions + If the OS field is set to linux, the following fields must be unset: + -securityContext.windowsOptions - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext.seLinuxOptions - - spec.securityContext.seccompProfile - - spec.securityContext.fsGroup - - spec.securityContext.fsGroupChangePolicy - - spec.securityContext.sysctls - - spec.shareProcessNamespace - - spec.securityContext.runAsUser - - spec.securityContext.runAsGroup - - spec.securityContext.supplementalGroups - - spec.containers[*].securityContext.appArmorProfile - - spec.containers[*].securityContext.seLinuxOptions - - spec.containers[*].securityContext.seccompProfile - - spec.containers[*].securityContext.capabilities - - spec.containers[*].securityContext.readOnlyRootFilesystem - - spec.containers[*].securityContext.privileged - - spec.containers[*].securityContext.allowPrivilegeEscalation - - spec.containers[*].securityContext.procMount - - spec.containers[*].securityContext.runAsUser - - spec.containers[*].securityContext.runAsGroup - properties: - name: + If the OS field is set to windows, following fields must be unset: + - spec.hostPID + - spec.hostIPC + - spec.hostUsers + - spec.securityContext.appArmorProfile + - spec.securityContext.seLinuxOptions + - spec.securityContext.seccompProfile + - spec.securityContext.fsGroup + - spec.securityContext.fsGroupChangePolicy + - spec.securityContext.sysctls + - spec.shareProcessNamespace + - spec.securityContext.runAsUser + - spec.securityContext.runAsGroup + - spec.securityContext.supplementalGroups + - spec.containers[*].securityContext.appArmorProfile + - spec.containers[*].securityContext.seLinuxOptions + - spec.containers[*].securityContext.seccompProfile + - spec.containers[*].securityContext.capabilities + - 
spec.containers[*].securityContext.readOnlyRootFilesystem + - spec.containers[*].securityContext.privileged + - spec.containers[*].securityContext.allowPrivilegeEscalation + - spec.containers[*].securityContext.procMount + - spec.containers[*].securityContext.runAsUser + - spec.containers[*].securityContext.runAsGroup + properties: + name: + description: |- + Name is the name of the operating system. The currently supported values are linux and windows. + Additional value may be defined in future and can be one of: + https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration + Clients should expect to handle additional values and treat unrecognized values in this field as os: null + type: string + required: + - name + type: object + overhead: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. + This field will be autopopulated at admission time by the RuntimeClass admission controller. If + the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. + The RuntimeClass admission controller will reject Pod create requests which have the overhead already + set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value + defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. + More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md + type: object + preemptionPolicy: description: |- - Name is the name of the operating system. The currently supported values are linux and windows. 
- Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null + PreemptionPolicy is the Policy for preempting pods with lower priority. + One of Never, PreemptLowerPriority. + Defaults to PreemptLowerPriority if unset. type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value - defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. - More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. 
- format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference to - a pod condition - properties: - conditionType: - description: ConditionType refers to a condition in - the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. + priority: + description: |- + The priority value. Various system components use this field to find the + priority of the pod. When Priority Admission Controller is enabled, it + prevents users from setting this field. The admission controller populates + this field from PriorityClassName. + The higher the value, the higher the priority. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the pod's priority. "system-node-critical" and + "system-cluster-critical" are two special keywords which indicate the + highest priorities with the former being the highest priority. 
Any other + name must be defined by creating a PriorityClass object with that name. + If not specified, the pod priority will be default or zero if there is no + default. + type: string + readinessGates: + description: |- + If specified, all readiness gates will be evaluated for pod readiness. + A pod is ready when all its containers are ready AND + all conditions specified in the readiness gates have status equal to "True" + More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates + items: + description: PodReadinessGate contains the reference + to a pod condition + properties: + conditionType: + description: ConditionType refers to a condition + in the pod's condition list with matching type. + type: string + required: + - conditionType + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaims: + description: |- + ResourceClaims defines which ResourceClaims must be allocated + and reserved before the Pod is allowed to start. The resources + will be made available to those containers which consume them + by name. - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim through a ClaimSource. - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: + This field is immutable. + items: description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - source: - description: Source describes where to find the ResourceClaim. + PodResourceClaim references exactly one ResourceClaim through a ClaimSource. + It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. 
+ Containers that need access to the ResourceClaim reference it with this name. properties: - resourceClaimName: + name: description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. + Name uniquely identifies this resource claim inside the pod. + This must be a DNS_LABEL. type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. The pod name and resource name, along with a - generated component, will be used to form a unique name for the - ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. - - - This field is immutable and no changes will be made to the - corresponding ResourceClaim by the control plane after creating the + source: + description: Source describes where to find the ResourceClaim. - type: string - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. 
- More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. - type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a Pod to - guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. 
- type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: + properties: + resourceClaimName: + description: |- + ResourceClaimName is the name of a ResourceClaim object in the same + namespace as this pod. + type: string + resourceClaimTemplateName: + description: |- + ResourceClaimTemplateName is the name of a ResourceClaimTemplate + object in the same namespace as this pod. - 1. The owning GID will be the FSGroup - 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. The permission bits are OR'd with rw-rw---- + The template will be used to create a new ResourceClaim, which will + be bound to this pod. When this pod is deleted, the ResourceClaim + will also be deleted. The pod name and resource name, along with a + generated component, will be used to form a unique name for the + ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. - If unset, the Kubelet will not modify the ownership and permissions of any volume. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - fsGroupChangePolicy: + This field is immutable and no changes will be made to the + corresponding ResourceClaim by the control plane after creating the + ResourceClaim. + type: string + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + restartPolicy: description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. 
- Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - Note that this field cannot be set when spec.os.name is windows. + Restart policy for all containers within the pod. + One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. + Default to Always. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: boolean - runAsUser: + runtimeClassName: description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: + RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used + to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. 
+ If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an + empty definition that uses the default runtime handler. + More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class + type: string + schedulerName: description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that applies - to the container. - type: string - role: - description: Role is a SELinux role label that applies - to the container. - type: string - type: - description: Type is a SELinux type label that applies - to the container. - type: string - user: - description: User is a SELinux user label that applies - to the container. - type: string - type: object - seccompProfile: + If specified, the pod will be dispatched by specified scheduler. + If not specified, the pod will be dispatched by default scheduler. + type: string + schedulingGates: description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - + SchedulingGates is an opaque list of values that if specified will block scheduling the pod. + If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the + scheduler will not attempt to schedule the pod. - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in addition - to the container's primary GID, the fsGroup (if specified), and group memberships - defined in the container image for the uid of the container process. If unspecified, - no additional groups are added to any container. Note that group memberships - defined in the container image for the uid of the container process are still effective, - even if they are not included in this list. - Note that this field cannot be set when spec.os.name is windows. - items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. + + SchedulingGates can only be set at pod creation time, and be removed only afterwards. items: - description: Sysctl defines a kernel parameter to be - set + description: PodSchedulingGate is associated to a Pod + to guard its scheduling. properties: name: - description: Name of a property to set - type: string - value: - description: Value of a property to set + description: |- + Name of the scheduling gate. + Each scheduling gate must have a unique name field. 
type: string required: - name - - value type: object type: array - x-kubernetes-list-type: atomic - windowsOptions: + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + securityContext: description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. + SecurityContext holds pod-level security attributes and common container settings. + Optional: Defaults to empty. See type description for default values of each field. properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name of - the GMSA credential spec to use. - type: string - hostProcess: + appArmorProfile: description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. 
- Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN. - If a pod does not have FQDN, this has no effect. - Default to false. - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. - type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. 
- Set this value longer than the expected cleanup time for your process. - Defaults to 30 seconds. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. - type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. 
- items: - description: TopologySpreadConstraint specifies how to spread - matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are ANDed. - items: + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the - selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - MatchLabelKeys cannot be set when LabelSelector isn't set. - Keys that don't exist in the incoming pod labels will - be ignored. A null or empty list means only match against labelSelector. - - - This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same - labelSelector spread as 2/2/1: - In this case, the global minimum is 1. - | zone1 | zone2 | zone3 | - | P P | P P | P | - - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; - scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) - violate MaxSkew(1). - - if MaxSkew is 2, incoming pod can be scheduled onto any zone. 
- When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence - to topologies that satisfy it. - It's a required field. Default value is 1 and 0 is not allowed. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - As a result, when the number of eligible domains is less than minDomains, - scheduler won't schedule more than maxSkew Pods to those domains. - If value is nil, the constraint behaves as if MinDomains is equal to 1. - Valid values are integers greater than 0. - When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: - For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same - labelSelector spread as 2/2/2: - | zone1 | zone2 | zone3 | - | P P | P P | P P | - The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. - In this situation, new pod with the same labelSelector cannot be scheduled, - because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, - it will violate MaxSkew. 
- format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- - If this value is nil, the behavior is equivalent to the Honor policy. - This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. - type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. + type: string + role: + description: Role is a SELinux role label that + applies to the container. + type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. 
+ type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: - If this value is nil, the behavior is equivalent to the Ignore policy. - This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - Also, we define an eligible domain as a domain whose nodes meet the requirements of - nodeAffinityPolicy and nodeTaintsPolicy. - e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. - And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. - It's a required field. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. 
- A constraint is considered "Unsatisfiable" for an incoming pod - if and only if every possible node assignment for that pod would violate - "MaxSkew" on some topology. - For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same - labelSelector spread as 3/1/1: - | zone1 | zone2 | zone3 | - | P P P | P | P | - If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled - to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies - MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler - won't make it *more* imbalanced. - It's a required field. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a pod that - may be accessed by any container in the pod. - properties: - awsElasticBlockStore: + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in addition + to the container's primary GID, the fsGroup (if specified), and group memberships + defined in the container image for the uid of the container process. If unspecified, + no additional groups are added to any container. Note that group memberships + defined in the container image for the uid of the container process are still effective, + even if they are not included in this list. + Note that this field cannot be set when spec.os.name is windows. 
+ items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to + be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name + of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + serviceAccount: + description: |- + DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. + Deprecated: Use serviceAccountName instead. + type: string + serviceAccountName: + description: |- + ServiceAccountName is the name of the ServiceAccount to use to run this pod. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + type: string + setHostnameAsFQDN: + description: |- + If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). + In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). + In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN. + If a pod does not have FQDN, this has no effect. + Default to false. + type: boolean + shareProcessNamespace: + description: |- + Share a single process namespace between all of the containers in a pod. + When this is set containers will be able to view and signal processes from other containers + in the same pod, and the first process in each container will not be assigned PID 1. + HostPID and ShareProcessNamespace cannot both be set. + Optional: Default to false. + type: boolean + subdomain: + description: |- + If specified, the fully qualified Pod hostname will be "...svc.". + If not specified, the pod will not have a domainname at all. + type: string + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). 
+ If this value is nil, the default grace period will be used instead. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + Defaults to 30 seconds. + format: int64 + type: integer + tolerations: + description: If specified, the pod's tolerations. + items: description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . properties: - fsType: + effect: description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - TODO: how do we prevent errors in the filesystem from compromising the machine + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. type: string - partition: + key: description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data Disk - mount on the host and bind mount to the pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching mode: - None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data disk - in the blob storage + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. type: string - diskURI: - description: diskURI is the URI of data disk in - the blob storage - type: string - fsType: + operator: description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - kind: - description: 'kind expected values are Shared: multiple - blob disks per storage account Dedicated: single - blob disk per storage account Managed: azure - managed data disk (only in managed availability - set). defaults to shared' + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. type: string - readOnly: + tolerationSeconds: description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File Service - mount on the host and bind mount to the pod. - properties: - readOnly: + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret that - contains Azure Storage Account Name and Key + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. type: string - shareName: - description: shareName is the azure share Name - type: string - required: - - secretName - - shareName type: object - cephfs: - description: cephFS represents a Ceph FS mount on the - host that shares a pod's lifetime + type: array + x-kubernetes-list-type: atomic + topologySpreadConstraints: + description: |- + TopologySpreadConstraints describes how a group of pods ought to spread across topology + domains. Scheduler will schedule pods in a way which abides by the constraints. + All topologySpreadConstraints are ANDed. + items: + description: TopologySpreadConstraint specifies how + to spread matching pods among the given topology. properties: - monitors: + labelSelector: description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + LabelSelector is used to find matching pods. 
+ Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. 
+ MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). items: type: string type: array x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the mounted - root, rather than the full Ceph tree, default - is /' - type: string - readOnly: + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. + The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. + When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. + format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. + When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. 
+ And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. + In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + + If this value is nil, the behavior is equivalent to the Honor policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. 
+ type: string + nodeTaintsPolicy: description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. Options are: + - Honor: nodes without taints, along with tainted nodes for which the incoming pod + has a toleration, are included. + - Ignore: node taints are ignored. All nodes are included. + + + If this value is nil, the behavior is equivalent to the Ignore policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + topologyKey: + description: |- + TopologyKey is the key of node labels. Nodes that have a label with this key + and identical values are considered to be in the same topology. + We consider each as a "bucket", and try to put balanced number + of pods into each bucket. + We define a domain as a particular instance of a topology. + Also, we define an eligible domain as a domain whose nodes meet the requirements of + nodeAffinityPolicy and nodeTaintsPolicy. + e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. + And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. + It's a required field. + type: string + whenUnsatisfiable: + description: |- + WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy + the spread constraint. + - DoNotSchedule (default) tells the scheduler not to schedule it. + - ScheduleAnyway tells the scheduler to schedule the pod in any location, + but giving higher precedence to topologies that would help reduce the + skew. + A constraint is considered "Unsatisfiable" for an incoming pod + if and only if every possible node assignment for that pod would violate + "MaxSkew" on some topology. 
+ For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 3/1/1: + | zone1 | zone2 | zone3 | + | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled + to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies + MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler + won't make it *more* imbalanced. + It's a required field. type: string - secretRef: + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + x-kubernetes-list-map-keys: + - topologyKey + - whenUnsatisfiable + x-kubernetes-list-type: map + volumes: + description: |- + List of volumes that can be mounted by containers belonging to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes + items: + description: Volume represents a named volume in a pod + that may be accessed by any container in the pod. + properties: + awsElasticBlockStore: description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + awsElasticBlockStore represents an AWS Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore properties: - name: - default: "" + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + format: int32 + type: integer + readOnly: + description: |- + readOnly value true will force the readOnly setting in VolumeMounts. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: boolean + volumeID: + description: |- + volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + required: + - volumeID + type: object + azureDisk: + description: azureDisk represents an Azure Data + Disk mount on the host and bind mount to the pod. + properties: + cachingMode: + description: 'cachingMode is the Host Caching + mode: None, Read Only, Read Write.' + type: string + diskName: + description: diskName is the Name of the data + disk in the blob storage + type: string + diskURI: + description: diskURI is the URI of data disk + in the blob storage + type: string + fsType: + description: |- + fsType is Filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + kind: + description: 'kind expected values are Shared: + multiple blob disks per storage account Dedicated: + single blob disk per storage account Managed: + azure managed data disk (only in managed availability + set). defaults to shared' + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). 
ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + description: azureFile represents an Azure File + Service mount on the host and bind mount to the + pod. + properties: + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretName: + description: secretName is the name of secret + that contains Azure Storage Account Name and + Key + type: string + shareName: + description: shareName is the azure share Name + type: string + required: + - secretName + - shareName + type: object + cephfs: + description: cephFS represents a Ceph FS mount on + the host that shares a pod's lifetime + properties: + monitors: + description: |- + monitors is Required: Monitors is a collection of Ceph monitors + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + path: + description: 'path is Optional: Used as the + mounted root, rather than the full Ceph tree, + default is /' + type: string + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: boolean + secretFile: + description: |- + secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + secretRef: + description: |- + secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + user: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + user is optional: User is the rados user name, default is admin + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it type: string + required: + - monitors type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: + cinder: description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + cinder represents a cinder volume attached and mounted on kubelets host machine. 
More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. properties: - name: - default: "" + fsType: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: boolean + secretRef: + description: |- + secretRef is optional: points to a secret object containing parameters used to connect + to OpenStack. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + description: |- + volumeID used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md type: string + required: + - volumeID type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap that should - populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. If a key is specified which is not present in the ConfigMap, - the volume setup will error unless it is marked optional. Paths must be - relative and may not contain the '..' path or start with '..'. - items: - description: Maps a string key to a path within - a volume. 
- properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: optional specify whether the ConfigMap - or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) represents - ephemeral storage that is handled by certain external - CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". 
- If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. If the - secret object contains more than one secret, all secret references are passed. + configMap: + description: configMap represents a configMap that + should populate this volume properties: + defaultMode: + description: |- + defaultMode is optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic name: default: "" description: |- @@ -6508,1490 +6516,1562 @@ spec: More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string + optional: + description: optional specify whether the ConfigMap + or its keys must be defined + type: boolean type: object x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API about - the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. 
- YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - items: - description: Items is a list of downward API volume - file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing the - pod field - properties: - fieldRef: - description: 'Required: Selects a field of - the pod: only annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". - type: string - fieldPath: - description: Path of the field to select - in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: 'Required: Path is the relative - path name of the file to be created. Must - not be absolute or contain the ''..'' path. - Must be utf-8 encoded. The first item of - the relative path must not start with ''..''' + csi: + description: csi (Container Storage Interface) represents + ephemeral storage that is handled by certain external + CSI drivers (Beta feature). + properties: + driver: + description: |- + driver is the name of the CSI driver that handles this volume. 
+ Consult with your admin for the correct name as registered in the cluster. + type: string + fsType: + description: |- + fsType to mount. Ex. "ext4", "xfs", "ntfs". + If not provided, the empty value is passed to the associated CSI driver + which will determine the default filesystem to apply. + type: string + nodePublishSecretRef: + description: |- + nodePublishSecretRef is a reference to the secret object containing + sensitive information to pass to the CSI driver to complete the CSI + NodePublishVolume and NodeUnpublishVolume calls. + This field is optional, and may be empty if no secret is required. If the + secret object contains more than one secret, all secret references are passed. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + description: |- + readOnly specifies a read-only configuration for the volume. + Defaults to false (read/write). + type: boolean + volumeAttributes: + additionalProperties: type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + description: |- + volumeAttributes stores driver-specific properties that are passed to the CSI + driver. Consult your driver's documentation for supported values. 
+ type: object + required: + - driver + type: object + downwardAPI: + description: downwardAPI represents downward API + about the pod that should populate this volume + properties: + defaultMode: + description: |- + Optional: mode bits to use on created files by default. Must be a + Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: Items is a list of downward API + volume file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing + the pod field properties: - containerName: - description: 'Container name: required - for volumes, optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource to select' + fieldRef: + description: 'Required: Selects a field + of the pod: only annotations, labels, + name, namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. 
+ type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative + path name of the file to be created. + Must not be absolute or contain the + ''..'' path. Must be utf-8 encoded. + The first item of the relative path + must not start with ''..''' type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to + select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic required: - - resource + - path type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: + type: array + x-kubernetes-list-type: atomic + type: object + emptyDir: description: |- - medium represents what type of storage medium should back this directory. - The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. + emptyDir represents a temporary directory that shares a pod's lifetime. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string + properties: + medium: + description: |- + medium represents what type of storage medium should back this directory. + The default is "" which means to use the node's default medium. + Must be an empty string (default) or Memory. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + description: |- + sizeLimit is the total amount of local storage required for this EmptyDir volume. + The size limit is also applicable for memory medium. + The maximum usage on memory medium EmptyDir would be the minimum value between + the SizeLimit specified here and the sum of memory limits of all containers in a pod. + The default is nil which means that the limit is undefined. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. 
- The default is nil which means that the limit is undefined. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. + ephemeral represents a volume that is handled by a cluster storage driver. + The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, + and deleted when the pod is removed. - Use this if: - a) the volume is only needed while the pod runs, - b) features of normal volumes like restoring from snapshot or capacity - tracking are needed, - c) the storage driver is specified through a storage class, and - d) the storage driver supports dynamic volume provisioning through - a PersistentVolumeClaim (see EphemeralVolumeSource for more - information on the connection between this volume type - and PersistentVolumeClaim). + Use this if: + a) the volume is only needed while the pod runs, + b) features of normal volumes like restoring from snapshot or capacity + tracking are needed, + c) the storage driver is specified through a storage class, and + d) the storage driver supports dynamic volume provisioning through + a PersistentVolumeClaim (see EphemeralVolumeSource for more + information on the connection between this volume type + and PersistentVolumeClaim). - Use PersistentVolumeClaim or one of the vendor-specific - APIs for volumes that persist for longer than the lifecycle - of an individual pod. + Use PersistentVolumeClaim or one of the vendor-specific + APIs for volumes that persist for longer than the lifecycle + of an individual pod. 
- Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to - be used that way - see the documentation of the driver for - more information. + Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to + be used that way - see the documentation of the driver for + more information. - A pod can use both types of ephemeral volumes and - persistent volumes at the same time. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. Pod validation will reject the pod if the concatenated name - is not valid for a PVC (for example, too long). - + A pod can use both types of ephemeral volumes and + persistent volumes at the same time. + properties: + volumeClaimTemplate: + description: |- + Will be used to create a stand-alone PVC to provision the volume. + The pod in which this EphemeralVolumeSource is embedded will be the + owner of the PVC, i.e. the PVC will be deleted together with the + pod. The name of the PVC will be `-` where + `` is the name from the `PodSpec.Volumes` array + entry. Pod validation will reject the pod if the concatenated name + is not valid for a PVC (for example, too long). - An existing PVC with that name that is not owned by the pod - will *not* be used for the pod to avoid using an unrelated - volume by mistake. Starting the pod is then blocked until - the unrelated PVC is removed. If such a pre-created PVC is - meant to be used by the pod, the PVC has to updated with an - owner reference to the pod once the pod exists. Normally - this should not be necessary, but it may be useful when - manually reconstructing a broken cluster. 
+ An existing PVC with that name that is not owned by the pod + will *not* be used for the pod to avoid using an unrelated + volume by mistake. Starting the pod is then blocked until + the unrelated PVC is removed. If such a pre-created PVC is + meant to be used by the pod, the PVC has to updated with an + owner reference to the pod once the pod exists. Normally + this should not be necessary, but it may be useful when + manually reconstructing a broken cluster. - This field is read-only and no changes will be made by Kubernetes - to the PVC after it has been created. + This field is read-only and no changes will be made by Kubernetes + to the PVC after it has been created. - Required, must not be nil. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. 
- When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, - and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. - If the namespace is specified, then dataSourceRef will not be copied to dataSource. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type of resource - being referenced - type: string - name: - description: Name is the name of resource - being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. - When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - This field will replace the functionality of the dataSource field and as such - if both fields are non-empty, they must have the same value. For backwards - compatibility, when namespace isn't specified in dataSourceRef, - both fields (dataSource and dataSourceRef) will be set to the same - value automatically if one of them is empty and the other is non-empty. - When namespace is specified in dataSourceRef, - dataSource isn't set to the same value and must be empty. - There are three important differences between dataSource and dataSourceRef: - * While dataSource only allows two specific types of objects, dataSourceRef - allows any non-core object, as well as PersistentVolumeClaim objects. 
- * While dataSource ignores disallowed values (dropping them), dataSourceRef - preserves all values, and generates an error if a disallowed value is - specified. - * While dataSource only allows local objects, dataSourceRef allows objects - in any namespaces. - (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. - (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type of resource - being referenced - type: string - name: - description: Name is the name of resource - being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object + + Required, must not be nil. + properties: + metadata: + description: |- + May contain labels and annotations that will be copied into the PVC + when creating it. No other fields are allowed and will be rejected during + validation. type: object - selector: - description: selector is a label query over - volumes to consider for binding. + spec: + description: |- + The specification for the PersistentVolumeClaim. The entire content is + copied unchanged into the PVC that gets created from this + template. The same fields as in a PersistentVolumeClaim + are also valid here. properties: - matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. 
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object + type: string type: array x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string + dataSource: description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. 
+ properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of + resource being referenced + type: string + name: + description: Name is the name of + resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. 
+ (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of + resource being referenced + type: string + name: + description: Name is the name of + resource being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query + over volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding + reference to the PersistentVolume + backing this claim. 
+ type: string type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. - If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be - set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource - exists. - More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. - type: string - volumeMode: + required: + - spec + type: object + type: object + fc: + description: fc represents a Fibre Channel resource + that is attached to a kubelet's host machine and + then exposed to the pod. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
+ TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + lun: + description: 'lun is Optional: FC target lun + number' + format: int32 + type: integer + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + targetWWNs: + description: 'targetWWNs is Optional: FC target + worldwide names (WWNs)' + items: + type: string + type: array + x-kubernetes-list-type: atomic + wwids: + description: |- + wwids Optional: FC volume world wide identifiers (wwids) + Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + flexVolume: + description: |- + flexVolume represents a generic volume resource that is + provisioned/attached using an exec based plugin. + properties: + driver: + description: driver is the name of the driver + to use for this volume. + type: string + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + type: string + options: + additionalProperties: + type: string + description: 'options is Optional: this field + holds extra command options if any.' + type: object + readOnly: + description: |- + readOnly is Optional: defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef is Optional: secretRef is reference to the secret object containing + sensitive information to pass to the plugin scripts. This may be + empty if no secret object is specified. If the secret object + contains more than one secret, all secrets are passed to the plugin + scripts. 
+ properties: + name: + default: "" description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding reference - to the PersistentVolume backing this claim. + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object + x-kubernetes-map-type: atomic required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine and then - exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - TODO: how do we prevent errors in the filesystem from compromising the machine - type: string - lun: - description: 'lun is Optional: FC target lun number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver to - use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field holds - extra command options if any.' + - driver type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. + flocker: + description: flocker represents a Flocker volume + attached to a kubelet's host machine. This depends + on the Flocker control service being running properties: - name: - default: "" + datasetName: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker + should be considered as deprecated + type: string + datasetUUID: + description: datasetUUID is the UUID of the + dataset. This is unique identifier of a Flocker + dataset type: string type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume attached - to a kubelet's host machine. This depends on the Flocker - control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the dataset. - This is unique identifier of a Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - TODO: how do we prevent errors in the filesystem from compromising the machine - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: + gcePersistentDisk: description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. + gcePersistentDisk represents a GCE Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for the - specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: + properties: + fsType: + description: |- + fsType is filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + format: int32 + type: integer + pdName: + description: |- + pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: boolean + required: + - pdName + type: object + gitRepo: description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. 
Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - --- - TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not - mount host directories as read/write. - properties: - path: + gitRepo represents a git repository at a particular revision. + DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an + EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir + into the Pod's container. + properties: + directory: + description: |- + directory is the target directory name. + Must not contain or start with '..'. If '.' is supplied, the volume directory will be the + git repository. Otherwise, if specified, the volume will contain the git repository in + the subdirectory with the given name. + type: string + repository: + description: repository is the URL + type: string + revision: + description: revision is the commit hash for + the specified revision. + type: string + required: + - repository + type: object + glusterfs: description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: + glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/glusterfs/README.md + properties: + endpoints: + description: |- + endpoints is the endpoint name that details Glusterfs topology. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + path: + description: |- + path is the Glusterfs volume path. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + readOnly: + description: |- + readOnly here will force the Glusterfs volume to be mounted with read-only permissions. + Defaults to false. 
+ More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: boolean + required: + - endpoints + - path + type: object + hostPath: description: |- - type for HostPath Volume - Defaults to "" + hostPath represents a pre-existing file or directory on the host + machine that is directly exposed to the container. This is generally + used for system agents or other privileged things that are allowed + to see the host machine. Most containers will NOT need this. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether support - iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether support - iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - TODO: how do we prevent errors in the filesystem from compromising the machine - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified Name. - type: string - iscsiInterface: - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). 
- type: string - lun: - description: lun represents iSCSI Target Lun number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: + --- + TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not + mount host directories as read/write. + properties: + path: + description: |- + path of the directory on the host. + If the path is a symlink, it will follow the link to the real path. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + type: + description: |- + type for HostPath Volume + Defaults to "" + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + required: + - path + type: object + iscsi: description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret for iSCSI - target and initiator authentication + iscsi represents an ISCSI Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://examples.k8s.io/volumes/iscsi/README.md properties: - name: - default: "" + chapAuthDiscovery: + description: chapAuthDiscovery defines whether + support iSCSI Discovery CHAP authentication + type: boolean + chapAuthSession: + description: chapAuthSession defines whether + support iSCSI Session CHAP authentication + type: boolean + fsType: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + initiatorName: + description: |- + initiatorName is the custom iSCSI Initiator Name. + If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface + : will be created for the connection. + type: string + iqn: + description: iqn is the target iSCSI Qualified + Name. + type: string + iscsiInterface: + description: |- + iscsiInterface is the interface Name that uses an iSCSI transport. + Defaults to 'default' (tcp). + type: string + lun: + description: lun represents iSCSI Target Lun + number. + format: int32 + type: integer + portals: + description: |- + portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + items: + type: string + type: array + x-kubernetes-list-type: atomic + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + type: boolean + secretRef: + description: secretRef is the CHAP Secret for + iSCSI target and initiator authentication + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + description: |- + targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). type: string + required: + - iqn + - lun + - targetPortal type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: + name: description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + name of the volume. + Must be a DNS_LABEL and unique within the pod. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: + nfs: description: |- - server is the hostname or IP address of the NFS server. 
+ nfs represents an NFS mount on the host that shares a pod's lifetime More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: + properties: + path: + description: |- + path that is exported by the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + readOnly: + description: |- + readOnly here will force the NFS export to be mounted with read-only permissions. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: boolean + server: + description: |- + server is the hostname or IP address of the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + required: + - path + - server + type: object + persistentVolumeClaim: description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. + persistentVolumeClaimVolumeSource represents a reference to a + PersistentVolumeClaim in the same namespace. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a PhotonController - persistent disk attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". 
Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies Photon - Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx volume - attached and mounted on kubelets host machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies a Portworx - volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - sources: - description: sources is the list of volume projections - items: - description: Projection that may be projected - along with other supported volume types - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. 
+ properties: + claimName: + description: |- + claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + type: string + readOnly: + description: |- + readOnly Will force the ReadOnly setting in VolumeMounts. + Default false. + type: boolean + required: + - claimName + type: object + photonPersistentDisk: + description: photonPersistentDisk represents a PhotonController + persistent disk attached and mounted on kubelets + host machine + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + pdID: + description: pdID is the ID that identifies + Photon Controller persistent disk + type: string + required: + - pdID + type: object + portworxVolume: + description: portworxVolume represents a portworx + volume attached and mounted on kubelets host machine + properties: + fsType: + description: |- + fSType represents the filesystem type to mount + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + volumeID: + description: volumeID uniquely identifies a + Portworx volume + type: string + required: + - volumeID + type: object + projected: + description: projected items for all in one resources + secrets, configmaps, and downward API + properties: + defaultMode: + description: |- + defaultMode are the mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. 
+ YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + sources: + description: sources is the list of volume projections + items: + description: Projection that may be projected + along with other supported volume types + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. - Alpha, gated by the ClusterTrustBundleProjection feature gate. + Alpha, gated by the ClusterTrustBundleProjection feature gate. - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. - Kubelet performs aggressive normalization of the PEM contents written - into the pod filesystem. Esoteric PEM features such as inter-block - comments and block headers are stripped. Certificates are deduplicated. - The ordering of certificates within the file is arbitrary, and Kubelet - may change the order over time. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. 
+ properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. 
If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from the + volume root to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about + the configMap data to project properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. + description: Maps a string key to + a path within a volume. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the key + to project. type: string - operator: + mode: description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: + mode is Optional: mode bits used to set permissions on this file. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string required: - key - - operator + - path type: object type: array x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string + name: + default: "" description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + optional: + description: optional specify whether + the ConfigMap or its keys must be + defined + type: boolean type: object x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from the volume - root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about the - configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. If a key is specified which is not present in the ConfigMap, - the volume setup will error unless it is marked optional. Paths must be - relative and may not contain the '..' path or start with '..'. - items: - description: Maps a string key to a - path within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. 
- YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information about - the downwardAPI data to project - properties: - items: - description: Items is a list of DownwardAPIVolume - file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only annotations, - labels, name, namespace and uid - are supported.' 
+ downwardAPI: + description: downwardAPI information about + the downwardAPI data to project + properties: + items: + description: Items is a list of DownwardAPIVolume + file + items: + description: DownwardAPIVolumeFile + represents information to create + the file containing the pod field properties: - apiVersion: - description: Version of the - schema the FieldPath is written - in terms of, defaults to "v1". - type: string - fieldPath: - description: Path of the field - to select in the specified - API version. + fieldRef: + description: 'Required: Selects + a field of the pod: only annotations, + labels, name, namespace and + uid are supported.' + properties: + apiVersion: + description: Version of + the schema the FieldPath + is written in terms of, + defaults to "v1". + type: string + fieldPath: + description: Path of the + field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path + is the relative path name + of the file to be created. + Must not be absolute or contain + the ''..'' path. Must be utf-8 + encoded. The first item of + the relative path must not + start with ''..''' type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. 
+ properties: + containerName: + description: 'Container + name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the + output format of the exposed + resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: + resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic required: - - fieldPath + - path type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file - to be created. Must not be absolute - or contain the ''..'' path. Must - be utf-8 encoded. The first item - of the relative path must not - start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about + the secret data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. 
If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to + a path within a volume. properties: - containerName: - description: 'Container name: - required for volumes, optional - for env vars' + key: + description: key is the key + to project. type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. 
type: string required: - - resource + - key + - path type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about the - secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. If a key is specified which is not present in the Secret, - the volume setup will error unless it is marked optional. Paths must be - relative and may not contain the '..' path or start with '..'. - items: - description: Maps a string key to a - path within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. - type: string - optional: - description: optional field specify whether - the Secret or its key must be defined - type: boolean + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + optional: + description: optional field specify + whether the Secret or its key must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is information + about the serviceAccountToken data to + project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. 
The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is information - about the serviceAccountToken data to project + type: array + x-kubernetes-list-type: atomic + type: object + quobyte: + description: quobyte represents a Quobyte mount + on the host that shares a pod's lifetime + properties: + group: + description: |- + group to map volume access to + Default is no group + type: string + readOnly: + description: |- + readOnly here will force the Quobyte volume to be mounted with read-only permissions. + Defaults to false. + type: boolean + registry: + description: |- + registry represents a single or multiple Quobyte Registry services + specified as a string as host:port pair (multiple entries are separated with commas) + which acts as the central registry for volumes + type: string + tenant: + description: |- + tenant owning the given Quobyte volume in the Backend + Used with dynamically provisioned Quobyte volumes, value is set by the plugin + type: string + user: + description: |- + user to map volume access to + Defaults to serivceaccount user + type: string + volume: + description: volume is a string that references + an already created Quobyte volume by name. + type: string + required: + - registry + - volume + type: object + rbd: + description: |- + rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/rbd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. 
+ Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd + TODO: how do we prevent errors in the filesystem from compromising the machine + type: string + image: + description: |- + image is the rados image name. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + keyring: + description: |- + keyring is the path to key ring for RBDUser. + Default is /etc/ceph/keyring. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + monitors: + description: |- + monitors is a collection of Ceph monitors. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + pool: + description: |- + pool is the rados pool name. + Default is rbd. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: boolean + secretRef: + description: |- + secretRef is name of the authentication secret for RBDUser. If provided + overrides keyring. + Default is nil. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is the rados user name. + Default is admin. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + required: + - image + - monitors + type: object + scaleIO: + description: scaleIO represents a ScaleIO persistent + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". + Default is "xfs". + type: string + gateway: + description: gateway is the host address of + the ScaleIO API Gateway. + type: string + protectionDomain: + description: protectionDomain is the name of + the ScaleIO Protection Domain for the configured + storage. + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef references to the secret for ScaleIO user and other + sensitive information. If this is not provided, Login operation will fail. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. 
+ type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + description: sslEnabled Flag enable/disable + SSL communication with Gateway, default false + type: boolean + storageMode: + description: |- + storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. + Default is ThinProvisioned. + type: string + storagePool: + description: storagePool is the ScaleIO Storage + Pool associated with the protection domain. + type: string + system: + description: system is the name of the storage + system as configured in ScaleIO. + type: string + volumeName: + description: |- + volumeName is the name of a volume already created in the ScaleIO system + that is associated with this volume source. + type: string + required: + - gateway + - secretRef + - system + type: object + secret: + description: |- + secret represents a secret that should populate this volume. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + properties: + defaultMode: + description: |- + defaultMode is Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values + for mode bits. Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items If unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. 
If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. + key: + description: key is the key to project. type: string - expirationSeconds: + mode: description: |- - expirationSeconds is the requested duration of validity of the service - account token. As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 type: integer path: description: |- - path is the path relative to the mount point of the file to project the - token into. + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. 
type: string required: + - key - path type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount on the - host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - TODO: how do we prevent errors in the filesystem from compromising the machine - type: string - image: - description: |- - image is the rados image name. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" + type: array + x-kubernetes-list-type: atomic + optional: + description: optional field specify whether + the Secret or its keys must be defined + type: boolean + secretName: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + secretName is the name of the secret in the pod's namespace to use. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#secret type: string type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is the rados user name. - Default is admin. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of the - ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name of the - ScaleIO Protection Domain for the configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. + storageos: + description: storageOS represents a StorageOS volume + attached and mounted on Kubernetes nodes. properties: - name: - default: "" + fsType: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + fsType is the filesystem type to mount. 
+ Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef specifies the secret to use for obtaining the StorageOS API + credentials. If not specified, default values will be attempted. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + description: |- + volumeName is the human-readable name of the StorageOS volume. Volume + names are only unique within a namespace. + type: string + volumeNamespace: + description: |- + volumeNamespace specifies the scope of the volume within StorageOS. If no + namespace is specified then the Pod's namespace will be used. This allows the + Kubernetes name scoping to be mirrored within StorageOS for tighter integration. + Set VolumeName to any name to override the default behaviour. + Set to "default" if you are not using namespaces within StorageOS. + Namespaces that do not pre-exist within StorageOS will be created. 
type: string type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable SSL - communication with Gateway, default false - type: boolean - storageMode: - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. - type: string - system: - description: system is the name of the storage system - as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. If a key is specified which is not present in the Secret, - the volume setup will error unless it is marked optional. 
Paths must be - relative and may not contain the '..' path or start with '..'. - items: - description: Maps a string key to a path within - a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - This might be in conflict with other options that affect the file - mode, like fsGroup, and the result can be other mode bits set. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether the - Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS volume - attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. 
If not specified, default values will be attempted. + vsphereVolume: + description: vsphereVolume represents a vSphere + volume attached and mounted on kubelets host machine properties: - name: - default: "" + fsType: description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + fsType is filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + storagePolicyID: + description: storagePolicyID is the storage + Policy Based Management (SPBM) profile ID + associated with the StoragePolicyName. type: string + storagePolicyName: + description: storagePolicyName is the storage + Policy Based Management (SPBM) profile name. + type: string + volumePath: + description: volumePath is the path that identifies + vSphere volume vmdk + type: string + required: + - volumePath type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. - Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. 
- Namespaces that do not pre-exist within StorageOS will be created. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere volume - attached and mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage Policy - Based Management (SPBM) profile ID associated - with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage Policy - Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string required: - - volumePath + - name type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - containers + type: object type: object type: object + maxRestarts: + description: Maximum number of times the JobSet is restarted. + format: int32 + type: integer + pathwaysDir: + description: |- + PathwaysDir is a persistent location like GCS at which temporary + Pathways artifacts can be stored like HBM state during interruptions. + Currently, Pathways supports a precreated GCS directory only. + type: string + pathwaysVersion: + description: PathwaysVersion is the version of the Pathways client. + type: string workers: description: |- The list of worker types created for the Pathways Job. Currently only one type of worker is supported. items: description: |- - The WorkerSpec struct takes in the specifications for the + The WorkerSpec struct lists the specifications for the Pathways workers. 
properties: numSlices: @@ -8016,6 +8096,7 @@ spec: type: object type: array required: + - controller - workers type: object status: diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index 7a50f08e..c1770052 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -17,51 +17,54 @@ kind: PathwaysJob metadata: name: pathways-trial2 spec: + maxRestarts: 0 workers: - type: tpu-v4-podslice topology: 2x2x2 numSlices: 1 pathwaysDir: "gs://cloud-pathways-staging/tmp" - template: # UserPodTemplate - spec: - containers: - - name: jetstream - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - imagePullPolicy: Always - ports: - - containerPort: 9000 - env: - - name: XCLOUD_ENVIRONMENT - value: GCP - - name: JAX_PLATFORMS - value: proxy - - name: JAX_BACKEND_TARGET - value: grpc://pathways-1-leader-0-0.pathways-1:38681 - command: + controller: + deploymentMode: "colocate" + template: # UserPodTemplate + spec: + containers: + - name: jetstream + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + ports: + - containerPort: 9000 + env: + - name: XCLOUD_ENVIRONMENT + value: GCP + - name: JAX_PLATFORMS + value: proxy + - name: JAX_BACKEND_TARGET + value: grpc://pathways-1-leader-0-0.pathways-1:38681 + command: + - bash + - -c + - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap + _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py + MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 + load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items + max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false + model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 + ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) + & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; + EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep + 50;' + - name: tester + image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + imagePullPolicy: Always + command: - bash - -c - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap - _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py - MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 - load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items - max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false - model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 - ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) - & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; - EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep - 500;' - - name: tester - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - imagePullPolicy: Always - command: - - bash - - -c - - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap - _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 - JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 - --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; - echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne - 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: - $(date); sleep 500;' - securityContext: - privileged: true + _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 + JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 + --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; + echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne + 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: + $(date); sleep 50;' + securityContext: + privileged: true diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 7cf822aa..086e7ea1 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -72,57 +72,46 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - // 2. Process the object - + // 2. Process the Pathways object and build a JobSet client kubeconfig := ctrl.GetConfigOrDie() // log.Info("PathwaysJob: config established...") jobSetClient := jobsetclient.NewForConfigOrDie(kubeconfig) // log.Info("PathwaysJob: client built for JobSet...") - // 2.1 Figure out if PathwaysJob is already present and in "Suspended / Completed / Failed states", + // 2.1 Figure out if PathwaysJob is already present // if it is the case, there is nothing to do. 
+ // (ToDo) check states in "Suspended / Completed / Failed states", - childJobSets, err := r.listChildJobSets(ctx, pw, jobSetClient) + childJobSet, err := r.getChildJobSet(ctx, pw, jobSetClient) if err != nil { - log.Error(err, "PathwaysJob: failed to list JobSets \n") - return ctrl.Result{}, err - } - - // 2.1.1 List childJobSets - for _, jobset := range childJobSets { - if jobset.GetName() == pw.GetName() { - log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") - for _, c := range jobset.Status.Conditions { - log.Info("PathwaysJob: Condition is ", "Type", c.Type) - } - } else { - // 3. Update the cluster - create update and delete other resources - log.Info("PathwaysJob: creating JobSet \n") - if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { - log.Error(err, "PathwaysJob: failed to create JobSet \n") - return ctrl.Result{}, err - } + log.Info("PathwaysJob: can't find JobSet \n") + // return ctrl.Result{}, err + } else if childJobSet != nil { + // Not reaching this part of the code now, but that is good? + log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") + for _, c := range childJobSet.Status.Conditions { + log.Info("PathwaysJob: Condition is ", "Type", c.Type) } + return ctrl.Result{}, nil } - // report status - // // 3. Update the cluster - create update and delete other resources - // log.Info("PathwaysJob: creating JobSet \n") - // if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { - // log.Error(err, "PathwaysJob: failed to create JobSet \n") - // return ctrl.Result{}, err - // } + // 3. Update the cluster - create update and delete other resources + log.Info("PathwaysJob: creating JobSet \n") + if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { + log.Error(err, "PathwaysJob: failed to create JobSet \n") + return ctrl.Result{}, err + } - //4. Update the object's status using Conditions + //4. Update the object's status using Conditions (?) //5. 
Return a result + log.Info("PathwaysJob: DONE DONE DONE!") return ctrl.Result{}, nil } // function to listChildJobSets, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L44 -// // function to updatePathwaysJob Status ~~ updateJobSetStatus. Pathways status is same as JobSet Status. This function will mainly update Conditions and Message. // similar to https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L248 // JobSet conditions - https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L822 @@ -146,34 +135,42 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) // function setPathwaysJobSuspendedCondition // function setPathwaysJobResumedCondition - -func (r *PathwaysJobReconciler) listChildJobSets(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) ([]jobsetv1alpha2.JobSet, error) { - log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) +func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) (*jobsetv1alpha2.JobSet, error) { + log3 := ctrl.LoggerFrom(ctx) + // .WithValues("pathwaysjob", klog.KObj(pw)) // ctx = ctrl.LoggerInto(ctx, log3) - log3.Info("PathwaysJob: in listChildJobSets", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) - var jsList *jobsetv1alpha2.JobSetList - jsList, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).List(ctx, metav1.ListOptions{}) + log3.Info("PathwaysJob: in getChildJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + var js *jobsetv1alpha2.JobSet + js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Get(ctx, pw.GetName(), metav1.GetOptions{}) if err != nil { - log3.Info("PathwaysJob: can't list JobSets: ", "error ", err) + // log3.Info("PathwaysJob: 
can't get JobSets: ", "error ", err) return nil, err } - return jsList.Items, nil + return js, nil } func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { - log2 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + log2 := ctrl.LoggerFrom(ctx) + // .WithValues("pathwaysjob", klog.KObj(pw)) // ctx = ctrl.LoggerInto(ctx, log2) log2.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) - // Some predefined variables - truth := true - volumeSourceType := corev1.HostPathDirectoryOrCreate + var jobs []jobsetv1alpha2.ReplicatedJob + var rmJobName string // // Pathways Spec + JobSet for batch inference ------ - leaderJob, _ := MakeLeaderJob(ctx, pw) + if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { + rmJobName = "leader" + jobs, _ = MakeLeaderJobForColocatedDeployment(ctx, pw, rmJobName) + } else { + rmJobName = "rm" + jobs, _ = MakeJobsForDefaultDeployment(ctx, pw, rmJobName) + } + + workerJob, _ := MakeWorkerJob(ctx, pw, rmJobName) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ @@ -184,70 +181,9 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo FailurePolicy: &jobsetv1alpha2.FailurePolicy{ MaxRestarts: 4, }, - ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{ - *leaderJob, - { - Name: "worker", - Replicas: int32(pw.Spec.Workers[0].NumSlices), - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(2)), // remember to update - Parallelism: ptr.To(int32(2)), // remember to update - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pathways-worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - 
"--server_port=38679", - fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), - fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), - }, - Env: []corev1.EnvVar{ - {Name: "TPU_MIN_LOG_LEVEL", Value: "0"}, - {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, - {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, - }, - Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", - }, - }, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, - }, // end Pathways worker container - }, - NodeSelector: map[string]string{ - "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, - "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, - }, - Volumes: []corev1.Volume{ - { - Name: "shared-tmp", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/tmp", - Type: &volumeSourceType, - }, - }, - }, - }, // end Volumes - HostNetwork: true, // For performance == McJAX - DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX - }, - }, - }, - }, - }, // end worker replicated job - }, + ReplicatedJobs: append(jobs, workerJob), }, } - // var lock sync.Mutex // Set Pathways controller as the owner of the JobSet for garbage collection. 
if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { @@ -259,7 +195,6 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) if err != nil { - log2.Info("PathwaysJob: failed to create JobSet: ", "JobSet name", js.Name) return err } else { log2.Info("PathwaysJob: successfully created JobSet: ", "JobSet name", js.Name) @@ -280,10 +215,11 @@ func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -// Some Pathways helpers +// ---------------------- PATHWAYS HELPERS -------------------------- -func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) { +func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { truth := true + rmContainerSpec := corev1.Container{ Name: "pathways-rm", Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", @@ -299,7 +235,7 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Containe Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, - {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.GetName(), "leader", pw.GetName())}, + {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.GetName(), rmJobName, pw.GetName())}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, }, Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, @@ -308,8 +244,10 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Containe return 
&rmContainerSpec, nil } -func MakeProxyContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) { +func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { + // Some predefined variables truth := true + proxyContainerSpec := corev1.Container{ Name: "pathways-proxy", Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", @@ -317,7 +255,7 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob) (*corev1.Container, error) SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ "--server_port=38681", - fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), "leader", pw.GetName()), + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, @@ -369,15 +307,16 @@ func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) } func GetUserContainerList(pw *pathwaysjob.PathwaysJob) ([]corev1.Container, error) { - containerList := pw.Spec.UserPodTemplate.Spec.Containers + containerList := pw.Spec.Controller.UserPodTemplate.Spec.Containers return containerList, nil } -func MakeLeaderJob(ctx context.Context, pw *pathwaysjob.PathwaysJob) (*jobsetv1alpha2.ReplicatedJob, error) { - // truth := true +func MakeLeaderJobForColocatedDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) ([]jobsetv1alpha2.ReplicatedJob, error) { + // Some predefined variables volumeSourceType := corev1.HostPathDirectoryOrCreate - RMContainerSpec, _ := MakeResourceManagerContainer(pw) - ProxyContainerSpec, _ := MakeProxyContainer(pw) + + RMContainerSpec, _ := MakeResourceManagerContainer(pw, rmJobName) + ProxyContainerSpec, _ := MakeProxyContainer(pw, rmJobName) affinitySpec, _ := MakePodAffinityRules(pw) containerList, _ := GetUserContainerList(pw) 
containerList = append(containerList, *RMContainerSpec, *ProxyContainerSpec) @@ -386,7 +325,7 @@ func MakeLeaderJob(ctx context.Context, pw *pathwaysjob.PathwaysJob) (*jobsetv1a // log3.Info("PathwaysJob:...MakeLeaderJob", "Length of container list is", len(containerList)) leaderJob := jobsetv1alpha2.ReplicatedJob{ - Name: "leader", + Name: rmJobName, Replicas: 1, Template: batchv1.JobTemplateSpec{ Spec: batchv1.JobSpec{ @@ -426,5 +365,197 @@ func MakeLeaderJob(ctx context.Context, pw *pathwaysjob.PathwaysJob) (*jobsetv1a }, }, } // end replicated Job - return &leaderJob, nil + return []jobsetv1alpha2.ReplicatedJob{leaderJob}, nil +} + +func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) ([]jobsetv1alpha2.ReplicatedJob, error) { + // Some predefined variables + volumeSourceType := corev1.HostPathDirectoryOrCreate + + RMContainerSpec, _ := MakeResourceManagerContainer(pw, rmJobName) + ProxyContainerSpec, _ := MakeProxyContainer(pw, rmJobName) + + // log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + // log3.Info("PathwaysJob:...MakeLeaderJob", "Length of container list is", len(containerList)) + + rmJob := jobsetv1alpha2.ReplicatedJob{ + Name: "rm", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + Containers: []corev1.Container{*RMContainerSpec}, // end leader 
[]containers + }, // end PodSpec + }, + }, + }, + } // end replicated Job + + proxyJob := jobsetv1alpha2.ReplicatedJob{ + Name: "proxy", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + Tolerations: []corev1.Toleration{ + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + Containers: []corev1.Container{*ProxyContainerSpec}, // end leader []containers + }, // end PodSpec + }, + }, + }, + } // end replicated Job + + userJob := jobsetv1alpha2.ReplicatedJob{ + Name: "user-job", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: pw.Spec.Controller.UserPodTemplate.Spec, + // corev1.PodSpec{ + // HostNetwork: true, // For performance == McJAX + // DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + // Tolerations: []corev1.Toleration{ + // { + // Key: "google.com/tpu", + // Operator: "Exists", + // Effect: "NoSchedule", + // }, + // }, + // Volumes: []corev1.Volume{ + // { + // Name: "shared-tmp", + // VolumeSource: corev1.VolumeSource{ + // HostPath: &corev1.HostPathVolumeSource{ + // Path: "/tmp", + // Type: &volumeSourceType, + // }, + // }, + // }, + // }, // end Volumes + // Containers: []corev1.Container{*RMContainerSpec}, // end leader []containers + // }, // end PodSpec + }, + }, + }, + } // end replicated Job + + return 
[]jobsetv1alpha2.ReplicatedJob{rmJob, proxyJob, userJob}, nil +} + +// Constructs JobSet's replicated job for the Pathways worker +func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { + // Some predefined variables + truth := true + volumeSourceType := corev1.HostPathDirectoryOrCreate + + workerJob := jobsetv1alpha2.ReplicatedJob{ + Name: "worker", + Replicas: int32(pw.Spec.Workers[0].NumSlices), + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(2)), // remember to update + Parallelism: ptr.To(int32(2)), // remember to update + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "pathways-worker", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--server_port=38679", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), rmJobName, pw.GetName()), + fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), + }, + Env: []corev1.EnvVar{ + {Name: "TPU_MIN_LOG_LEVEL", Value: "0"}, + {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, + {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, + }, // end Pathways worker container + }, + NodeSelector: map[string]string{ + "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: 
corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + }, + }, + }, + }, + } // end worker replicated job + return workerJob, nil } diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 9a06a30d..2cc5ac29 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -145,3 +145,44 @@ package utils // } // } // } + +// --------LIST childJobSets -------------- +// func (r *PathwaysJobReconciler) listChildJobSets(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) ([]jobsetv1alpha2.JobSet, error) { +// log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) +// // ctx = ctrl.LoggerInto(ctx, log3) +// log3.Info("PathwaysJob: in listChildJobSets", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + +// var jsList *jobsetv1alpha2.JobSetList +// jsList, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).List(ctx, metav1.ListOptions{}) + +// if err != nil { +// log3.Info("PathwaysJob: can't list JobSets: ", "error ", err) +// return nil, err +// } +// return jsList.Items, nil +// } + +// report status + +// // 3. 
Update the cluster - create update and delete other resources +// log.Info("PathwaysJob: creating JobSet \n") +// if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { +// log.Error(err, "PathwaysJob: failed to create JobSet \n") +// return ctrl.Result{}, err +// } + +// childJobSets, err := r.listChildJobSets(ctx, pw, jobSetClient) +// if err != nil { +// log.Error(err, "PathwaysJob: failed to list JobSets \n") +// return ctrl.Result{}, err +// } + +// // 2.1.1 List childJobSets +// for _, jobset := range childJobSets { +// if jobset.GetName() == pw.GetName() { +// log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") +// for _, c := range jobset.Status.Conditions { +// log.Info("PathwaysJob: Condition is ", "Type", c.Type) +// } +// } +// } From 1c001da1b1cb833544247a8cc5bf6aaa7d02994d Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 23 Oct 2024 23:05:41 +0000 Subject: [PATCH 21/32] Detailing PathwaysJobStatus definitions. --- api/v1/pathwaysjob_types.go | 51 +++++++++- api/v1/zz_generated.deepcopy.go | 25 +++++ ...ways-job.pathways.domain_pathwaysjobs.yaml | 45 ++++++++- .../samples/pathways-job_v1_pathwaysjob.yaml | 94 ++++++++++++------- internal/controller/pathwaysjob_controller.go | 61 +++++++++--- 5 files changed, 224 insertions(+), 52 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index aa95278f..f9f1a62f 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -83,11 +83,26 @@ type PathwaysJobStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file - // Track the state of the Pathways workload, acceptable values are - - // Running, Suspended, Completed, Failed. - // Contains a human readable message to provide additional details to the // user. + // Aggregate of the PathwaysJob workload, based on worker and + // controller statuses. 
+ // One of - Pending, Running, Suspended, Completed, Failed. + // Contains a human readable message to provide additional details to the + // user. Conditions are mentioned below in more detail. // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` + + // Track the of the Pathways TPU workers - + // derived from Worker replicatedJob + WorkerStatus *PathwaysComponentStatus `json:"workerStatus,omitempty"` + + // Tracks the of the Pathways controller - + // 1. derived from "leader" replicatedJob in colocated mode + // (leader job contains "rm", "proxy" and "user" as containers) + // 2. derived from "rm" and "proxy" replicatedJobs in + // default + headless mode. + // 3. derived from "rm", "proxy" and "user-job" replicatedJobs in + // default + container mode. + ControllerStatus *PathwaysComponentStatus `json:"controllerStatus,omitempty"` } // +kubebuilder:validation:Enum=colocate;default @@ -131,6 +146,36 @@ type ControllerSpec struct { // +optional UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` } +type PathwaysConditionType string + +// These are built-in conditions for PathwaysJob. +const ( + // PathwaysJobPending means the underlying JobSet may be deployed, but + // pods are yet to be scheduled on nodes. + PathwaysJobPending PathwaysConditionType = "Pending" + // PathwaysJobRunning means the underlying JobSet has been scheduled and + // is in progress. + PathwaysJobRunning PathwaysConditionType = "Running" + // PathwaysJobCompleted means the underlying JobSet has completed its + // execution. + PathwaysJobCompleted PathwaysConditionType = "Completed" + // PathwaysJobFailed means the JobSet has failed its execution. + // Reason for failure may be found in Condition.Message + PathwaysJobFailed PathwaysConditionType = "Failed" + // PathwaysJobSuspended means the underlying Jobset is suspended. 
+ PathwaysJobSuspended PathwaysConditionType = "Suspended" +) + +type PathwaysComponentStatus struct { + // of the Pathways Component ~~ (Worker or Controller + // replicatedJobs) + // Pending - one of more jobs ready but not active. + // Running - all jobs active. + // Suspended - all jobs suspended. + // Completed - all jobs completed successfully. + // Failed - one or more jobs failed. + CurrentStatus string `json:"currentStatus,omitempty"` +} func init() { SchemeBuilder.Register(&PathwaysJob{}, &PathwaysJobList{}) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0779538a..8ea8aa1d 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -46,6 +46,21 @@ func (in *ControllerSpec) DeepCopy() *ControllerSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PathwaysComponentStatus) DeepCopyInto(out *PathwaysComponentStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysComponentStatus. +func (in *PathwaysComponentStatus) DeepCopy() *PathwaysComponentStatus { + if in == nil { + return nil + } + out := new(PathwaysComponentStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PathwaysJob) DeepCopyInto(out *PathwaysJob) { *out = *in @@ -140,6 +155,16 @@ func (in *PathwaysJobStatus) DeepCopyInto(out *PathwaysJobStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.WorkerStatus != nil { + in, out := &in.WorkerStatus, &out.WorkerStatus + *out = new(PathwaysComponentStatus) + **out = **in + } + if in.ControllerStatus != nil { + in, out := &in.ControllerStatus, &out.ControllerStatus + *out = new(PathwaysComponentStatus) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysJobStatus. diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 47d4c047..b21e7112 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -8104,9 +8104,11 @@ spec: properties: conditions: description: |- - Track the state of the Pathways workload, acceptable values are - - Running, Suspended, Completed, Failed. - Contains a human readable message to provide additional details to the // user. + Aggregate of the PathwaysJob workload, based on worker and + controller statuses. + One of - Pending, Running, Suspended, Completed, Failed. + Contains a human readable message to provide additional details to the + user. Conditions are mentioned below in more detail. items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for @@ -8175,6 +8177,43 @@ spec: - type type: object type: array + controllerStatus: + description: |- + Tracks the of the Pathways controller - + 1. derived from "leader" replicatedJob in colocated mode + (leader job contains "rm", "proxy" and "user" as containers) + 2. derived from "rm" and "proxy" replicatedJobs in + default + headless mode. + 3. 
derived from "rm", "proxy" and "user-job" replicatedJobs in + default + container mode. + properties: + currentStatus: + description: |2- + of the Pathways Component ~~ (Worker or Controller + replicatedJobs) + Pending - one of more jobs ready but not active. + Running - all jobs active. + Suspended - all jobs suspended. + Completed - all jobs completed successfully. + Failed - one or more jobs failed. + type: string + type: object + workerStatus: + description: |- + Track the of the Pathways TPU workers - + derived from Worker replicatedJob + properties: + currentStatus: + description: |2- + of the Pathways Component ~~ (Worker or Controller + replicatedJobs) + Pending - one of more jobs ready but not active. + Running - all jobs active. + Suspended - all jobs suspended. + Completed - all jobs completed successfully. + Failed - one or more jobs failed. + type: string + type: object type: object type: object served: true diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index c1770052..60762e51 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,7 +15,7 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-trial2 + name: pathways-trial1 spec: maxRestarts: 0 workers: @@ -25,46 +25,74 @@ spec: pathwaysDir: "gs://cloud-pathways-staging/tmp" controller: deploymentMode: "colocate" - template: # UserPodTemplate + template: spec: containers: - - name: jetstream - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - imagePullPolicy: Always - ports: - - containerPort: 9000 + - name: user env: - name: XCLOUD_ENVIRONMENT value: GCP - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-1-leader-0-0.pathways-1:38681 - command: - - bash - - -c - - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap - _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py - MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 - load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items - max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false - model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 - ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) - & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; - EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep - 50;' - - name: tester + value: grpc://pathways-trial1-leader-0-0.pathways-trial1:38681 image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest imagePullPolicy: Always command: - - bash - - -c - - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap - _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 - JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 - --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; - echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne - 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: - $(date); sleep 50;' - securityContext: - privileged: true + - bash + - -c + - | + (python3 MaxText/train.py MaxText/configs/base.yml base_output_directory=gs://cloud-pathways-staging dataset_path=gs://maxtext-dataset/ per_device_batch_size=1 enable_checkpointing=false remat_policy=full global_parameter_scale=4 steps=10 max_target_length=2048 use_iota_embed=true reuse_example_batch=1 dataset_type=synthetic attention=flash gcs_metrics=True run_name=roshanin-regular); + volumeMounts: + - mountPath: /tmp + name: shared-tmp + # resources: + # limits: + # cpu: "20" + # memory: 90G + + # template: # 
UserPodTemplate + # spec: + # containers: + # - name: jetstream + # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + # imagePullPolicy: Always + # ports: + # - containerPort: 9000 + # env: + # - name: XCLOUD_ENVIRONMENT + # value: GCP + # - name: JAX_PLATFORMS + # value: proxy + # - name: JAX_BACKEND_TARGET + # value: grpc://pathways-trial1-leader-0-0.pathways-trial1:38681 + # command: + # - bash + # - -c + # - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap + # _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py + # MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 + # load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items + # max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false + # model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 + # ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) + # & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; + # EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep + # 50;' + # - name: tester + # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + # imagePullPolicy: Always + # command: + # - bash + # - -c + # - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap + # _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 + # JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 + # --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; + # echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne + # 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: + # $(date); sleep 50;' + # securityContext: + # privileged: true + + diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 086e7ea1..d3ebab0a 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -85,25 +85,25 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) childJobSet, err := r.getChildJobSet(ctx, pw, jobSetClient) if err != nil { - log.Info("PathwaysJob: can't find JobSet \n") + log.Info("PathwaysJob: can't find JobSet") // return ctrl.Result{}, err } else if childJobSet != nil { - // Not reaching this part of the code now, but that is good? - log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") - for _, c := range childJobSet.Status.Conditions { - log.Info("PathwaysJob: Condition is ", "Type", c.Type) - } + log.Info("PathwaysJob: JobSet exists, not creating") + // 2.2 Find out JobSet's status + r.findJobSetStatus(ctx, childJobSet) return ctrl.Result{}, nil } // 3. Update the cluster - create update and delete other resources log.Info("PathwaysJob: creating JobSet \n") if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { - log.Error(err, "PathwaysJob: failed to create JobSet \n") + log.Error(err, "PathwaysJob: failed to create JobSet") return ctrl.Result{}, err } //4. Update the object's status using Conditions (?) + // childJobSet, _ = r.getChildJobSet(ctx, pw, jobSetClient) + // r.findJobSetStatus(ctx, childJobSet) //5. 
Return a result log.Info("PathwaysJob: DONE DONE DONE!") @@ -158,10 +158,12 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo log2.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + // truth := true + var jobs []jobsetv1alpha2.ReplicatedJob var rmJobName string - // // Pathways Spec + JobSet for batch inference ------ + // // Pathways Spec + JobSet for training or batch inference ------ if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { rmJobName = "leader" jobs, _ = MakeLeaderJobForColocatedDeployment(ctx, pw, rmJobName) @@ -179,9 +181,11 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo }, Spec: jobsetv1alpha2.JobSetSpec{ FailurePolicy: &jobsetv1alpha2.FailurePolicy{ - MaxRestarts: 4, + MaxRestarts: pw.Spec.MaxRestarts, }, + SuccessPolicy: &jobsetv1alpha2.SuccessPolicy{Operator: jobsetv1alpha2.OperatorAny, TargetReplicatedJobs: []string{rmJobName}}, // change this when needed ReplicatedJobs: append(jobs, workerJob), + // Suspend: &truth, }, } @@ -205,16 +209,47 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo // SetupWithManager sets up the controller with the Manager. func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { - // if err := jobsetv1alpha2.AddToScheme(mgr.GetScheme()); err != nil { - // return err - // } + if err := jobsetv1alpha2.AddToScheme(mgr.GetScheme()); err != nil { + return err + } return ctrl.NewControllerManagedBy(mgr). For(&pathwaysjob.PathwaysJob{}). - // Owns(&jobsetv1alpha2.JobSet{}). // For JobSet + Owns(&jobsetv1alpha2.JobSet{}). 
// For JobSet Complete(r) } +func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobsetv1alpha2.JobSet) { + // (bool, jobsetv1alpha2.JobSetConditionType) + log := ctrl.LoggerFrom(ctx) + log.Info("PathwaysJob findJobSetStatus", "Jobset name", js.ObjectMeta.Name) + + for _, c := range js.Status.Conditions { + log.Info("\n\n PathwaysJob CONDITION", "CONDITION", c.Type, "Status", c.Status, "Message", c.Message) + if (c.Type == string(jobsetv1alpha2.JobSetSuspended) || c.Type == string(jobsetv1alpha2.JobSetCompleted) || c.Type == string(jobsetv1alpha2.JobSetFailed)) && c.Status == metav1.ConditionTrue { + log.Info("\n\n PathwaysJob: JobSet in TERMINAL STATE", "Condition ", c.Type) + } + if (c.Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || + c.Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress)) && c.Status == metav1.ConditionTrue { + log.Info("\n\n PathwaysJob: JobSet in TERMINAL STATE", "Condition ", c.Type) + } + } + + // for _, condition := range js.Status.Conditions { + // log.Info("PathwaysJob findJobSetStatus Jobset ", "condition ", condition.Type) + // if condition.Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || + // condition.Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress) { + // // && c.Status == corev1.ConditionTrue + // // return true, condition.Type + // } + // } + // return false, "" + for _, status := range js.Status.ReplicatedJobsStatus { + log.Info("PathwaysJob RJ status ", "Name ", status.Name, "Ready ", status.Ready, "Succeeded ", status.Succeeded, "Failed ", status.Failed, "Active ", status.Active, "Suspended ", status.Suspended) + } + +} + // ---------------------- PATHWAYS HELPERS -------------------------- func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { From c2ea1358e60e67fe4ff36ef8a08146376299ae35 Mon Sep 17 00:00:00 2001 From: Roshani Narasimhan Date: Wed, 30 Oct 2024 23:47:04 +0000 Subject: [PATCH 22/32] 
Redefined PathwaysJobStatus. --- api/v1/pathwaysjob_types.go | 57 +++++++++++----- api/v1/zz_generated.deepcopy.go | 68 ++++++++++++++++--- ...ways-job.pathways.domain_pathwaysjobs.yaml | 47 ++++++------- internal/controller/pathwaysjob_controller.go | 51 +++++++++++++- 4 files changed, 171 insertions(+), 52 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index f9f1a62f..814ee5a7 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -87,13 +87,12 @@ type PathwaysJobStatus struct { // controller statuses. // One of - Pending, Running, Suspended, Completed, Failed. // Contains a human readable message to provide additional details to the - // user. Conditions are mentioned below in more detail. + // user. Conditions are mentioned in PathwaysConditionType. // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` // Track the of the Pathways TPU workers - - // derived from Worker replicatedJob - WorkerStatus *PathwaysComponentStatus `json:"workerStatus,omitempty"` + WorkersStatus *WorkersStatus `json:"workersStatus,omitempty"` // Tracks the of the Pathways controller - // 1. derived from "leader" replicatedJob in colocated mode @@ -146,15 +145,16 @@ type ControllerSpec struct { // +optional UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` } + type PathwaysConditionType string // These are built-in conditions for PathwaysJob. const ( - // PathwaysJobPending means the underlying JobSet may be deployed, but - // pods are yet to be scheduled on nodes. + // PathwaysJobPending means the PathwaysJob is constructed and/or may be + // deployed, but pods are yet to be scheduled on nodes. PathwaysJobPending PathwaysConditionType = "Pending" - // PathwaysJobRunning means the underlying JobSet has been scheduled and - // is in progress. + // PathwaysJobRunning means PathwaysJob has been scheduled and + // Pathways servers have started running. 
PathwaysJobRunning PathwaysConditionType = "Running" // PathwaysJobCompleted means the underlying JobSet has completed its // execution. @@ -166,17 +166,42 @@ const ( PathwaysJobSuspended PathwaysConditionType = "Suspended" ) -type PathwaysComponentStatus struct { - // of the Pathways Component ~~ (Worker or Controller - // replicatedJobs) - // Pending - one of more jobs ready but not active. - // Running - all jobs active. - // Suspended - all jobs suspended. - // Completed - all jobs completed successfully. - // Failed - one or more jobs failed. - CurrentStatus string `json:"currentStatus,omitempty"` +type ControllerStatus struct { + // Status of the Pathways Controller + CurrentStatus *PathwaysComponentStatus `json:"currentStatus,omitempty"` +} + +type WorkersStatus struct { + // Status aggregated over all TPU slices. + // One of - Pending, Running, Suspended, Completed, Failed. + AggregateWorkersStatus *PathwaysComponentStatus `json:"aggregateWorkersStatus,omitempty"` + // Status details on each TPU worker slice + WorkersSliceStatus []WorkerSliceStatus `json:"workersSliceStatus,omitempty"` +} + +type WorkerSliceStatus struct { + // Individual TPU slice's status. + SliceStatus *PathwaysComponentStatus `json:"sliceStatus,omitempty"` + // Number of workers in the slice that are ready. + Ready int32 `json:"ready,omitempty"` } +type PathwaysComponentStatus string + +// Pending - one of more jobs ready but not active. +// Running - all jobs active. +// Suspended - all jobs suspended. +// Completed - all jobs completed successfully. +// Failed - one or more jobs failed. 
+const ( + PathwaysComponentStatusPending PathwaysComponentStatus = "Pending" + // Running will be based on a readiness probe + PathwaysComponentStatusRunning PathwaysComponentStatus = "Running" + PathwaysComponentStatusCompleted PathwaysComponentStatus = "Completed" + PathwaysComponentStatusFailed PathwaysComponentStatus = "Failed" + PathwaysComponentStatusSuspended PathwaysComponentStatus = "Suspended" +) + func init() { SchemeBuilder.Register(&PathwaysJob{}, &PathwaysJobList{}) } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 8ea8aa1d..cbe5041a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -47,16 +47,21 @@ func (in *ControllerSpec) DeepCopy() *ControllerSpec { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PathwaysComponentStatus) DeepCopyInto(out *PathwaysComponentStatus) { +func (in *ControllerStatus) DeepCopyInto(out *ControllerStatus) { *out = *in + if in.CurrentStatus != nil { + in, out := &in.CurrentStatus, &out.CurrentStatus + *out = new(PathwaysComponentStatus) + **out = **in + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PathwaysComponentStatus. -func (in *PathwaysComponentStatus) DeepCopy() *PathwaysComponentStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ControllerStatus. 
+func (in *ControllerStatus) DeepCopy() *ControllerStatus { if in == nil { return nil } - out := new(PathwaysComponentStatus) + out := new(ControllerStatus) in.DeepCopyInto(out) return out } @@ -155,10 +160,10 @@ func (in *PathwaysJobStatus) DeepCopyInto(out *PathwaysJobStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.WorkerStatus != nil { - in, out := &in.WorkerStatus, &out.WorkerStatus - *out = new(PathwaysComponentStatus) - **out = **in + if in.WorkersStatus != nil { + in, out := &in.WorkersStatus, &out.WorkersStatus + *out = new(WorkersStatus) + (*in).DeepCopyInto(*out) } if in.ControllerStatus != nil { in, out := &in.ControllerStatus, &out.ControllerStatus @@ -177,6 +182,26 @@ func (in *PathwaysJobStatus) DeepCopy() *PathwaysJobStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WorkerSliceStatus) DeepCopyInto(out *WorkerSliceStatus) { + *out = *in + if in.SliceStatus != nil { + in, out := &in.SliceStatus, &out.SliceStatus + *out = new(PathwaysComponentStatus) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerSliceStatus. +func (in *WorkerSliceStatus) DeepCopy() *WorkerSliceStatus { + if in == nil { + return nil + } + out := new(WorkerSliceStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkerSpec) DeepCopyInto(out *WorkerSpec) { *out = *in @@ -191,3 +216,30 @@ func (in *WorkerSpec) DeepCopy() *WorkerSpec { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *WorkersStatus) DeepCopyInto(out *WorkersStatus) { + *out = *in + if in.AggregateWorkersStatus != nil { + in, out := &in.AggregateWorkersStatus, &out.AggregateWorkersStatus + *out = new(PathwaysComponentStatus) + **out = **in + } + if in.WorkersSliceStatus != nil { + in, out := &in.WorkersSliceStatus, &out.WorkersSliceStatus + *out = make([]WorkerSliceStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkersStatus. +func (in *WorkersStatus) DeepCopy() *WorkersStatus { + if in == nil { + return nil + } + out := new(WorkersStatus) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index b21e7112..4b83f297 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -8108,7 +8108,7 @@ spec: controller statuses. One of - Pending, Running, Suspended, Completed, Failed. Contains a human readable message to provide additional details to the - user. Conditions are mentioned below in more detail. + user. Conditions are mentioned in PathwaysConditionType. items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for @@ -8186,33 +8186,28 @@ spec: default + headless mode. 3. derived from "rm", "proxy" and "user-job" replicatedJobs in default + container mode. + type: string + workersStatus: + description: Track the of the Pathways TPU workers - properties: - currentStatus: - description: |2- - of the Pathways Component ~~ (Worker or Controller - replicatedJobs) - Pending - one of more jobs ready but not active. - Running - all jobs active. - Suspended - all jobs suspended. - Completed - all jobs completed successfully. 
- Failed - one or more jobs failed. - type: string - type: object - workerStatus: - description: |- - Track the of the Pathways TPU workers - - derived from Worker replicatedJob - properties: - currentStatus: - description: |2- - of the Pathways Component ~~ (Worker or Controller - replicatedJobs) - Pending - one of more jobs ready but not active. - Running - all jobs active. - Suspended - all jobs suspended. - Completed - all jobs completed successfully. - Failed - one or more jobs failed. + aggregateWorkersStatus: + description: |- + Status aggregated over all TPU slices. + One of - Pending, Running, Suspended, Completed, Failed. type: string + workersSliceStatus: + description: Status details on each TPU worker slice + items: + properties: + ready: + description: Number of workers in the slice that are ready. + format: int32 + type: integer + sliceStatus: + description: Individual TPU slice's status. + type: string + type: object + type: array type: object type: object type: object diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index d3ebab0a..984d1d24 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -102,8 +102,8 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) } //4. Update the object's status using Conditions (?) - // childJobSet, _ = r.getChildJobSet(ctx, pw, jobSetClient) - // r.findJobSetStatus(ctx, childJobSet) + childJobSet, _ = r.getChildJobSet(ctx, pw, jobSetClient) + r.findJobSetStatus(ctx, childJobSet) //5. 
Return a result log.Info("PathwaysJob: DONE DONE DONE!") @@ -250,6 +250,53 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobset } +// func calculatePathwaysComponentStatus(ctx context.Context, rjs *jobsetv1alpha2.ReplicatedJobStatus, totalJobs int32) (string, error) { +// // From the replicated Job Status struct, determine if this component is in one of +// // Pending - one of more jobs ready but not active. +// // Running - all jobs active. +// // Suspended - one or more jobs suspended. +// // Completed - all jobs succeeded. +// // Failed - one or more jobs failed. +// var currentStatus string +// if rjs.Failed > 0 { +// currentStatus = "Failed" +// } else if rjs.Succeeded > 0 { +// currentStatus = "Suspended" +// } else if rjs.Ready > 0 { +// currentStatus = "Pending" +// } else if rjs.Active == totalJobs { +// currentStatus = "Running" +// } else if rjs.Succeeded == totalJobs { +// currentStatus = "Completed" +// } + +// return currentStatus, nil +// } + +// func updatePathwaysWorkerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { +// // find worker replicated job, find parallelisms in worker replicated Job for the job count , +// // call calculatePathwaysComponentStatus +// // update status +// } + +// func updatePathwaysControllerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { +// // for colocate mode - +// // find leader replicated job +// // call calculatePathwaysComponentStatus +// // update status + +// // for deafult + headless mode - +// // find rm and proxy replicated jobs +// // call calculatePathwaysComponentStatus +// // update status, combining both statuses + +// // for deafult + container mode - +// // find rm and proxy replicated jobs +// // call calculatePathwaysComponentStatus +// // update status, combining three statuses + +// } + // ---------------------- PATHWAYS HELPERS -------------------------- func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) 
(*corev1.Container, error) { From d30202ba2caa315af939eb1117d2a374712afbf0 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Fri, 28 Feb 2025 01:14:52 +0000 Subject: [PATCH 23/32] Simplify and test. --- api/v1/pathwaysjob_types.go | 2 + ...ways-job.pathways.domain_pathwaysjobs.yaml | 1 + internal/controller/pathwaysjob_controller.go | 281 +++++++++--------- pkg/utils/extra_prototype.go | 26 ++ 4 files changed, 170 insertions(+), 140 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 814ee5a7..879f074a 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -171,6 +171,7 @@ type ControllerStatus struct { CurrentStatus *PathwaysComponentStatus `json:"currentStatus,omitempty"` } +// ReplicatedJob Status in JobSet type WorkersStatus struct { // Status aggregated over all TPU slices. // One of - Pending, Running, Suspended, Completed, Failed. @@ -179,6 +180,7 @@ type WorkersStatus struct { WorkersSliceStatus []WorkerSliceStatus `json:"workersSliceStatus,omitempty"` } +// Job Status in JobSet type WorkerSliceStatus struct { // Individual TPU slice's status. SliceStatus *PathwaysComponentStatus `json:"sliceStatus,omitempty"` diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 4b83f297..8123b52c 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -8198,6 +8198,7 @@ spec: workersSliceStatus: description: Status details on each TPU worker slice items: + description: Job Status in JobSet properties: ready: description: Number of workers in the slice that are ready. 
diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 984d1d24..da227a79 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -110,31 +110,6 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } -// function to listChildJobSets, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L44 - -// function to updatePathwaysJob Status ~~ updateJobSetStatus. Pathways status is same as JobSet Status. This function will mainly update Conditions and Message. -// similar to https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L248 -// JobSet conditions - https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L822 - -// function to suspendJobSet - -// function to resumeJobSet - -// function to deleteJobSet, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L41 - -// function isJobSetFinished reuse jobSetFinished - -// funtion pathwaysJobFinished (?) 
- -// function setCondition and updateCondition - -// function setPathwaysJobCompletedCondition - -// function setPathwaysJobFailedCondition - -// function setPathwaysJobSuspendedCondition - -// function setPathwaysJobResumedCondition func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) (*jobsetv1alpha2.JobSet, error) { log3 := ctrl.LoggerFrom(ctx) // .WithValues("pathwaysjob", klog.KObj(pw)) @@ -244,58 +219,13 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobset // } // } // return false, "" - for _, status := range js.Status.ReplicatedJobsStatus { - log.Info("PathwaysJob RJ status ", "Name ", status.Name, "Ready ", status.Ready, "Succeeded ", status.Succeeded, "Failed ", status.Failed, "Active ", status.Active, "Suspended ", status.Suspended) - } - -} - -// func calculatePathwaysComponentStatus(ctx context.Context, rjs *jobsetv1alpha2.ReplicatedJobStatus, totalJobs int32) (string, error) { -// // From the replicated Job Status struct, determine if this component is in one of -// // Pending - one of more jobs ready but not active. -// // Running - all jobs active. -// // Suspended - one or more jobs suspended. -// // Completed - all jobs succeeded. -// // Failed - one or more jobs failed. 
-// var currentStatus string -// if rjs.Failed > 0 { -// currentStatus = "Failed" -// } else if rjs.Succeeded > 0 { -// currentStatus = "Suspended" -// } else if rjs.Ready > 0 { -// currentStatus = "Pending" -// } else if rjs.Active == totalJobs { -// currentStatus = "Running" -// } else if rjs.Succeeded == totalJobs { -// currentStatus = "Completed" -// } - -// return currentStatus, nil -// } - -// func updatePathwaysWorkerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { -// // find worker replicated job, find parallelisms in worker replicated Job for the job count , -// // call calculatePathwaysComponentStatus -// // update status -// } - -// func updatePathwaysControllerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { -// // for colocate mode - -// // find leader replicated job -// // call calculatePathwaysComponentStatus -// // update status + // for _, status := range js.Status.ReplicatedJobsStatus { + // log.Info("PathwaysJob RJ status ", "Name ", status.Name, "Ready ", status.Ready, "Succeeded ", status.Succeeded, "Failed ", status.Failed, "Active ", status.Active, "Suspended ", status.Suspended) + // } -// // for deafult + headless mode - -// // find rm and proxy replicated jobs -// // call calculatePathwaysComponentStatus -// // update status, combining both statuses + // updateWorkerStatus(ctx, js) -// // for deafult + container mode - -// // find rm and proxy replicated jobs -// // call calculatePathwaysComponentStatus -// // update status, combining three statuses - -// } +} // ---------------------- PATHWAYS HELPERS -------------------------- @@ -346,6 +276,73 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. 
return &proxyContainerSpec, nil } +// Constructs JobSet's replicated job for the Pathways worker +func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { + // Some predefined variables + truth := true + volumeSourceType := corev1.HostPathDirectoryOrCreate + + workerJob := jobsetv1alpha2.ReplicatedJob{ + Name: "worker", + Replicas: int32(pw.Spec.Workers[0].NumSlices), + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(2)), // remember to update + Parallelism: ptr.To(int32(2)), // remember to update + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "pathways-worker", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + ImagePullPolicy: "Always", + SecurityContext: &corev1.SecurityContext{Privileged: &truth}, + Args: []string{ + "--server_port=38679", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), rmJobName, pw.GetName()), + fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), + }, + Env: []corev1.EnvVar{ + {Name: "TPU_MIN_LOG_LEVEL", Value: "0"}, + {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, + {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, + }, + Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-tmp", + MountPath: "/tmp", + }, + }, + Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, + }, // end Pathways worker container + }, + NodeSelector: map[string]string{ + "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: 
&corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + }, + }, + }, + }, + } // end worker replicated job + return workerJob, nil +} + func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) { affinity := corev1.Affinity{ PodAffinity: &corev1.PodAffinity{ @@ -575,69 +572,73 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ return []jobsetv1alpha2.ReplicatedJob{rmJob, proxyJob, userJob}, nil } -// Constructs JobSet's replicated job for the Pathways worker -func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { - // Some predefined variables - truth := true - volumeSourceType := corev1.HostPathDirectoryOrCreate +// ---------------------- PATHWAYS STATUS HELPERS -------------------------- - workerJob := jobsetv1alpha2.ReplicatedJob{ - Name: "worker", - Replicas: int32(pw.Spec.Workers[0].NumSlices), - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(2)), // remember to update - Parallelism: ptr.To(int32(2)), // remember to update - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pathways-worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", - ImagePullPolicy: "Always", - SecurityContext: &corev1.SecurityContext{Privileged: &truth}, - Args: []string{ - "--server_port=38679", - fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), rmJobName, pw.GetName()), - fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), - }, - Env: []corev1.EnvVar{ - {Name: "TPU_MIN_LOG_LEVEL", Value: "0"}, - {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, - {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, - }, 
- Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "shared-tmp", - MountPath: "/tmp", - }, - }, - Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"google.com/tpu": *resource.NewQuantity(4, resource.DecimalSI)}}, - }, // end Pathways worker container - }, - NodeSelector: map[string]string{ - "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, - "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, - }, - Volumes: []corev1.Volume{ - { - Name: "shared-tmp", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/tmp", - Type: &volumeSourceType, - }, - }, - }, - }, // end Volumes - HostNetwork: true, // For performance == McJAX - DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX - }, - }, - }, - }, - } // end worker replicated job - return workerJob, nil -} +// func calculatePathwaysComponentStatus(ctx context.Context, rjs *jobsetv1alpha2.ReplicatedJobStatus, totalJobs int32) (string, error) { +// // From the replicated Job Status struct, determine if this component is in one of +// // Pending - one of more jobs ready but not active. +// // Running - all jobs active. +// // Suspended - one or more jobs suspended. +// // Completed - all jobs succeeded. +// // Failed - one or more jobs failed. 
+// var currentStatus string +// if rjs.Failed > 0 { +// currentStatus = "Failed" +// } else if rjs.Succeeded > 0 { +// currentStatus = "Suspended" +// } else if rjs.Ready > 0 { +// currentStatus = "Pending" +// } else if rjs.Active == totalJobs { +// currentStatus = "Running" +// } else if rjs.Succeeded == totalJobs { +// currentStatus = "Completed" +// } + +// return currentStatus, nil +// } + +// func updatePathwaysControllerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { +// // for colocate mode - +// // find leader replicated job +// // call calculatePathwaysComponentStatus +// // update status + +// // for deafult + headless mode - +// // find rm and proxy replicated jobs +// // call calculatePathwaysComponentStatus +// // update status, combining both statuses + +// // for deafult + container mode - +// // find rm and proxy replicated jobs +// // call calculatePathwaysComponentStatus +// // update status, combining three statuses + +// } + +// func updateWorkerStatus(ctx context.Context, js *jobsetv1alpha2.JobSet) error { +// // find worker replicated job, find parallelisms in worker replicated Job for the job count , +// // compare with ReplicatedJobStatus +// // call updateWorkerSliceStatus +// // update status +// log2 := ctrl.LoggerFrom(ctx) +// workerReplicatedJobStatus := findReplicatedJobStatusByName(js, "worker") +// log2.Info("PathwaysJob: in updateWorkerStatus", "Name ", workerReplicatedJobStatus.Name, "Ready ", workerReplicatedJobStatus.Ready, "Succeeded ", workerReplicatedJobStatus.Succeeded, "Failed ", workerReplicatedJobStatus.Failed, "Active ", workerReplicatedJobStatus.Active, "Suspended ", workerReplicatedJobStatus.Suspended) +// return nil +// } + +// func updateWorkerSliceStatus(ctx context.Context) error { +// // find worker job, find parallelisms in worker replicated Job for the job count , +// // call calculatePathwaysComponentStatus +// // update status +// // JobSetSpec -> ReplicatedJobs -> Template -> JobSpec, 
JobStatus + +// } + +// func findReplicatedJobStatusByName(js *jobsetv1alpha2.JobSet, replicatedJobName string) *jobsetv1alpha2.ReplicatedJobStatus { +// for _, rjob := range js.Status.ReplicatedJobsStatus { +// if rjob.Name == replicatedJobName { +// return &rjob +// } +// } +// return nil // Replicated job not found +// } diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 2cc5ac29..89026032 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -186,3 +186,29 @@ package utils // } // } // } + +// function to listChildJobSets, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L44 + +// function to updatePathwaysJob Status ~~ updateJobSetStatus. Pathways status is same as JobSet Status. This function will mainly update Conditions and Message. +// similar to https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L248 +// JobSet conditions - https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L822 + +// function to suspendJobSet + +// function to resumeJobSet + +// function to deleteJobSet, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L41 + +// function isJobSetFinished reuse jobSetFinished + +// funtion pathwaysJobFinished (?) + +// function setCondition and updateCondition + +// function setPathwaysJobCompletedCondition + +// function setPathwaysJobFailedCondition + +// function setPathwaysJobSuspendedCondition + +// function setPathwaysJobResumedCondition From 14a85219faf7c6d95f2b9a3b3682c91c872afff2 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 4 Mar 2025 06:54:40 +0000 Subject: [PATCH 24/32] Update port numbers, topology etc. Evaluate colocate mode. 
--- .../samples/pathways-job_v1_pathwaysjob.yaml | 10 +++---- internal/controller/pathwaysjob_controller.go | 29 +++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index 60762e51..ecc547f2 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,12 +15,12 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-trial1 + name: pathways-trial11 spec: - maxRestarts: 0 + maxRestarts: 4 workers: - type: tpu-v4-podslice - topology: 2x2x2 + topology: 2x2x1 numSlices: 1 pathwaysDir: "gs://cloud-pathways-staging/tmp" controller: @@ -35,7 +35,7 @@ spec: - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-trial1-leader-0-0.pathways-trial1:38681 + value: grpc://pathways-trial11-leader-0-0.pathways-trial11:29008 image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest imagePullPolicy: Always command: @@ -65,7 +65,7 @@ spec: # - name: JAX_PLATFORMS # value: proxy # - name: JAX_BACKEND_TARGET - # value: grpc://pathways-trial1-leader-0-0.pathways-trial1:38681 + # value: grpc://pathways-trial1-leader-0-0.pathways-trial1:29000 # command: # - bash # - -c diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index da227a79..22dd888a 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -149,6 +149,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw, rmJobName) + log2.Info("Length of jobs - ", "HERERERERERE", len(append(jobs, workerJob))) + mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), @@ -238,11 +240,11 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, 
rmJobName string) ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--server_port=38677", + "--server_port=29001", fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), "--node_type=resource_manager", fmt.Sprintf("--instance_count=%d", int32(pw.Spec.Workers[0].NumSlices)), - "--instance_type=tpuv4:2x2x2", // Change + "--instance_type=tpuv4:2x2x1", // Remember to change }, Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, @@ -250,7 +252,7 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pw.GetName(), rmJobName, pw.GetName())}, {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, }, - Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, + Ports: []corev1.ContainerPort{{ContainerPort: 29001}, {ContainerPort: 29002}}, // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(8000000000, resource.DecimalSI)}}, } return &rmContainerSpec, nil @@ -266,11 +268,11 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. 
ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--server_port=38681", - fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), rmJobName, pw.GetName()), + "--server_port=29008", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:29001", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, - Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, + Ports: []corev1.ContainerPort{{ContainerPort: 29008}}, // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, } return &proxyContainerSpec, nil @@ -282,14 +284,17 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate + logx := ctrl.LoggerFrom(ctx) + logx.Info("************************* PathwaysJob MakeWorkerJob ", "Number of jobs", pw.Spec.Workers[0].NumSlices) + workerJob := jobsetv1alpha2.ReplicatedJob{ Name: "worker", Replicas: int32(pw.Spec.Workers[0].NumSlices), Template: batchv1.JobTemplateSpec{ Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(2)), // remember to update - Parallelism: ptr.To(int32(2)), // remember to update + BackoffLimit: ptr.To(int32(4)), + Completions: ptr.To(int32(1)), // number of workers remember to change + Parallelism: ptr.To(int32(1)), // number of workers remember to change Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -299,8 +304,8 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--server_port=38679", - fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:38677", pw.GetName(), 
rmJobName, pw.GetName()), + "--server_port=29005", + fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:29001", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, Env: []corev1.EnvVar{ @@ -308,7 +313,7 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s {Name: "TF_CPP_MIN_LOG_LEVEL", Value: "0"}, {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, }, - Ports: []corev1.ContainerPort{{ContainerPort: 38679}, {ContainerPort: 38680}, {ContainerPort: 8471}, {ContainerPort: 8080}}, + Ports: []corev1.ContainerPort{{ContainerPort: 29005}, {ContainerPort: 29006}, {ContainerPort: 8471}, {ContainerPort: 8080}}, VolumeMounts: []corev1.VolumeMount{ { Name: "shared-tmp", From 6b94be9bc8d68f2775e4b2c5981c09638558edb0 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Wed, 5 Mar 2025 20:28:41 +0000 Subject: [PATCH 25/32] Clean up for release. --- .../samples/pathways-job_v1_pathwaysjob.yaml | 56 +---- internal/controller/pathwaysjob_controller.go | 213 +++++------------- 2 files changed, 68 insertions(+), 201 deletions(-) diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index ecc547f2..203ce9c3 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,7 +15,7 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-trial11 + name: pathways-trial39 spec: maxRestarts: 4 workers: @@ -25,6 +25,7 @@ spec: pathwaysDir: "gs://cloud-pathways-staging/tmp" controller: deploymentMode: "colocate" + # deploymentMode: "default" template: spec: containers: @@ -35,64 +36,19 @@ spec: - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-trial11-leader-0-0.pathways-trial11:29008 + value: grpc://pathways-trial39-leader-0-0.pathways-trial39:29008 + # value: grpc://pathways-trial38-proxy-0-0.pathways-trial38:29008 image: 
us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest imagePullPolicy: Always command: - bash - -c - | - (python3 MaxText/train.py MaxText/configs/base.yml base_output_directory=gs://cloud-pathways-staging dataset_path=gs://maxtext-dataset/ per_device_batch_size=1 enable_checkpointing=false remat_policy=full global_parameter_scale=4 steps=10 max_target_length=2048 use_iota_embed=true reuse_example_batch=1 dataset_type=synthetic attention=flash gcs_metrics=True run_name=roshanin-regular); + (python3 MaxText/train.py MaxText/configs/base.yml base_output_directory=gs://cloud-pathways-staging dataset_path=gs://maxtext-dataset/ steps=10 run_name=roshanin-pathways1 enable_single_controller=true attention=dot_product monitor_goodput=False enable_tensorboard=True enable_checkpointing=False); volumeMounts: - mountPath: /tmp name: shared-tmp # resources: # limits: # cpu: "20" - # memory: 90G - - # template: # UserPodTemplate - # spec: - # containers: - # - name: jetstream - # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - # imagePullPolicy: Always - # ports: - # - containerPort: 9000 - # env: - # - name: XCLOUD_ENVIRONMENT - # value: GCP - # - name: JAX_PLATFORMS - # value: proxy - # - name: JAX_BACKEND_TARGET - # value: grpc://pathways-trial1-leader-0-0.pathways-trial1:29000 - # command: - # - bash - # - -c - # - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); trap - # _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py - # MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 - # load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items - # max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false - # model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 - # ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) - # & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; - # EXIT_CODE=$? echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep - # 50;' - # - name: tester - # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - # imagePullPolicy: Always - # command: - # - bash - # - -c - # - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap - # _sigterm SIGTERM; for i in {1..2}; do echo Sending request $i; time python3 - # JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 - # --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; - # echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne - # 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: - # $(date); sleep 50;' - # securityContext: - # privileged: true - - + # memory: 90G \ No newline at end of file diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 22dd888a..f396247c 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -120,7 +120,6 @@ func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathways var js *jobsetv1alpha2.JobSet js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Get(ctx, pw.GetName(), metav1.GetOptions{}) if err != nil { 
- // log3.Info("PathwaysJob: can't get JobSets: ", "error ", err) return nil, err } return js, nil @@ -133,12 +132,10 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo log2.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) - // truth := true - var jobs []jobsetv1alpha2.ReplicatedJob var rmJobName string - // // Pathways Spec + JobSet for training or batch inference ------ + // Pathways Spec + JobSet for training or batch inference ------ if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { rmJobName = "leader" jobs, _ = MakeLeaderJobForColocatedDeployment(ctx, pw, rmJobName) @@ -149,8 +146,6 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw, rmJobName) - log2.Info("Length of jobs - ", "HERERERERERE", len(append(jobs, workerJob))) - mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), @@ -160,9 +155,8 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo FailurePolicy: &jobsetv1alpha2.FailurePolicy{ MaxRestarts: pw.Spec.MaxRestarts, }, - SuccessPolicy: &jobsetv1alpha2.SuccessPolicy{Operator: jobsetv1alpha2.OperatorAny, TargetReplicatedJobs: []string{rmJobName}}, // change this when needed + SuccessPolicy: &jobsetv1alpha2.SuccessPolicy{Operator: jobsetv1alpha2.OperatorAll, TargetReplicatedJobs: []string{rmJobName}}, // ToDo: change to user job ReplicatedJobs: append(jobs, workerJob), - // Suspend: &truth, }, } @@ -197,7 +191,6 @@ func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { } func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobsetv1alpha2.JobSet) { - // (bool, jobsetv1alpha2.JobSetConditionType) log := ctrl.LoggerFrom(ctx) log.Info("PathwaysJob findJobSetStatus", "Jobset name", js.ObjectMeta.Name) @@ -211,32 +204,17 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, 
js *jobset log.Info("\n\n PathwaysJob: JobSet in TERMINAL STATE", "Condition ", c.Type) } } - - // for _, condition := range js.Status.Conditions { - // log.Info("PathwaysJob findJobSetStatus Jobset ", "condition ", condition.Type) - // if condition.Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || - // condition.Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress) { - // // && c.Status == corev1.ConditionTrue - // // return true, condition.Type - // } - // } - // return false, "" - // for _, status := range js.Status.ReplicatedJobsStatus { - // log.Info("PathwaysJob RJ status ", "Name ", status.Name, "Ready ", status.Ready, "Succeeded ", status.Succeeded, "Failed ", status.Failed, "Active ", status.Active, "Suspended ", status.Suspended) - // } - - // updateWorkerStatus(ctx, js) - } // ---------------------- PATHWAYS HELPERS -------------------------- +// Constructs the Pathways resource manager container spec for the underlying JobSet func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { truth := true rmContainerSpec := corev1.Container{ Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_server:latest", ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -244,7 +222,7 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), "--node_type=resource_manager", fmt.Sprintf("--instance_count=%d", int32(pw.Spec.Workers[0].NumSlices)), - "--instance_type=tpuv4:2x2x1", // Remember to change + "--instance_type=tpuv4:2x2x1", // ToDo: update with Spec map }, Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: 
"metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, @@ -258,13 +236,13 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) return &rmContainerSpec, nil } +// Constructs the Pathways proxy container spec for the underlying JobSet func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { - // Some predefined variables truth := true proxyContainerSpec := corev1.Container{ Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_proxy_server:latest", ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -272,21 +250,18 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:29001", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, + // "--xla_tpu_spmd_rng_bit_generator_unsafe=True", Ports: []corev1.ContainerPort{{ContainerPort: 29008}}, // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, } return &proxyContainerSpec, nil } -// Constructs JobSet's replicated job for the Pathways worker +// Constructs Pathways worker replicated job for the underlying JobSet func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { - // Some predefined variables truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate - logx := ctrl.LoggerFrom(ctx) - logx.Info("************************* PathwaysJob MakeWorkerJob ", "Number of jobs", pw.Spec.Workers[0].NumSlices) - workerJob := jobsetv1alpha2.ReplicatedJob{ Name: "worker", Replicas: int32(pw.Spec.Workers[0].NumSlices), @@ -300,7 
+275,7 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s Containers: []corev1.Container{ { Name: "pathways-worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", + Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_server:latest", ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -348,6 +323,7 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s return workerJob, nil } +// Affinity rules to allow the leader pod to coexist with worker pod in the 'colocate' mode. func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) { affinity := corev1.Affinity{ PodAffinity: &corev1.PodAffinity{ @@ -390,13 +366,16 @@ func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) return &affinity, nil } +// Get the containers (main workload and any sidecars) from the user's pod spec. +// This is used to inject the containers into the leader pod in the 'colocate' deployment mode. func GetUserContainerList(pw *pathwaysjob.PathwaysJob) ([]corev1.Container, error) { containerList := pw.Spec.Controller.UserPodTemplate.Spec.Containers return containerList, nil } +// Construct the "leader" replicated job containing the Pathways RM, Pathways Proxy and User job +// as containers within a pod for the 'colocate' deployment mode. 
func MakeLeaderJobForColocatedDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) ([]jobsetv1alpha2.ReplicatedJob, error) { - // Some predefined variables volumeSourceType := corev1.HostPathDirectoryOrCreate RMContainerSpec, _ := MakeResourceManagerContainer(pw, rmJobName) @@ -405,9 +384,6 @@ func MakeLeaderJobForColocatedDeployment(ctx context.Context, pw *pathwaysjob.Pa containerList, _ := GetUserContainerList(pw) containerList = append(containerList, *RMContainerSpec, *ProxyContainerSpec) - // log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) - // log3.Info("PathwaysJob:...MakeLeaderJob", "Length of container list is", len(containerList)) - leaderJob := jobsetv1alpha2.ReplicatedJob{ Name: rmJobName, Replicas: 1, @@ -452,15 +428,13 @@ func MakeLeaderJobForColocatedDeployment(ctx context.Context, pw *pathwaysjob.Pa return []jobsetv1alpha2.ReplicatedJob{leaderJob}, nil } +// Construct replicated jobs for Pathways RM, Pathways Proxy and the user job for the 'default' deployment mode. 
func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) ([]jobsetv1alpha2.ReplicatedJob, error) { - // Some predefined variables volumeSourceType := corev1.HostPathDirectoryOrCreate RMContainerSpec, _ := MakeResourceManagerContainer(pw, rmJobName) ProxyContainerSpec, _ := MakeProxyContainer(pw, rmJobName) - - // log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) - // log3.Info("PathwaysJob:...MakeLeaderJob", "Length of container list is", len(containerList)) + userContainerList, _ := GetUserContainerList(pw) rmJob := jobsetv1alpha2.ReplicatedJob{ Name: "rm", @@ -536,114 +510,51 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ }, } // end replicated Job - userJob := jobsetv1alpha2.ReplicatedJob{ - Name: "user-job", - Replicas: 1, - Template: batchv1.JobTemplateSpec{ - Spec: batchv1.JobSpec{ - BackoffLimit: ptr.To(int32(0)), - Completions: ptr.To(int32(1)), - Parallelism: ptr.To(int32(1)), - Template: corev1.PodTemplateSpec{ - Spec: pw.Spec.Controller.UserPodTemplate.Spec, - // corev1.PodSpec{ - // HostNetwork: true, // For performance == McJAX - // DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX - // Tolerations: []corev1.Toleration{ - // { - // Key: "google.com/tpu", - // Operator: "Exists", - // Effect: "NoSchedule", - // }, - // }, - // Volumes: []corev1.Volume{ - // { - // Name: "shared-tmp", - // VolumeSource: corev1.VolumeSource{ - // HostPath: &corev1.HostPathVolumeSource{ - // Path: "/tmp", - // Type: &volumeSourceType, - // }, - // }, - // }, - // }, // end Volumes - // Containers: []corev1.Container{*RMContainerSpec}, // end leader []containers - // }, // end PodSpec + // Adding user job conditionally for headless mode, if the user has provided containers in PodSpec. 
+ // ToDo: Add other things in PodSpec + if len(userContainerList) > 0 { + userJob := jobsetv1alpha2.ReplicatedJob{ + Name: "user-job", + Replicas: 1, + Template: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + BackoffLimit: ptr.To(int32(0)), + Completions: ptr.To(int32(1)), + Parallelism: ptr.To(int32(1)), + // Template: corev1.PodTemplateSpec{ + // Spec: pw.Spec.Controller.UserPodTemplate.Spec, + // }, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + HostNetwork: true, // For performance == McJAX + DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX + Tolerations: []corev1.Toleration{ // tolerations are important here to not run this job on TPUs + { + Key: "google.com/tpu", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-tmp", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/tmp", + Type: &volumeSourceType, + }, + }, + }, + }, // end Volumes + Containers: userContainerList, + }, // end PodSpec + }, }, }, - }, - } // end replicated Job - - return []jobsetv1alpha2.ReplicatedJob{rmJob, proxyJob, userJob}, nil + } // end replicated Job + // Default mode jobs, when user pod is provided + return []jobsetv1alpha2.ReplicatedJob{rmJob, proxyJob, userJob}, nil + } + // Default mode jobs, headless mode + return []jobsetv1alpha2.ReplicatedJob{rmJob, proxyJob}, nil } - -// ---------------------- PATHWAYS STATUS HELPERS -------------------------- - -// func calculatePathwaysComponentStatus(ctx context.Context, rjs *jobsetv1alpha2.ReplicatedJobStatus, totalJobs int32) (string, error) { -// // From the replicated Job Status struct, determine if this component is in one of -// // Pending - one of more jobs ready but not active. -// // Running - all jobs active. -// // Suspended - one or more jobs suspended. -// // Completed - all jobs succeeded. -// // Failed - one or more jobs failed. 
-// var currentStatus string -// if rjs.Failed > 0 { -// currentStatus = "Failed" -// } else if rjs.Succeeded > 0 { -// currentStatus = "Suspended" -// } else if rjs.Ready > 0 { -// currentStatus = "Pending" -// } else if rjs.Active == totalJobs { -// currentStatus = "Running" -// } else if rjs.Succeeded == totalJobs { -// currentStatus = "Completed" -// } - -// return currentStatus, nil -// } - -// func updatePathwaysControllerStatus(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { -// // for colocate mode - -// // find leader replicated job -// // call calculatePathwaysComponentStatus -// // update status - -// // for deafult + headless mode - -// // find rm and proxy replicated jobs -// // call calculatePathwaysComponentStatus -// // update status, combining both statuses - -// // for deafult + container mode - -// // find rm and proxy replicated jobs -// // call calculatePathwaysComponentStatus -// // update status, combining three statuses - -// } - -// func updateWorkerStatus(ctx context.Context, js *jobsetv1alpha2.JobSet) error { -// // find worker replicated job, find parallelisms in worker replicated Job for the job count , -// // compare with ReplicatedJobStatus -// // call updateWorkerSliceStatus -// // update status -// log2 := ctrl.LoggerFrom(ctx) -// workerReplicatedJobStatus := findReplicatedJobStatusByName(js, "worker") -// log2.Info("PathwaysJob: in updateWorkerStatus", "Name ", workerReplicatedJobStatus.Name, "Ready ", workerReplicatedJobStatus.Ready, "Succeeded ", workerReplicatedJobStatus.Succeeded, "Failed ", workerReplicatedJobStatus.Failed, "Active ", workerReplicatedJobStatus.Active, "Suspended ", workerReplicatedJobStatus.Suspended) -// return nil -// } - -// func updateWorkerSliceStatus(ctx context.Context) error { -// // find worker job, find parallelisms in worker replicated Job for the job count , -// // call calculatePathwaysComponentStatus -// // update status -// // JobSetSpec -> ReplicatedJobs -> Template -> JobSpec, 
JobStatus - -// } - -// func findReplicatedJobStatusByName(js *jobsetv1alpha2.JobSet, replicatedJobName string) *jobsetv1alpha2.ReplicatedJobStatus { -// for _, rjob := range js.Status.ReplicatedJobsStatus { -// if rjob.Name == replicatedJobName { -// return &rjob -// } -// } -// return nil // Replicated job not found -// } From 30fad5c164407aa435d9a4b4349a7cc90fd90a69 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Fri, 7 Mar 2025 00:19:33 +0000 Subject: [PATCH 26/32] Update README, update licenses. --- .golangci.yml | 2 +- Dockerfile | 2 +- README.md | 17 +++++++++++++---- api/v1/groupversion_info.go | 2 +- api/v1/pathwaysjob_types.go | 2 +- api/v1/zz_generated.deepcopy.go | 2 +- cmd/main.go | 2 +- config/crd/kustomization.yaml | 2 +- config/crd/kustomizeconfig.yaml | 2 +- config/default/kustomization.yaml | 2 +- config/default/manager_metrics_patch.yaml | 2 +- config/default/metrics_service.yaml | 2 +- config/manager/kustomization.yaml | 2 +- config/manager/manager.yaml | 2 +- config/prometheus/kustomization.yaml | 2 +- config/prometheus/monitor.yaml | 2 +- config/rbac/kustomization.yaml | 2 +- config/rbac/leader_election_role.yaml | 2 +- config/rbac/leader_election_role_binding.yaml | 2 +- config/rbac/metrics_auth_role.yaml | 2 +- config/rbac/metrics_auth_role_binding.yaml | 2 +- config/rbac/metrics_reader_role.yaml | 2 +- config/rbac/pathwaysjob_editor_role.yaml | 2 +- config/rbac/pathwaysjob_viewer_role.yaml | 2 +- config/rbac/role.yaml | 2 +- config/rbac/role_binding.yaml | 2 +- config/rbac/service_account.yaml | 2 +- config/samples/jobset_example.yaml | 2 +- config/samples/kustomization.yaml | 2 +- config/samples/pathways-job_v1_pathwaysjob.yaml | 2 +- hack/boilerplate.go.txt | 2 +- internal/controller/pathwaysjob_controller.go | 2 +- .../controller/pathwaysjob_controller_test.go | 2 +- internal/controller/suite_test.go | 2 +- pkg/utils/extra_prototype.go | 2 +- test/e2e/e2e_suite_test.go | 2 +- test/e2e/e2e_test.go | 2 +- test/utils/utils.go | 2 +- 38 files 
changed, 50 insertions(+), 41 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 1a174953..b33bfba4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/Dockerfile b/Dockerfile index b66a974e..59f7e13e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index b56b9769..537e545d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,16 @@ # pathways-job -Pathways API is an OSS Kubernetes-native API, for deploying ML training and inference workloads, using Pathways on GKE. - +PathwaysJob API is an OSS Kubernetes-native API, to deploy ML training and batch inference workloads, using Pathways on GKE. +//ToDo(roshanin) - an intro of what Pathways is. ## Description -// TODO(roshanin): An in-depth paragraph about your project and overview of use +The PathwaysJob is an API that provides an easy way to run JAX workloads using Pathways. It support two modes of deployment. +### Colocate mode +The 'colocate' mode bundles the Pathways resource manager(RM), the Pathways proxy and the user workload containers into a single pod called "leader" and deploys them besides a "worker" pod on one of the TPU workers. This is preferred for Pathways batch inference workloads, where latency is crucial. +### Default mode +The default mode is preferred for Pathways training workloads where the worker utilizes the TPUs completely. The Pathways RM and Pathways proxy are scheduled as pods on a CPU nodepool and the "workers" are scheduled on TPUs. +#### With a dockerized workload +The user workload is also scheduled as a pod on the CPU nodepool. 
+#### Headless mode for interactive supercomputing +The user workload is typically on a Vertex AI notebook, so users can connect to the PathwaysJob via port-forwarding. ## Getting Started @@ -11,6 +19,7 @@ Pathways API is an OSS Kubernetes-native API, for deploying ML training and infe - docker version 17.03+. - kubectl version v1.11.3+. - Access to a Kubernetes v1.11.3+ cluster. +- JobSet //ToDo(roshanin) install JobSet ### To Deploy on the cluster **Build and push your image to the location specified by `IMG`:** @@ -98,7 +107,7 @@ More information can be found via the [Kubebuilder Documentation](https://book.k ## License -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/api/v1/groupversion_info.go b/api/v1/groupversion_info.go index f64254e1..16359574 100644 --- a/api/v1/groupversion_info.go +++ b/api/v1/groupversion_info.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 879f074a..6bafb0fc 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index cbe5041a..1d917b85 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ //go:build !ignore_autogenerated /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/cmd/main.go b/cmd/main.go index ff0753ec..9162ee7e 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 56361cb7..9ee2a152 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml index c0e022ee..337df85c 100644 --- a/config/crd/kustomizeconfig.yaml +++ b/config/crd/kustomizeconfig.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 53182412..e5844c9a 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml index 3674d350..2f6bbe92 100644 --- a/config/default/manager_metrics_patch.yaml +++ b/config/default/manager_metrics_patch.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml index 2249f2ac..8fda50cf 100644 --- a/config/default/metrics_service.yaml +++ b/config/default/metrics_service.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 118bfe52..901b48a1 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index d988312c..941467f3 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml index 302b7006..80fabded 100644 --- a/config/prometheus/kustomization.yaml +++ b/config/prometheus/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 662d0f5d..4624dffc 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 7dc72fe5..d35979e7 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index 1a285657..30fdcf87 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index f208dc1c..187e60d4 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml index fa14995c..2135dd6c 100644 --- a/config/rbac/metrics_auth_role.yaml +++ b/config/rbac/metrics_auth_role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml index c46accd4..766ce422 100644 --- a/config/rbac/metrics_auth_role_binding.yaml +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml index 4caa96a1..f0301dcf 100644 --- a/config/rbac/metrics_reader_role.yaml +++ b/config/rbac/metrics_reader_role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/pathwaysjob_editor_role.yaml b/config/rbac/pathwaysjob_editor_role.yaml index 5d375d90..ea4033af 100644 --- a/config/rbac/pathwaysjob_editor_role.yaml +++ b/config/rbac/pathwaysjob_editor_role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/rbac/pathwaysjob_viewer_role.yaml b/config/rbac/pathwaysjob_viewer_role.yaml index 31172d9a..c04316d9 100644 --- a/config/rbac/pathwaysjob_viewer_role.yaml +++ b/config/rbac/pathwaysjob_viewer_role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 28333745..bb6964f7 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index 829048b6..7857b0aa 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index edc98059..cd8f4be4 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/samples/jobset_example.yaml b/config/samples/jobset_example.yaml index 2ffc2488..e897a3e2 100644 --- a/config/samples/jobset_example.yaml +++ b/config/samples/jobset_example.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 07b2b0e8..202b6e77 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index 203ce9c3..b405dda4 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt index ff72ff2a..221dcbe0 100644 --- a/hack/boilerplate.go.txt +++ b/hack/boilerplate.go.txt @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index f396247c..c9d06c11 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/internal/controller/pathwaysjob_controller_test.go b/internal/controller/pathwaysjob_controller_test.go index 1e877a61..e0f669c0 100644 --- a/internal/controller/pathwaysjob_controller_test.go +++ b/internal/controller/pathwaysjob_controller_test.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 28d47d84..d4ff8131 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go index 89026032..2b805398 100644 --- a/pkg/utils/extra_prototype.go +++ b/pkg/utils/extra_prototype.go @@ -1,4 +1,4 @@ -// Copyright 2024 Google LLC +// Copyright 2025 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 98b15632..3977104d 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 7f3e4eee..d240d323 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/utils/utils.go b/test/utils/utils.go index 6b96ab5d..87db8781 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -1,5 +1,5 @@ /* -Copyright 2024. +Copyright 2025. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
From a307565c059f1fff500907145a279ab2798ab304 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Sat, 8 Mar 2025 00:23:42 +0000 Subject: [PATCH 27/32] Generic TPU type, topology and parallelisms; add input validations. --- api/v1/pathwaysjob_types.go | 12 +++- ...ways-job.pathways.domain_pathwaysjobs.yaml | 5 ++ internal/controller/pathwaysjob_controller.go | 69 +++++++++++++++++-- 3 files changed, 79 insertions(+), 7 deletions(-) diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 6bafb0fc..e4067f25 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -112,12 +112,22 @@ const ( Default DeploymentMode = "default" ) +// +kubebuilder:validation:Enum=tpu-v4-podslice;tpu-v5p-slice;tpu-v5-lite-podslice;tpu-v6e-slice +type WorkerType string + +const ( + tpu_v4_podslice WorkerType = "tpu-v4-podslice" + tpu_v5p_slice WorkerType = "tpu-v5p-slice" + tpu_v5_lite_podslice WorkerType = "tpu-v5-lite-podslice" + tpu_v6e_slice WorkerType = "tpu-v6e-slice" +) + // The WorkerSpec struct lists the specifications for the // Pathways workers. 
type WorkerSpec struct { // This will translate to a nodeSelector of the form // cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice - Type string `json:"type"` + Type WorkerType `json:"type"` // This will translate to a nodeSelector of the form // cloud.google.com/gke-tpu-topology:2x2 diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 8123b52c..bf2270c3 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -8088,6 +8088,11 @@ spec: description: |- This will translate to a nodeSelector of the form cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice + enum: + - tpu-v4-podslice + - tpu-v5p-slice + - tpu-v5-lite-podslice + - tpu-v6e-slice type: string required: - numSlices diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index c9d06c11..db0e86b8 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -19,6 +19,8 @@ package controller import ( "context" "fmt" + "strconv" + "strings" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" @@ -43,6 +45,14 @@ type PathwaysJobReconciler struct { Scheme *runtime.Scheme } +// Public variables to store TPU information +var ( + TPUVersion string + TPUTopology string + InstanceType string + NumVMs int32 +) + // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
// TODO(user): Modify the Reconcile function to compare the state specified by @@ -85,7 +95,7 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) childJobSet, err := r.getChildJobSet(ctx, pw, jobSetClient) if err != nil { - log.Info("PathwaysJob: can't find JobSet") + log.Info("PathwaysJob: can't find JobSet, may create one!") // return ctrl.Result{}, err } else if childJobSet != nil { log.Info("PathwaysJob: JobSet exists, not creating") @@ -135,6 +145,9 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo var jobs []jobsetv1alpha2.ReplicatedJob var rmJobName string + calculateTPUInfo(ctx, pw) + log2.Info("PathwaysJob: in createJobSet TPU variables", "TPUVersion", TPUVersion, "TPUTopology", TPUTopology, "InstanceType", InstanceType, "NumVMs", NumVMs) + // Pathways Spec + JobSet for training or batch inference ------ if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { rmJobName = "leader" @@ -208,6 +221,50 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobset // ---------------------- PATHWAYS HELPERS -------------------------- +func extractTPUVersionFromWorkersType(ctx context.Context, tpuGKEAcceleratorType pathwaysjob.WorkerType) string { + log := ctrl.LoggerFrom(ctx) + parts := strings.Split(string(tpuGKEAcceleratorType), "-") + if len(parts) >= 2 && strings.HasPrefix(parts[1], "v") { + log.Info("TPU type and version", "value ", parts[0]+parts[1]) + return parts[0] + parts[1] + } + return "" +} + +func validateTPUTopology(topology string) string { + // ToDo: validate topology based on the TPU type + return topology +} +func calculateVMsFromTopology(topology string) int32 { + parts := strings.Split(topology, "x") // Examples - 2x2x4 or 4x4 + if len(parts) < 2 { + return 0 + } + // Calculate the number of chips based on the Topology. 
+ chips := 1 + for _, part := range parts { + num, _ := strconv.Atoi(part) + chips *= num + } + vms := 1 + chipsperVM := 4 + if chips >= chipsperVM { + vms = chips / chipsperVM + } + + return int32(vms) +} + +func calculateTPUInfo(ctx context.Context, pw *pathwaysjob.PathwaysJob) { + tpuVersion := extractTPUVersionFromWorkersType(ctx, pw.Spec.Workers[0].Type) + TPUVersion = tpuVersion // setting public variable + tpuTopology := validateTPUTopology(pw.Spec.Workers[0].Topology) + TPUTopology = tpuTopology // setting public variable + InstanceType = tpuVersion + ":" + tpuTopology + NumVMs = calculateVMsFromTopology(pw.Spec.Workers[0].Topology) + +} + // Constructs the Pathways resource manager container spec for the underlying JobSet func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { truth := true @@ -222,7 +279,7 @@ func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), "--node_type=resource_manager", fmt.Sprintf("--instance_count=%d", int32(pw.Spec.Workers[0].NumSlices)), - "--instance_type=tpuv4:2x2x1", // ToDo: update with Spec map + fmt.Sprintf("--instance_type=%s", InstanceType), }, Env: []corev1.EnvVar{ {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, @@ -268,8 +325,8 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s Template: batchv1.JobTemplateSpec{ Spec: batchv1.JobSpec{ BackoffLimit: ptr.To(int32(4)), - Completions: ptr.To(int32(1)), // number of workers remember to change - Parallelism: ptr.To(int32(1)), // number of workers remember to change + Completions: ptr.To(int32(NumVMs)), // number of workers remember to change + Parallelism: ptr.To(int32(NumVMs)), // number of workers remember to change Template: corev1.PodTemplateSpec{ Spec: 
corev1.PodSpec{ Containers: []corev1.Container{ @@ -299,7 +356,7 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s }, // end Pathways worker container }, NodeSelector: map[string]string{ - "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-accelerator": string(pw.Spec.Workers[0].Type), "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, }, Volumes: []corev1.Volume{ @@ -396,7 +453,7 @@ func MakeLeaderJobForColocatedDeployment(ctx context.Context, pw *pathwaysjob.Pa Spec: corev1.PodSpec{ Affinity: affinitySpec, NodeSelector: map[string]string{ - "cloud.google.com/gke-tpu-accelerator": pw.Spec.Workers[0].Type, + "cloud.google.com/gke-tpu-accelerator": string(pw.Spec.Workers[0].Type), "cloud.google.com/gke-tpu-topology": pw.Spec.Workers[0].Topology, }, HostNetwork: true, // For performance == McJAX From 82394dc9328bd297dbe738250bd4481f740d287f Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Sun, 9 Mar 2025 00:49:23 +0000 Subject: [PATCH 28/32] Install and deploy JobSet along with PathwaysJob. Add comments. --- Makefile | 11 +++++-- internal/controller/pathwaysjob_controller.go | 31 ++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index a1e45739..4e5bdff6 100644 --- a/Makefile +++ b/Makefile @@ -126,22 +126,29 @@ ifndef ignore-not-found ignore-not-found = false endif +JOBSET_VERSION ?= v0.8.0 +JOBSET_MANIFEST_URL := https://github.com/kubernetes-sigs/jobset/releases/download/${JOBSET_VERSION}/manifests.yaml + .PHONY: install -install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. +install: manifests kustomize ## Install PathwaysJob and JobSet CRDs into the K8s cluster specified in ~/.kube/config. 
$(KUSTOMIZE) build config/crd | $(KUBECTL) apply --server-side -f - + $(KUBECTL) apply --server-side -f ${JOBSET_MANIFEST_URL} .PHONY: uninstall -uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. +uninstall: manifests kustomize ## Uninstall PathwaysJob and JobSet CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f ${JOBSET_MANIFEST_URL} .PHONY: deploy deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side -f - + $(KUBECTL) apply --server-side -f ${JOBSET_MANIFEST_URL} .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f ${JOBSET_MANIFEST_URL} ##@ Dependencies diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index db0e86b8..983b48db 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -135,6 +135,7 @@ func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathways return js, nil } +// Create the JobSet for 'colocated' or 'default' deployment modes. 
func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { log2 := ctrl.LoggerFrom(ctx) // .WithValues("pathwaysjob", klog.KObj(pw)) @@ -199,10 +200,11 @@ func (r *PathwaysJobReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&pathwaysjob.PathwaysJob{}). - Owns(&jobsetv1alpha2.JobSet{}). // For JobSet + Owns(&jobsetv1alpha2.JobSet{}). // PathwaysJob owns the underlying JobSet object Complete(r) } +// Find the status of the underlying JobSet for 'colocated' or 'default' deployment modes. func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobsetv1alpha2.JobSet) { log := ctrl.LoggerFrom(ctx) log.Info("PathwaysJob findJobSetStatus", "Jobset name", js.ObjectMeta.Name) @@ -220,21 +222,24 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobset } // ---------------------- PATHWAYS HELPERS -------------------------- - -func extractTPUVersionFromWorkersType(ctx context.Context, tpuGKEAcceleratorType pathwaysjob.WorkerType) string { +// Find TPU version from the worker's type (- used to determine Pathways instance_type) +func extractTPUVersionFromWorkerType(ctx context.Context, tpuGKEAcceleratorType pathwaysjob.WorkerType) string { log := ctrl.LoggerFrom(ctx) parts := strings.Split(string(tpuGKEAcceleratorType), "-") if len(parts) >= 2 && strings.HasPrefix(parts[1], "v") { log.Info("TPU type and version", "value ", parts[0]+parts[1]) - return parts[0] + parts[1] + return parts[0] + parts[1] // example tpuv4 / tpuv5 } return "" } -func validateTPUTopology(topology string) string { +// Validate that topology provided is valid for the provided worker type. 
+func validateTPUTopologyWithType(tpuGKEAcceleratorType pathwaysjob.WorkerType, topology string) string { // ToDo: validate topology based on the TPU type return topology } + +// Calculate the number of VMs based on the Topology (- used in completions/parallelisms) func calculateVMsFromTopology(topology string) int32 { parts := strings.Split(topology, "x") // Examples - 2x2x4 or 4x4 if len(parts) < 2 { @@ -251,18 +256,15 @@ func calculateVMsFromTopology(topology string) int32 { if chips >= chipsperVM { vms = chips / chipsperVM } - return int32(vms) } +// Calculate all TPU related information func calculateTPUInfo(ctx context.Context, pw *pathwaysjob.PathwaysJob) { - tpuVersion := extractTPUVersionFromWorkersType(ctx, pw.Spec.Workers[0].Type) - TPUVersion = tpuVersion // setting public variable - tpuTopology := validateTPUTopology(pw.Spec.Workers[0].Topology) - TPUTopology = tpuTopology // setting public variable - InstanceType = tpuVersion + ":" + tpuTopology - NumVMs = calculateVMsFromTopology(pw.Spec.Workers[0].Topology) - + TPUVersion := extractTPUVersionFromWorkerType(ctx, pw.Spec.Workers[0].Type) // setting public variable + TPUTopology := validateTPUTopologyWithType(pw.Spec.Workers[0].Type, pw.Spec.Workers[0].Topology) // setting public variable + InstanceType = TPUVersion + ":" + TPUTopology // setting public variable + NumVMs = calculateVMsFromTopology(pw.Spec.Workers[0].Topology) // setting public variable } // Constructs the Pathways resource manager container spec for the underlying JobSet @@ -307,14 +309,13 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. 
fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:29001", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, - // "--xla_tpu_spmd_rng_bit_generator_unsafe=True", Ports: []corev1.ContainerPort{{ContainerPort: 29008}}, // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, } return &proxyContainerSpec, nil } -// Constructs Pathways worker replicated job for the underlying JobSet +// Constructs Pathways worker replicated job for both 'colocated' and 'default' deployment modes. func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate From 2b9eac4963a08fac42d81c31fb8ad3a3197cb446 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 11 Mar 2025 01:03:37 +0000 Subject: [PATCH 29/32] Add spec maps for concrete validations, fix headless mode, cleanup. --- README.md | 2 +- api/v1/pathwaysjob_types.go | 1 - config/samples/jobset_example.yaml | 218 ------------------ .../samples/pathways-job_v1_pathwaysjob.yaml | 93 ++++++-- internal/controller/pathwaysjob_controller.go | 101 +++++--- pkg/utils/extra_prototype.go | 214 ----------------- 6 files changed, 154 insertions(+), 475 deletions(-) delete mode 100644 pkg/utils/extra_prototype.go diff --git a/README.md b/README.md index 537e545d..9102cbf2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pathways-job PathwaysJob API is an OSS Kubernetes-native API, to deploy ML training and batch inference workloads, using Pathways on GKE. -//ToDo(roshanin) - an intro of what Pathways is. +//ToDo(roshanin) - add intro for Pathways. ## Description The PathwaysJob is an API that provides an easy way to run JAX workloads using Pathways. It support two modes of deployment. 
### Colocate mode diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index e4067f25..741befab 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -21,7 +21,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // +kubebuilder:object:root=true diff --git a/config/samples/jobset_example.yaml b/config/samples/jobset_example.yaml index e897a3e2..e69de29b 100644 --- a/config/samples/jobset_example.yaml +++ b/config/samples/jobset_example.yaml @@ -1,218 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apiVersion: jobset.x-k8s.io/v1alpha2 -kind: JobSet -metadata: - name: pathways-jobset-inference - # annotations: - # alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool # 1:1 job replica to node pool assignment -spec: - failurePolicy: - maxRestarts: 4 # The set will be restarted on failures up to 4 times. - replicatedJobs: - - name: leader # Part of the name of the child Jobs () - replicas: 1 # Replicas of the Pathways Resource Manager, Proxy, JetStream and Tester. Should always be 1. - template: - spec: # JobSpec - parallelism: 1 # Must be set to number of nodes in each node pool - completions: 1 # Must be set to number of nodes in each node pool - backoffLimit: 0 # Must be set to 0. 
Fail the job when any pod fails. - template: - spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: jobset.sigs.k8s.io/jobset-name - operator: In - values: - - pathways-jobset-inference - topologyKey: cloud.google.com/gke-nodepool - podAntiAffinity: # ensures only this job lands on the rack - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: jobset.sigs.k8s.io/jobset-name - operator: NotIn - values: - - pathways-jobset-inference - - key: job-name - operator: Exists - namespaceSelector: {} - topologyKey: cloud.google.com/gke-nodepool - nodeSelector: - cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice - cloud.google.com/gke-tpu-topology: 2x2x2 - tolerations: - - effect: NoSchedule - key: google.com/tpu - operator: Exists - # nodeSelector: - # cloud.google.com/gke-nodepool: cpu-user-np - volumes: - - name: shared-tmp - hostPath: - path: /tmp - type: DirectoryOrCreate - containers: - - name: pathways-rm - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest - imagePullPolicy: Always # Sometimes k8s was reusing the old image - args: - - --alsologtostderr - - --pathways_server_port=38677 - - --pathways_server_provides_devices=false - - --pathways_device_type=NONE - - --pathways_persistent_compilation_cache=false - - --pathways_compilation_mode=compile_at_worker - - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp - - --pathways_expected_instances=tpuv4:2x2x2 - env: - - name: TPU_SKIP_MDS_QUERY - value: "true" - - name: REPLICATED_JOB_NAME - valueFrom: - fieldRef: - fieldPath: metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name'] - - name: JOBSET_NAME - valueFrom: - fieldRef: - fieldPath: metadata.annotations['jobset.sigs.k8s.io/jobset-name'] - - name: HOST_ADDRESS - value: $(JOBSET_NAME)-$(REPLICATED_JOB_NAME)-0-0.$(JOBSET_NAME) - ports: - - containerPort: 38677 - - containerPort: 38678 - resources: - limits: 
- cpu: "4" - memory: "8G" - securityContext: - privileged: true - - name: pathways-proxy - args: - - --alsologtostderr - - --v=0 - - --pathways_ifrt_proxy_server_resource_manager=pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38677 - # - --pathways_ifrt_proxy_server_resource_manager=localhost:38677 - - --pathways_ifrt_proxy_server_port=38681 - - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp - - --pathways_plaque_network=gcp - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest - imagePullPolicy: Always - ports: - - containerPort: 38681 - - containerPort: 38682 - resources: - limits: - cpu: "4" - memory: 10G - securityContext: - privileged: true - - name: jetstream - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - imagePullPolicy: Always - ports: - - containerPort: 9000 - env: - - name: XCLOUD_ENVIRONMENT - value: GCP - - name: JAX_PLATFORMS - value: proxy - - name: JAX_BACKEND_TARGET - value: grpc://pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38681 - # value: grpc://localhost:38681 - command: - - bash - - -c - - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap - _sigterm SIGTERM; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py - MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 - load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items - max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false - model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 - ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2) - & PID=$!; while kill -0 $PID 2>/dev/null; do sleep 5; done; wait $PID; - EXIT_CODE=$? 
echo EXIT_CODE=$EXIT_CODE; echo End sleep: $(date); sleep - infinity;' - - name: tester - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - imagePullPolicy: Always - env: null - command: - - bash - - -c - - 'echo Start: $(date); _sigterm() ( kill -SIGTERM $! 2>/dev/null;); trap - _sigterm SIGTERM; for i in {1..5}; do echo Sending request $i; time python3 - JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 - --max_tokens=16 --server=0.0.0.0 --text="why earth is round"; EXIT_CODE=$?; - echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne - 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End sleep: - $(date); sleep infinity;' - securityContext: - privileged: true - - name: worker # Part of the name of the child Jobs () - replicas: 1 # Number of slices - template: - spec: - parallelism: 2 # Must be set to number of nodes in each node pool - completions: 2 # Must be set to number of nodes in each node pool - backoffLimit: 0 # Must be set to 0. Fail the job when any pod fails. 
- template: - spec: - nodeSelector: - cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice - cloud.google.com/gke-tpu-topology: 2x2x2 - volumes: - - name: shared-tmp - hostPath: - path: /tmp - type: DirectoryOrCreate - containers: - - name: pathways-worker - securityContext: - privileged: true - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest - imagePullPolicy: Always # Sometimes k8s was reusing the old image - env: - - name: TPU_MIN_LOG_LEVEL - value: "0" - - name: TF_CPP_MIN_LOG_LEVEL - value: "0" - - name: XCLOUD_ENVIRONMENT - value: GCP - args: - - --alsologtostderr - - --pathways_server_port=38679 # changed to not match rm port - - --pathways_resource_manager=pathways-jobset-inference-leader-0-0.pathways-jobset-inference:38677 - - --pathways_persistent_compilation_cache=false - - --pathways_compilation_mode=compile_at_worker - - --xla_tpu_enable_data_parallel_all_reduce_opt=true - - --xla_tpu_data_parallel_opt_different_sized_ops=true - - --xla_tpu_enable_async_collective_fusion=true - - --xla_tpu_enable_async_collective_fusion_fuse_all_gather=true - - --xla_tpu_enable_async_collective_fusion_multiple_steps=true - - --xla_tpu_overlap_compute_collective_tc=true - - --xla_enable_async_all_gather=true - - --pathways_tmp_dir_pattern=gs://cloud-pathways-staging/tmp - ports: - - containerPort: 38679 - - containerPort: 38680 - - containerPort: 8471 - - containerPort: 8080 - resources: - limits: - google.com/tpu: 4 # Number of TPU chips per worker diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index b405dda4..1bbe9e59 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,18 +15,19 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-trial39 + name: pathways-trial62 spec: - maxRestarts: 4 + maxRestarts: 10 workers: - type: tpu-v4-podslice - topology: 2x2x1 - numSlices: 1 + 
topology: 2x2x2 + numSlices: 2 pathwaysDir: "gs://cloud-pathways-staging/tmp" controller: - deploymentMode: "colocate" - # deploymentMode: "default" - template: + # #Pod template for training, default mode. + + deploymentMode: default + template: # UserPodTemplate spec: containers: - name: user @@ -36,8 +37,7 @@ spec: - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-trial39-leader-0-0.pathways-trial39:29008 - # value: grpc://pathways-trial38-proxy-0-0.pathways-trial38:29008 + value: grpc://pathways-trial62-proxy-0-0.pathways-trial62:29008 image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest imagePullPolicy: Always command: @@ -48,7 +48,74 @@ spec: volumeMounts: - mountPath: /tmp name: shared-tmp - # resources: - # limits: - # cpu: "20" - # memory: 90G \ No newline at end of file + resources: + limits: + cpu: "20" + memory: 90G + + + + # #Pod template for inference, colocate mode. + + + # deploymentMode: colocate + # template: # UserPodTemplate + # spec: + # containers: + # - name: jetstream + # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + # imagePullPolicy: Always + # ports: + # - containerPort: 9000 + # env: + # - name: XCLOUD_ENVIRONMENT + # value: GCP + # - name: JAX_PLATFORMS + # value: proxy + # - name: JAX_BACKEND_TARGET + # value: grpc://pathways-trial61-leader-0-0.pathways-trial61:29008 + # command: + # - bash + # - -c + # - 'echo Start: $(date); + # _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); + # trap _sigterm SIGTERM; + # (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py + # MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 + # load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items + # max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false + # model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 + # ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 + # per_device_batch_size=2) & PID=$!; + # while kill -0 $PID 2>/dev/null; + # do sleep 5; + # done; + # wait $PID; + # EXIT_CODE=$? + # echo EXIT_CODE=$EXIT_CODE; + # echo End sleep: $(date); + # sleep infinity;' + # - name: tester + # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + # imagePullPolicy: Always + # command: + # - bash + # - -c + # - 'echo Start: $(date); + # _sigterm() ( kill -SIGTERM $! 2>/dev/null;); + # trap _sigterm SIGTERM; + # for i in {1..5}; do + # echo Sending request $i; + # time python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; + # EXIT_CODE=$?; + # echo Completed request; + # echo EXIT_CODE=$EXIT_CODE; + # if [[ $EXIT_CODE -ne 0 ]]; then + # break; + # fi; + # done; + # echo Last EXIT_CODE=$EXIT_CODE; + # echo End sleep: $(date); + # sleep infinity;' + # securityContext: + # privileged: true diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 983b48db..2c5e742b 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -19,6 +19,7 @@ package controller import ( "context" "fmt" + "slices" "strconv" "strings" @@ -53,15 +54,46 @@ var ( NumVMs int32 ) +// Map to convert worker type to Instance type +var WorkerTypeToTPUVersionMap = map[string]string{ + 
"tpu-v6e-slice": "tpuv6e", + "tpu-v5p-slice": "tpuv5", + "tpu-v5-lite-podslice": "tpuv5", + "tpu-v4-podslice": "tpuv4", +} + +// Allowed topologies for each worker type +var ValidTpuTopologiesMap = map[string][]string{ + "tpu-v6e-slice": { + "1x1", "2x2", "2x4", "4x4", "4x8", "8x8", "8x16", "16x16", + }, + "tpu-v5p-slice": { + "2x2x1", "2x2x2", "2x2x4", "2x4x4", "4x4x4", "4x4x8", "4x4x12", "4x8x8", + "4x4x20", "4x8x12", "4x4x28", "8x8x8", "4x12x12", "4x8x20", "4x4x44", + "8x8x12", "4x4x52", "4x8x28", "4x12x20", "8x8x16", "4x4x68", "8x12x12", + "4x4x76", "8x8x20", "4x12x28", "4x8x44", "4x4x92", "8x12x16", "4x20x20", + "4x8x52", "12x12x12", "8x8x28", "4x4x116", "8x12x20", "4x4x124", "8x16x16", + "4x12x44", "4x8x68", "4x20x28", "12x12x16", "4x4x148", "4x8x76", "4x12x52", + "8x16x20", "4x4x164", "8x12x28", "4x4x172", "8x8x44", "12x12x20", "4x8x92", + "4x4x188", "12x16x16", "4x28x28", "8x20x20", "4x12x68", "8x8x52", "4x4x212", + "12x12x24", "4x20x44", "8x16x28", "4x12x76", "4x8x116", "4x4x236", "12x16x20", + "4x4x244", "4x8x124", "12x12x28", "16x16x16", "4x20x52", "8x12x44", "8x8x68", + "4x12x92", "8x20x28", "12x16x24", "4x8x148", "12x20x20", "8x8x76", "4x28x44", + "8x12x52", "16x16x20", "12x12x36", "4x8x164", "12x16x28", "4x20x68", "4x8x172", + "4x12x116", "8x16x44", "12x20x24", "4x28x52", "8x8x92", "4x12x124", "4x8x188", + "4x20x76", "16x16x24", "12x24x24", "16x20x28", + }, + "tpu-v5-lite-podslice": { + "2x4", "4x4", "4x8", "8x8", "8x16", "16x16", + }, + "tpu-v4-podslice": { + "2x2x1", "2x2x2", "2x2x4", "2x4x4", "4x4x4", "4x4x8", "4x8x8", "8x8x8", + "8x8x12", "8x8x16", "8x16x16", + }, +} + // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
-// TODO(user): Modify the Reconcile function to compare the state specified by -// the PathwaysJob object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile // +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch // +kubebuilder:rbac:groups=pathways-job.pathways.domain,resources=pathwaysjobs,verbs=get;list;watch;create;update;patch;delete @@ -146,9 +178,13 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo var jobs []jobsetv1alpha2.ReplicatedJob var rmJobName string - calculateTPUInfo(ctx, pw) - log2.Info("PathwaysJob: in createJobSet TPU variables", "TPUVersion", TPUVersion, "TPUTopology", TPUTopology, "InstanceType", InstanceType, "NumVMs", NumVMs) - + err := calculateTPUInfo(ctx, pw) + if err != nil { + log2.Info("PathwaysJob: in createJobSet calculateTPUInfo ", " Error: ", err) + return err + } else { + log2.Info("PathwaysJob: in createJobSet calculateTPUInfo ", "TPUVersion", TPUVersion, "TPUTopology", TPUTopology, "InstanceType", InstanceType, "NumVMs", NumVMs) + } // Pathways Spec + JobSet for training or batch inference ------ if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { rmJobName = "leader" @@ -223,20 +259,20 @@ func (r *PathwaysJobReconciler) findJobSetStatus(ctx context.Context, js *jobset // ---------------------- PATHWAYS HELPERS -------------------------- // Find TPU version from the worker's type (- used to determine Pathways instance_type) -func extractTPUVersionFromWorkerType(ctx context.Context, tpuGKEAcceleratorType pathwaysjob.WorkerType) string { - log := ctrl.LoggerFrom(ctx) - parts := strings.Split(string(tpuGKEAcceleratorType), "-") - if len(parts) >= 2 && strings.HasPrefix(parts[1], "v") { - log.Info("TPU type and version", "value ", 
parts[0]+parts[1]) - return parts[0] + parts[1] // example tpuv4 / tpuv5 - } - return "" +func constructTPUVersionFromWorkerType(tpuGKEAcceleratorType pathwaysjob.WorkerType) string { + // Worker types are already validated in the YAML. + return WorkerTypeToTPUVersionMap[string(tpuGKEAcceleratorType)] } // Validate that topology provided is valid for the provided worker type. -func validateTPUTopologyWithType(tpuGKEAcceleratorType pathwaysjob.WorkerType, topology string) string { - // ToDo: validate topology based on the TPU type - return topology +func validateTPUTopologyWithWorkerType(ctx context.Context, tpuGKEAcceleratorType pathwaysjob.WorkerType, topology string) (string, error) { + log := ctrl.LoggerFrom(ctx) + if slices.Contains(ValidTpuTopologiesMap[string(tpuGKEAcceleratorType)], topology) { + return topology, nil + } else { + log.Info("Invalid topology!!! ", "Worker type ", string(tpuGKEAcceleratorType), " cannot have topology ", topology) + return "", fmt.Errorf("invalid TPU topology for worker type") + } } // Calculate the number of VMs based on the Topology (- used in completions/parallelisms) @@ -260,11 +296,16 @@ func calculateVMsFromTopology(topology string) int32 { } // Calculate all TPU related information -func calculateTPUInfo(ctx context.Context, pw *pathwaysjob.PathwaysJob) { - TPUVersion := extractTPUVersionFromWorkerType(ctx, pw.Spec.Workers[0].Type) // setting public variable - TPUTopology := validateTPUTopologyWithType(pw.Spec.Workers[0].Type, pw.Spec.Workers[0].Topology) // setting public variable - InstanceType = TPUVersion + ":" + TPUTopology // setting public variable - NumVMs = calculateVMsFromTopology(pw.Spec.Workers[0].Topology) // setting public variable +func calculateTPUInfo(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { + // setting public variable + TPUVersion := constructTPUVersionFromWorkerType(pw.Spec.Workers[0].Type) + TPUTopology, err := validateTPUTopologyWithWorkerType(ctx, pw.Spec.Workers[0].Type, 
pw.Spec.Workers[0].Topology) + if err != nil { + return err + } + InstanceType = TPUVersion + ":" + TPUTopology + NumVMs = calculateVMsFromTopology(pw.Spec.Workers[0].Topology) + return nil } // Constructs the Pathways resource manager container spec for the underlying JobSet @@ -310,7 +351,7 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, Ports: []corev1.ContainerPort{{ContainerPort: 29008}}, - // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(10000000000, resource.DecimalSI)}}, + // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(100000000000, resource.DecimalSI)}}, //100GiB } return &proxyContainerSpec, nil } @@ -427,6 +468,10 @@ func MakePodAffinityRules(pw *pathwaysjob.PathwaysJob) (*corev1.Affinity, error) // Get the containers (main workload and any sidecars) from the user's pod spec. // This is used to inject the containers into the leader pod in the 'colocate' deployment mode. func GetUserContainerList(pw *pathwaysjob.PathwaysJob) ([]corev1.Container, error) { + // When workload is to be run in headless mode, no user pod will be provided. + if pw.Spec.Controller.UserPodTemplate == nil { + return nil, nil + } containerList := pw.Spec.Controller.UserPodTemplate.Spec.Containers return containerList, nil } @@ -570,7 +615,7 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ // Adding user job conditionally for headless mode, if the user has provided containers in PodSpec. 
// ToDo: Add other things in PodSpec - if len(userContainerList) > 0 { + if userContainerList != nil { userJob := jobsetv1alpha2.ReplicatedJob{ Name: "user-job", Replicas: 1, diff --git a/pkg/utils/extra_prototype.go b/pkg/utils/extra_prototype.go deleted file mode 100644 index 2b805398..00000000 --- a/pkg/utils/extra_prototype.go +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -// ----------------RM AND PROXY SPEC---------------- - -// { -// Name: "pathways-rm", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/server:latest", -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Args: []string{ -// "--alsologtostderr", -// "--pathways_server_port=38677", -// "--pathways_server_provides_devices=false", -// "--pathways_device_type=NONE", -// "--pathways_persistent_compilation_cache=false", -// "--pathways_compilation_mode=compile_at_worker", -// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), -// "--pathways_expected_instances=tpuv4:2x2x2", -// }, -// Env: []corev1.EnvVar{ -// {Name: "REPLICATED_JOB_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.annotations['jobset.sigs.k8s.io/replicatedjob-name']"}}}, -// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: 
"metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, -// {Name: "HOST_ADDRESS", Value: fmt.Sprintf("%s-%s-0-0.%s", pwWorkloadName, "leader", pwWorkloadName)}, -// {Name: "TPU_SKIP_MDS_QUERY", Value: "true"}, -// }, -// Ports: []corev1.ContainerPort{{ContainerPort: 38677}, {ContainerPort: 38678}}, -// }, // end pathways-rm - -// { -// Name: "pathways-proxy", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/proxy_server:latest", -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Args: []string{ -// "--alsologtostderr", -// "--v=0", -// fmt.Sprintf("--pathways_ifrt_proxy_server_resource_manager=%s-%s-0-0.%s:38677", pwWorkloadName, "leader", pwWorkloadName), -// "--pathways_ifrt_proxy_server_port=38681", -// fmt.Sprintf("--pathways_tmp_dir_pattern=%s", pw.Spec.PathwaysDir), -// "--pathways_plaque_network=gcp", -// }, -// Ports: []corev1.ContainerPort{{ContainerPort: 38681}, {ContainerPort: 38682}}, -// }, // end pathways-proxy - -// NodeSelector: map[string]string{ -// "cloud.google.com/gke-tpu-accelerator": "tpu-v4-podslice", -// "cloud.google.com/gke-tpu-topology": "2x2x2"}, -// NodeSelector: map[string]string{"cloud.google.com/gke-tpu-accelerator": "tpu-v5-lite-podslice", "cloud.google.com/gke-tpu-topology": "4x4"}, - -// ----------------jetstream and tester containers---------------- -// { -// Name: "jetstream", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Env: []corev1.EnvVar{ -// {Name: "XCLOUD_ENVIRONMENT", Value: "GCP"}, -// {Name: "JAX_PLATFORMS", Value: "proxy"}, -// {Name: "JAX_BACKEND_TARGET", Value: fmt.Sprintf("grpc://%s-%s-0-0.%s:38681", pw.GetName(), "leader", pw.GetName())}, -// {Name: "JOBSET_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: 
"metadata.annotations['jobset.sigs.k8s.io/jobset-name']"}}}, -// }, -// Ports: []corev1.ContainerPort{{ContainerPort: 9000}}, -// Command: []string{"bash", "-c", "echo Start ; (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false model_name='llama2-70b' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=2); echo End; sleep infinity;"}, -// }, // end jetstream - -// { -// Name: "tester", -// Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest", // revert to stable -// ImagePullPolicy: "Always", -// SecurityContext: &corev1.SecurityContext{Privileged: &truth}, -// Command: []string{"bash", "-c", "echo Start ;for i in {1..5}; do echo Sending request $i; python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; EXIT_CODE=$?; echo Completed request; echo EXIT_CODE=$EXIT_CODE; if [[ $EXIT_CODE -ne 0 ]]; then break; fi; done; echo Last EXIT_CODE=$EXIT_CODE; echo End; sleep infinity;"}, -// }, // end tester - -//----------------LIST---------------- - -// List JobSets using client - -// jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) - -// if err != nil { -// log.Info("Roshani, can list JobSets: ") -// for _, js := range jsList.Items { -// if js.ObjectMeta.Name == pw.Spec.WorkloadName { -// log.Info("Roshani, found JobSet: ", "JobSet name", pw.Spec.WorkloadName) -// return ctrl.Result{}, nil -// // Nothing to reconcile here. 
-// } -// } -// } else { -// log.Info("Roshani, error listing JobSets: ", "error ", err) -// return ctrl.Result{}, err -// } - -// -// // JobSet list -// var jsList *jobsetv1alpha2.JobSetList -// jsList, err := jobSetClient.JobsetV1alpha2().JobSets("default").List(ctx, metav1.ListOptions{}) -// if err != nil { -// log.Info("Roshani, can't list JobSets: ", "error ", err) -// return ctrl.Result{}, err -// } else { -// log.Info("Roshani, can list JobSets") -// for _, job := range jsList.Items { -// for _, condition := range job.Status.Conditions { -// log.Info("Roshani Jobset condtion", job.ObjectMeta.Name, condition.Type) -// } -// if job.ObjectMeta.Name == pw.GetName() && -// (job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyCompleted) || -// job.Status.Conditions[0].Type == string(jobsetv1alpha2.JobSetStartupPolicyInProgress)) { -// log.Info("Roshani, found JobSet ", "JobSet name", pw.GetName()) -// log.Info("Roshani, nothing to reconcile here") -// return ctrl.Result{}, nil -// // Nothing to reconcile here. -// } -// } -// } - -// Currently leading to race conditions ---. -// var pwList pathwaysjob.PathwaysJobList -// if err := r.List(ctx, &pwList, &client.ListOptions{}); err != nil { -// log.Error(err, "Roshani, failed to list Pathways") -// return ctrl.Result{}, err -// } else { -// log.Info("Roshani, successfully listed Pathways") -// for _, job := range pwList.Items { -// log.Info("ROSHANI", "Job name ", job.Spec.WorkloadName, "Pathways workload name ", pw.GetName()) -// if job.Spec.WorkloadName == pw.GetName() { -// log.Info("Roshani, found Pathways, not creating workload: ", "JobSet name", pw.GetName()) -// return ctrl.Result{}, nil -// // Nothing to reconcile here. 
-// } -// } -// } - -// --------LIST childJobSets -------------- -// func (r *PathwaysJobReconciler) listChildJobSets(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) ([]jobsetv1alpha2.JobSet, error) { -// log3 := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) -// // ctx = ctrl.LoggerInto(ctx, log3) -// log3.Info("PathwaysJob: in listChildJobSets", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) - -// var jsList *jobsetv1alpha2.JobSetList -// jsList, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).List(ctx, metav1.ListOptions{}) - -// if err != nil { -// log3.Info("PathwaysJob: can't list JobSets: ", "error ", err) -// return nil, err -// } -// return jsList.Items, nil -// } - -// report status - -// // 3. Update the cluster - create update and delete other resources -// log.Info("PathwaysJob: creating JobSet \n") -// if err := r.createJobSet(ctx, pw, jobSetClient); err != nil { -// log.Error(err, "PathwaysJob: failed to create JobSet \n") -// return ctrl.Result{}, err -// } - -// childJobSets, err := r.listChildJobSets(ctx, pw, jobSetClient) -// if err != nil { -// log.Error(err, "PathwaysJob: failed to list JobSets \n") -// return ctrl.Result{}, err -// } - -// // 2.1.1 List childJobSets -// for _, jobset := range childJobSets { -// if jobset.GetName() == pw.GetName() { -// log.Info("PathwaysJob: JobSet exists, not creating \n\n\n") -// for _, c := range jobset.Status.Conditions { -// log.Info("PathwaysJob: Condition is ", "Type", c.Type) -// } -// } -// } - -// function to listChildJobSets, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L44 - -// function to updatePathwaysJob Status ~~ updateJobSetStatus. Pathways status is same as JobSet Status. This function will mainly update Conditions and Message. 
-// similar to https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L248 -// JobSet conditions - https://github.com/kubernetes-sigs/jobset/blob/main/pkg/controllers/jobset_controller.go#L822 - -// function to suspendJobSet - -// function to resumeJobSet - -// function to deleteJobSet, based on https://github.com/kubernetes-sigs/jobset/blob/main/client-go/clientset/versioned/typed/jobset/v1alpha2/jobset.go#L41 - -// function isJobSetFinished reuse jobSetFinished - -// funtion pathwaysJobFinished (?) - -// function setCondition and updateCondition - -// function setPathwaysJobCompletedCondition - -// function setPathwaysJobFailedCondition - -// function setPathwaysJobSuspendedCondition - -// function setPathwaysJobResumedCondition From 9a9c97b432b01e4763c20acbc0b79e75d843e816 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Tue, 11 Mar 2025 17:17:58 +0000 Subject: [PATCH 30/32] Adding Codeowners file. --- .github/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..4a514a16 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# Code owners and required reviewers. +* @RoshaniN \ No newline at end of file From afd9e827e05086651dbdccdafabd3652b17c5087 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 17 Mar 2025 20:14:49 +0000 Subject: [PATCH 31/32] Update images, add annotation. Update sample config YAML to use Python image. 
--- README.md | 1 - api/v1/pathwaysjob_types.go | 5 +- ...ways-job.pathways.domain_pathwaysjobs.yaml | 6 +- config/manager/kustomization.yaml | 2 +- config/samples/jobset_example.yaml | 0 config/samples/kustomization.yaml | 2 +- .../samples/pathways-job_v1_pathwaysjob.yaml | 93 +++---------------- internal/controller/pathwaysjob_controller.go | 71 +++++++++----- 8 files changed, 70 insertions(+), 110 deletions(-) delete mode 100644 config/samples/jobset_example.yaml diff --git a/README.md b/README.md index 9102cbf2..d8f8cd57 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ The user workload is typically on a Vertex AI notebook, so users can connect to - docker version 17.03+. - kubectl version v1.11.3+. - Access to a Kubernetes v1.11.3+ cluster. -- JobSet //ToDo(roshanin) install JobSet ### To Deploy on the cluster **Build and push your image to the location specified by `IMG`:** diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index 741befab..cc09e1ab 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -61,12 +61,13 @@ type PathwaysJobSpec struct { // Maximum number of times the JobSet is restarted. MaxRestarts int32 `json:"maxRestarts,omitempty"` - // PathwaysDir is a persistent location like GCS at which temporary + // PathwaysDir is a persistent GCS location at which temporary // Pathways artifacts can be stored like HBM state during interruptions. // Currently, Pathways supports a precreated GCS directory only. PathwaysDir string `json:"pathwaysDir,omitempty"` - // PathwaysVersion is the version of the Pathways client. + // PathwaysVersion is the version of the Pathways cluster. + // This indicates the version of the Pathways RM, Proxy and Workers. PathwaysVersion string `json:"pathwaysVersion,omitempty"` // The list of worker types created for the Pathways Job. 
Currently only diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index bf2270c3..e3271c54 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -8058,12 +8058,14 @@ spec: type: integer pathwaysDir: description: |- - PathwaysDir is a persistent location like GCS at which temporary + PathwaysDir is a persistent GCS location at which temporary Pathways artifacts can be stored like HBM state during interruptions. Currently, Pathways supports a precreated GCS directory only. type: string pathwaysVersion: - description: PathwaysVersion is the version of the Pathways client. + description: |- + PathwaysVersion is the version of the Pathways cluster. + This indicates the version of the Pathways RM, Proxy and Workers. type: string workers: description: |- diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 901b48a1..6a8aceda 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,5 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/pathwaysjob + newName: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/pathwaysjob newTag: latest diff --git a/config/samples/jobset_example.yaml b/config/samples/jobset_example.yaml deleted file mode 100644 index e69de29b..00000000 diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 202b6e77..4ac798bd 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -14,5 +14,5 @@ ## Append samples of your project ## resources: -- pathways-api_v1_pathwaysjob.yaml +- pathways-job_v1_pathwaysjob.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml 
b/config/samples/pathways-job_v1_pathwaysjob.yaml index 1bbe9e59..d7447195 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -15,14 +15,14 @@ apiVersion: pathways-job.pathways.domain/v1 kind: PathwaysJob metadata: - name: pathways-trial62 + name: pathways-trial spec: maxRestarts: 10 workers: - type: tpu-v4-podslice topology: 2x2x2 numSlices: 2 - pathwaysDir: "gs://cloud-pathways-staging/tmp" + pathwaysDir: "gs://test-bucket/tmp" #This bucket needs to be created in advance. controller: # #Pod template for training, default mode. @@ -37,85 +37,14 @@ spec: - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-trial62-proxy-0-0.pathways-trial62:29008 - image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest + value: grpc://pathways-trial-proxy-0-0.pathways-trial:29008 + image: python:3.13 imagePullPolicy: Always command: - - bash - - -c - - | - (python3 MaxText/train.py MaxText/configs/base.yml base_output_directory=gs://cloud-pathways-staging dataset_path=gs://maxtext-dataset/ steps=10 run_name=roshanin-pathways1 enable_single_controller=true attention=dot_product monitor_goodput=False enable_tensorboard=True enable_checkpointing=False); - volumeMounts: - - mountPath: /tmp - name: shared-tmp - resources: - limits: - cpu: "20" - memory: 90G - - - - # #Pod template for inference, colocate mode. - - - # deploymentMode: colocate - # template: # UserPodTemplate - # spec: - # containers: - # - name: jetstream - # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - # imagePullPolicy: Always - # ports: - # - containerPort: 9000 - # env: - # - name: XCLOUD_ENVIRONMENT - # value: GCP - # - name: JAX_PLATFORMS - # value: proxy - # - name: JAX_BACKEND_TARGET - # value: grpc://pathways-trial61-leader-0-0.pathways-trial61:29008 - # command: - # - bash - # - -c - # - 'echo Start: $(date); - # _sigterm() ( kill -SIGTERM $! 
2>/dev/null;); - # trap _sigterm SIGTERM; - # (JAX_TRACEBACK_FILTERING=off python3 MaxText/maxengine_server.py - # MaxText/configs/inference_jetstream.yml tokenizer_path=assets/tokenizer.llama2 - # load_parameters_path=gs://runner-maxtext-logs/2024-05-07-23-34/unscanned_chkpt/checkpoints/0/items - # max_prefill_predict_length=1024 max_target_length=2048 async_checkpointing=false - # model_name=''llama2-70b'' steps=1 ici_fsdp_parallelism=1 ici_autoregressive_parallelism=-1 - # ici_tensor_parallelism=1 scan_layers=false weight_dtype=bfloat16 - # per_device_batch_size=2) & PID=$!; - # while kill -0 $PID 2>/dev/null; - # do sleep 5; - # done; - # wait $PID; - # EXIT_CODE=$? - # echo EXIT_CODE=$EXIT_CODE; - # echo End sleep: $(date); - # sleep infinity;' - # - name: tester - # image: us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/maxtext_jax_stable:latest - # imagePullPolicy: Always - # command: - # - bash - # - -c - # - 'echo Start: $(date); - # _sigterm() ( kill -SIGTERM $! 2>/dev/null;); - # trap _sigterm SIGTERM; - # for i in {1..5}; do - # echo Sending request $i; - # time python3 JetStream/jetstream/tools/requester.py --tokenizer assets/tokenizer.llama2 --max_tokens=16 --server=0.0.0.0 --text=\"why earth is round\"; - # EXIT_CODE=$?; - # echo Completed request; - # echo EXIT_CODE=$EXIT_CODE; - # if [[ $EXIT_CODE -ne 0 ]]; then - # break; - # fi; - # done; - # echo Last EXIT_CODE=$EXIT_CODE; - # echo End sleep: $(date); - # sleep infinity;' - # securityContext: - # privileged: true + - /bin/sh + - -c + - | + pip install --upgrade pip + pip install -U --pre jax jaxlib -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html + pip install pathwaysutils + python -c "import jax; import pathwaysutils; print(\"Number of JAX devices is\", len(jax.devices()))" \ No newline at end of file diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 2c5e742b..bff91373 100644 --- 
a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -58,7 +58,7 @@ var ( var WorkerTypeToTPUVersionMap = map[string]string{ "tpu-v6e-slice": "tpuv6e", "tpu-v5p-slice": "tpuv5", - "tpu-v5-lite-podslice": "tpuv5", + "tpu-v5-lite-podslice": "tpuv5e", "tpu-v4-podslice": "tpuv4", } @@ -153,11 +153,10 @@ func (r *PathwaysJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) } func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) (*jobsetv1alpha2.JobSet, error) { - log3 := ctrl.LoggerFrom(ctx) - // .WithValues("pathwaysjob", klog.KObj(pw)) - // ctx = ctrl.LoggerInto(ctx, log3) + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log) - log3.Info("PathwaysJob: in getChildJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + log.Info("PathwaysJob: in getChildJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) var js *jobsetv1alpha2.JobSet js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Get(ctx, pw.GetName(), metav1.GetOptions{}) @@ -169,21 +168,20 @@ func (r *PathwaysJobReconciler) getChildJobSet(ctx context.Context, pw *pathways // Create the JobSet for 'colocated' or 'default' deployment modes. 
func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjob.PathwaysJob, jobSetClient *jobsetclient.Clientset) error { - log2 := ctrl.LoggerFrom(ctx) - // .WithValues("pathwaysjob", klog.KObj(pw)) - // ctx = ctrl.LoggerInto(ctx, log2) + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log) - log2.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) + log.Info("PathwaysJob: in createJobSet", "Name ", pw.GetName(), "Namespace ", pw.GetNamespace()) var jobs []jobsetv1alpha2.ReplicatedJob var rmJobName string err := calculateTPUInfo(ctx, pw) if err != nil { - log2.Info("PathwaysJob: in createJobSet calculateTPUInfo ", " Error: ", err) + log.Info("PathwaysJob: in createJobSet calculateTPUInfo ", " Error: ", err) return err } else { - log2.Info("PathwaysJob: in createJobSet calculateTPUInfo ", "TPUVersion", TPUVersion, "TPUTopology", TPUTopology, "InstanceType", InstanceType, "NumVMs", NumVMs) + log.Info("PathwaysJob: in createJobSet calculateTPUInfo ", "TPUVersion", TPUVersion, "TPUTopology", TPUTopology, "InstanceType", InstanceType, "NumVMs", NumVMs) } // Pathways Spec + JobSet for training or batch inference ------ if pw.Spec.Controller.DeploymentMode == pathwaysjob.Colocate { @@ -212,9 +210,9 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo // Set Pathways controller as the owner of the JobSet for garbage collection. 
if err := ctrl.SetControllerReference(pw, &mainJobSetConfig, r.Scheme); err != nil { - log2.Info("PathwaysJob: failed to set Pathways as owner of JobSet.", "error ", err) + log.Info("PathwaysJob: failed to set Pathways as owner of JobSet.", "error ", err) } else { - log2.Info("PathwaysJob: successfully set Pathways as owner of JobSet.") + log.Info("PathwaysJob: successfully set Pathways as owner of JobSet.") } js, err := jobSetClient.JobsetV1alpha2().JobSets(pw.GetObjectMeta().GetNamespace()).Create(ctx, &mainJobSetConfig, metav1.CreateOptions{}) @@ -222,7 +220,7 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo if err != nil { return err } else { - log2.Info("PathwaysJob: successfully created JobSet: ", "JobSet name", js.Name) + log.Info("PathwaysJob: successfully created JobSet: ", "JobSet name", js.Name) } return nil } @@ -278,17 +276,15 @@ func validateTPUTopologyWithWorkerType(ctx context.Context, tpuGKEAcceleratorTyp // Calculate the number of VMs based on the Topology (- used in completions/parallelisms) func calculateVMsFromTopology(topology string) int32 { parts := strings.Split(topology, "x") // Examples - 2x2x4 or 4x4 - if len(parts) < 2 { - return 0 - } // Calculate the number of chips based on the Topology. + // The topology must have already been validated with the worker type. chips := 1 for _, part := range parts { num, _ := strconv.Atoi(part) chips *= num } vms := 1 - chipsperVM := 4 + chipsperVM := 4 // ToDo (roshanin): Add support for VMs with 8 chips per host. 
if chips >= chipsperVM { vms = chips / chipsperVM } @@ -308,13 +304,24 @@ func calculateTPUInfo(ctx context.Context, pw *pathwaysjob.PathwaysJob) error { return nil } +// Construct image tag based on Pathways version +func makeImageTagUsingPathwaysVersion(pw *pathwaysjob.PathwaysJob) string { + var tag string + if pw.Spec.PathwaysVersion != "" { + tag = string(pw.Spec.PathwaysVersion) + } else { + tag = "latest" + } + return tag +} + // Constructs the Pathways resource manager container spec for the underlying JobSet func MakeResourceManagerContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1.Container, error) { truth := true rmContainerSpec := corev1.Container{ Name: "pathways-rm", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_server:latest", + Image: fmt.Sprintf("us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:%s", makeImageTagUsingPathwaysVersion(pw)), ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -342,7 +349,7 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. proxyContainerSpec := corev1.Container{ Name: "pathways-proxy", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_proxy_server:latest", + Image: fmt.Sprintf("us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:%s", makeImageTagUsingPathwaysVersion(pw)), ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -360,6 +367,15 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName string) (*corev1. 
func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName string) (jobsetv1alpha2.ReplicatedJob, error) { truth := true volumeSourceType := corev1.HostPathDirectoryOrCreate + objectMeta := metav1.ObjectMeta{} + + if pw.Spec.Controller.DeploymentMode == pathwaysjob.Default { + objectMeta = metav1.ObjectMeta{ + Annotations: map[string]string{ + "alpha.jobset.sigs.k8s.io/exclusive-topology": "cloud.google.com/gke-nodepool", + }, + } + } workerJob := jobsetv1alpha2.ReplicatedJob{ Name: "worker", @@ -370,11 +386,12 @@ func MakeWorkerJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, rmJobName s Completions: ptr.To(int32(NumVMs)), // number of workers remember to change Parallelism: ptr.To(int32(NumVMs)), // number of workers remember to change Template: corev1.PodTemplateSpec{ + ObjectMeta: objectMeta, Spec: corev1.PodSpec{ Containers: []corev1.Container{ { Name: "pathways-worker", - Image: "us-docker.pkg.dev/cloud-tpu-v2-images-dev/pathways/sanitized_server:latest", + Image: fmt.Sprintf("us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:%s", makeImageTagUsingPathwaysVersion(pw)), ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ @@ -549,6 +566,10 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ // predictably place RM on CPUs + "cloud.google.com/machine-family": "n2", + "node.kubernetes.io/instance-type": "n2-standard-64", + }, HostNetwork: true, // For performance == McJAX DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX Tolerations: []corev1.Toleration{ @@ -586,6 +607,10 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ // predictably place RM on CPUs + 
"cloud.google.com/machine-family": "n2", + "node.kubernetes.io/instance-type": "n2-standard-64", + }, HostNetwork: true, // For performance == McJAX DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX Tolerations: []corev1.Toleration{ @@ -629,6 +654,10 @@ func MakeJobsForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJ // }, Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ // predictably place RM on CPUs + "cloud.google.com/machine-family": "n2", + "node.kubernetes.io/instance-type": "n2-standard-64", + }, HostNetwork: true, // For performance == McJAX DNSPolicy: corev1.DNSClusterFirstWithHostNet, // For performance == McJAX Tolerations: []corev1.Toleration{ // tolerations are important here to not run this job on TPUs From 27a43ed507f0a8f403cf588b0ca38352aef78468 Mon Sep 17 00:00:00 2001 From: RoshaniN Date: Mon, 17 Mar 2025 23:00:13 +0000 Subject: [PATCH 32/32] Change proxy port back to 29000. --- config/samples/pathways-job_v1_pathwaysjob.yaml | 2 +- internal/controller/pathwaysjob_controller.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/samples/pathways-job_v1_pathwaysjob.yaml b/config/samples/pathways-job_v1_pathwaysjob.yaml index d7447195..2557249a 100644 --- a/config/samples/pathways-job_v1_pathwaysjob.yaml +++ b/config/samples/pathways-job_v1_pathwaysjob.yaml @@ -37,7 +37,7 @@ spec: - name: JAX_PLATFORMS value: proxy - name: JAX_BACKEND_TARGET - value: grpc://pathways-trial-proxy-0-0.pathways-trial:29008 + value: grpc://pathways-trial-proxy-0-0.pathways-trial:29000 image: python:3.13 imagePullPolicy: Always command: diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index bff91373..cf8e6735 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -353,11 +353,11 @@ func MakeProxyContainer(pw *pathwaysjob.PathwaysJob, rmJobName 
string) (*corev1. ImagePullPolicy: "Always", SecurityContext: &corev1.SecurityContext{Privileged: &truth}, Args: []string{ - "--server_port=29008", + "--server_port=29000", fmt.Sprintf("--resource_manager_address=%s-%s-0-0.%s:29001", pw.GetName(), rmJobName, pw.GetName()), fmt.Sprintf("--gcs_scratch_location=%s", pw.Spec.PathwaysDir), }, - Ports: []corev1.ContainerPort{{ContainerPort: 29008}}, + Ports: []corev1.ContainerPort{{ContainerPort: 29000}}, // Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{"cpu": *resource.NewQuantity(4, resource.DecimalSI), "memory": *resource.NewQuantity(100000000000, resource.DecimalSI)}}, //100GiB } return &proxyContainerSpec, nil