Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CONTEXT.md
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ spec:
- type: "network.kubernetes.io/NetworkProxyReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/example.com/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "bootstrap-only"
Expand All @@ -278,7 +278,7 @@ spec:
- type: "storage.kubernetes.io/CSIReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/StorageReady"
key: "readiness.k8s.io/example.com/storage-not-ready"
effect: "NoSchedule"
enforcementMode: "continuous"
gracePeriod: "60s"
Expand Down Expand Up @@ -421,4 +421,4 @@ rules:
### Networking
- Cluster internal only
- Health check endpoints on :8081
- Metrics endpoint on :8080
- Metrics endpoint on :8080
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ spec:
- type: "example.com/CNIReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/example.com/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "bootstrap-only"
Expand All @@ -62,6 +62,21 @@ spec:

Find a more detailed walkthrough of setting up Node Readiness Controller in your Kind cluster [here](https://github.com/kubernetes-sigs/node-readiness-controller/blob/main/docs/TEST_README.md).

### Taint Key Conventions

All taint keys must use the `readiness.k8s.io/` prefix. The following core prefixes are reserved and not allowed for user rules:
- `readiness.k8s.io/system/*`
- `readiness.k8s.io/core/*`
- `readiness.k8s.io/node/*`
- `readiness.k8s.io/device/*`
- `readiness.k8s.io/network/*`
- `readiness.k8s.io/storage/*`

Use user-space keys under `readiness.k8s.io/*` with a DNS-style component to avoid conflicts, for example:
- `readiness.k8s.io/example.com/network-not-ready`
- `readiness.k8s.io/projectcalico.org/cni-ready`
- `readiness.k8s.io/vendor.io/storage-driver-ready`

## High-level Roadmap

- [X] Release v0.1.0
Expand Down
6 changes: 6 additions & 0 deletions api/v1alpha1/nodereadinessrule_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ type NodeReadinessRuleSpec struct {
//
// +required
// +kubebuilder:validation:XValidation:rule="self.key.startsWith('readiness.k8s.io/')",message="taint key must start with 'readiness.k8s.io/'"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/system/')",message="reserved taint prefix 'readiness.k8s.io/system/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/core/')",message="reserved taint prefix 'readiness.k8s.io/core/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/node/')",message="reserved taint prefix 'readiness.k8s.io/node/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/device/')",message="reserved taint prefix 'readiness.k8s.io/device/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/network/')",message="reserved taint prefix 'readiness.k8s.io/network/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="!self.key.startsWith('readiness.k8s.io/storage/')",message="reserved taint prefix 'readiness.k8s.io/storage/*' is not allowed"
// +kubebuilder:validation:XValidation:rule="self.key.size() <= 253",message="taint key length must be at most 253 characters"
// +kubebuilder:validation:XValidation:rule="size(self.key.split('/')) == 2",message="taint key must have exactly one '/' separator (prefix/name format)"
// +kubebuilder:validation:XValidation:rule="size(self.key.split('/')[1]) > 0 && size(self.key.split('/')[1]) <= 63",message="taint key name part must be 1-63 characters"
Expand Down
18 changes: 18 additions & 0 deletions config/crd/bases/readiness.node.x-k8s.io_nodereadinessrules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,24 @@ spec:
x-kubernetes-validations:
- message: taint key must start with 'readiness.k8s.io/'
rule: self.key.startsWith('readiness.k8s.io/')
- message: reserved taint prefix 'readiness.k8s.io/system/*' is not
allowed
rule: '!self.key.startsWith(''readiness.k8s.io/system/'')'
- message: reserved taint prefix 'readiness.k8s.io/core/*' is not
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Priyankasaggu11929 identified in #155 that this is not a valid taint key format (a taint key may contain only one `/`, so sub-prefixes under `readiness.k8s.io/` are not allowed); this behavior cannot be achieved without an upstream change :(

allowed
rule: '!self.key.startsWith(''readiness.k8s.io/core/'')'
- message: reserved taint prefix 'readiness.k8s.io/node/*' is not
allowed
rule: '!self.key.startsWith(''readiness.k8s.io/node/'')'
- message: reserved taint prefix 'readiness.k8s.io/device/*' is not
allowed
rule: '!self.key.startsWith(''readiness.k8s.io/device/'')'
- message: reserved taint prefix 'readiness.k8s.io/network/*' is not
allowed
rule: '!self.key.startsWith(''readiness.k8s.io/network/'')'
- message: reserved taint prefix 'readiness.k8s.io/storage/*' is not
allowed
rule: '!self.key.startsWith(''readiness.k8s.io/storage/'')'
- message: taint key length must be at most 253 characters
rule: self.key.size() <= 253
- message: taint key must have exactly one '/' separator (prefix/name
Expand Down
2 changes: 1 addition & 1 deletion config/samples/v1alpha1_nodereadinessrule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
- type: "network.kubernetes.io/CNIReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/example.com/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "bootstrap-only"
Expand Down
4 changes: 2 additions & 2 deletions config/testing/kind/kind-3node-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ nodes:
kubeletExtraArgs:
node-labels: "reserved-for=platform"
register-with-taints: "node-restriction.kubernetes.io/reserved-for=platform:NoExecute"
- role: worker # workers; reserved labels like node-role.kubernetes.io/worker cannot be used in kind ref: kind/issues/3536
- role: worker # workers; reserved labels like node-role.kubernetes.io/worker cannot be used in kind ref: kind/issues/3536
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "reserved-for=worker"
register-with-taints: "readiness.k8s.io/NetworkReady=pending:NoSchedule"
register-with-taints: "readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule"
2 changes: 1 addition & 1 deletion config/testing/kind/test-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ nodes:
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
register-with-taints: "readiness.k8s.io/NetworkReady=pending:NoSchedule"
register-with-taints: "readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule"
6 changes: 3 additions & 3 deletions docs/TEST_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The test demonstrates a realistic, production-aligned scenario where critical ad
The test uses a 3-node Kind cluster:
1. **`nrr-test-control-plane`**: The Kubernetes control plane. The NRR controller will run here unless specifically configured.
2. **`nrr-test-worker` (Platform Node)**: A dedicated node for running cluster-critical addons. It is labeled `reserved-for=platform` and has a corresponding taint to repel normal application workloads. Cert-manager will run here.
3. **`nrr-test-worker2` (Application Node)**: A standard worker node that starts with a `readiness.k8s.io/NetworkReady=pending:NoSchedule` taint, simulating a node that is not yet ready for application traffic.
3. **`nrr-test-worker2` (Application Node)**: A standard worker node that starts with a `readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule` taint, simulating a node that is not yet ready for application traffic.

## Running the Test

Expand Down Expand Up @@ -94,7 +94,7 @@ kubectl apply -f examples/cni-readiness/network-readiness-rule.yaml
Check that the application worker node (`nrr-test-worker2`) has the `readiness.k8s.io/projectcalico.org/network-not-ready` taint.

```bash
# The output should include 'readiness.k8s.io/NetworkReady'
# The output should include 'readiness.k8s.io/projectcalico.org/network-not-ready'
kubectl get node nrr-test-worker2 -o jsonpath='Taints:{"\n"}{range .spec.taints[*]}{.key}{"\n"}{end}'
```

Expand All @@ -120,7 +120,7 @@ examples/cni-readiness/apply-calico.sh

2. **Verify the taint has been removed from the application node:**
```bash
# The output should NO LONGER include 'readiness.k8s.io/NetworkReady'
# The output should NO LONGER include 'readiness.k8s.io/projectcalico.org/network-not-ready'
kubectl get node nrr-test-worker2 -o jsonpath='Taints:{"\n"}{range .spec.taints[*]}{.key}{"\n"}{end}'
```

Expand Down
12 changes: 6 additions & 6 deletions docs/book/src/examples/cni-readiness.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ In many Kubernetes clusters, the CNI plugin runs as a DaemonSet. When a new node
This guide demonstrates how to use the Node Readiness Controller to prevent pods from being scheduled on a node until the Container Network Interface (CNI) plugin (e.g., Calico) is fully initialized and ready.

The high-level steps are:
1. Node is bootstrapped with a [startup taint](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) `readiness.k8s.io/NetworkReady=pending:NoSchedule` immediately upon joining.
2. A reporter DaemonSet is deployed to monitor the CNI's health and report it to the API server as node-condition (`projectcalico.org/CalicoReady`).
1. Node is bootstrapped with a [startup taint](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) `readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule` immediately upon joining.
2. A reporter DaemonSet is deployed to monitor the CNI's health and report it to the API server as node-condition (`projectcalico.org/CalicoReady`).
3. Node Readiness Controller will untaint the node only when the CNI reports it is ready.

## Step-by-Step Guide
Expand Down Expand Up @@ -85,7 +85,7 @@ subjects:

### 3. Create the Node Readiness Rule

Now define the rule that enforces the requirement. This tells the controller: *"Keep the `readiness.k8s.io/NetworkReady` taint on the node until `projectcalico.org/CalicoReady` is True."*
Now define the rule that enforces the requirement. This tells the controller: *"Keep the `readiness.k8s.io/projectcalico.org/network-not-ready` taint on the node until `projectcalico.org/CalicoReady` is True."*

```yaml
# network-readiness-rule.yaml
Expand All @@ -101,7 +101,7 @@ spec:

# The taint to manage
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/projectcalico.org/network-not-ready"
effect: "NoSchedule"
value: "pending"

Expand Down Expand Up @@ -135,11 +135,11 @@ To test this, add a new node to the cluster.

1. **Check the Node Taints**:
Immediately upon joining, the node should have the taint:
`readiness.k8s.io/NetworkReady=pending:NoSchedule`.
`readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule`.

2. **Check Node Conditions**:
Watch the node conditions. You will initially see `projectcalico.org/CalicoReady` as `False` or missing.
Once Calico starts, the reporter will update it to `True`.

3. **Check Taint Removal**:
As soon as the condition becomes `True`, the Node Readiness Controller will remove the taint, and workloads will be scheduled.
As soon as the condition becomes `True`, the Node Readiness Controller will remove the taint, and workloads will be scheduled.
4 changes: 2 additions & 2 deletions docs/book/src/introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
- type: "example.com/CNIReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/example.com/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "bootstrap-only"
Expand All @@ -72,4 +72,4 @@ See the Kubernetes community on the [community page](http://kubernetes.io/commun

## Project Status

This project is currently in **alpha**. The API may change in future releases.
This project is currently in **alpha**. The API may change in future releases.
2 changes: 1 addition & 1 deletion docs/book/src/reference/api-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ _Appears in:_
| `conditions` _[ConditionRequirement](#conditionrequirement) array_ | conditions contains a list of the Node conditions that defines the specific<br />criteria that must be met for taints to be managed on the target Node.<br />The presence or status of these conditions directly triggers the application or removal of Node taints. | | MaxItems: 32 <br />MinItems: 1 <br /> |
| `enforcementMode` _[EnforcementMode](#enforcementmode)_ | enforcementMode specifies how the controller maintains the desired state.<br />enforcementMode is one of bootstrap-only, continuous.<br />"bootstrap-only" applies the configuration once during initial setup.<br />"continuous" ensures the state is monitored and corrected throughout the resource lifecycle. | | Enum: [bootstrap-only continuous] <br /> |
| `taint` _[Taint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#taint-v1-core)_ | taint defines the specific Taint (Key, Value, and Effect) to be managed<br />on Nodes that meet the defined condition criteria.<br />Key must start with `readiness.k8s.io/`. Reserved core prefixes are forbidden: `readiness.k8s.io/{system,core,node,device,network,storage}/*`. Use user-space keys such as `readiness.k8s.io/<dns.subdomain>/<component>`. | | |
| `nodeSelector` _[LabelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#labelselector-v1-meta)_ | nodeSelector limits the scope of this rule to a specific subset of Nodes. | | |
| `dryRun` _boolean_ | dryRun when set to true, The controller will evaluate Node conditions and log intended taint modifications<br />without persisting changes to the cluster. Proposed actions are reflected in the resource status. | | |

Expand Down Expand Up @@ -204,4 +205,3 @@ _Appears in:_
| `Present` | TaintStatusPresent represent the taint present on the Node.<br /> |
| `Absent` | TaintStatusAbsent represent the taint absent on the Node.<br /> |


13 changes: 13 additions & 0 deletions docs/book/src/user-guide/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ Typical taint keys look like:
- `readiness.k8s.io/csi.vendor.com/storage-driver-not-ready`
- `readiness.k8s.io/<dns.subdomain>/<component-name>`

Reserved core prefixes (not allowed for user rules):
- `readiness.k8s.io/system/*`
- `readiness.k8s.io/core/*`
- `readiness.k8s.io/node/*`
- `readiness.k8s.io/device/*`
- `readiness.k8s.io/network/*`
- `readiness.k8s.io/storage/*`

Use vendor/user-space paths under `readiness.k8s.io/*` that include a DNS-style component to avoid conflicts, for example:
- `readiness.k8s.io/example.com/my-component`
- `readiness.k8s.io/projectcalico.org/network-not-ready`
- `readiness.k8s.io/vendor.io/storage-driver-ready`

The segment after `readiness.k8s.io/` should describe the dependency or subsystem whose readiness is being guarded (for example, a CNI plugin, storage backend, or security agent). Treat this domain as reserved for the controller and closely related components, and avoid reusing it for unrelated taints.

## Enforcement Modes
Expand Down
20 changes: 18 additions & 2 deletions docs/book/src/user-guide/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,26 @@ taint:
**Invalid:**
```yaml
taint:
key: "network-ready" # Missing prefix
key: "node.kubernetes.io/ready" # Wrong prefix
key: "network-ready" # Missing prefix
key: "node.kubernetes.io/ready" # Wrong prefix
key: "readiness.k8s.io/system/foo" # Reserved prefix
key: "readiness.k8s.io/network/bar" # Reserved prefix
key: "readiness.k8s.io/storage/not-ready" # Reserved prefix
```

Reserved core prefixes under `readiness.k8s.io/*` are forbidden to avoid conflicts with future controller features:
- `readiness.k8s.io/system/*`
- `readiness.k8s.io/core/*`
- `readiness.k8s.io/node/*`
- `readiness.k8s.io/device/*`
- `readiness.k8s.io/network/*`
- `readiness.k8s.io/storage/*`

Use vendor/user-space keys with DNS-style components, for example:
- `readiness.k8s.io/example.com/network-not-ready`
- `readiness.k8s.io/projectcalico.org/cni-ready`
- `readiness.k8s.io/vendor.io/storage-driver-ready`


## Testing with Dry Run

Expand Down
2 changes: 1 addition & 1 deletion examples/cni-readiness/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
This example demonstrates how to use the Node Readiness Controller to ensure nodes are only marked ready for workloads after the CNI (Calico) has fully initialized.

### How it works:
1. Nodes join with a `readiness.k8s.io/NetworkReady=pending:NoSchedule` taint.
1. Nodes join with a `readiness.k8s.io/projectcalico.org/network-not-ready=pending:NoSchedule` taint.
2. A lightweight DaemonSet (`cni-reporter-ds.yaml`)
monitors Calico's health endpoint (`localhost:9099/readiness`) and updates a
node condition `projectcalico.org/CalicoReady`.
Expand Down
2 changes: 1 addition & 1 deletion examples/cni-readiness/network-readiness-dryrun-rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ spec:
- type: "projectcalico.org/CalicoReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/projectcalico.org/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "bootstrap-only"
Expand Down
2 changes: 1 addition & 1 deletion examples/cni-readiness/network-readiness-rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
- type: "projectcalico.org/CalicoReady"
requiredStatus: "True"
taint:
key: "readiness.k8s.io/NetworkReady"
key: "readiness.k8s.io/projectcalico.org/network-not-ready"
effect: "NoSchedule"
value: "pending"
enforcementMode: "continuous"
Expand Down
26 changes: 26 additions & 0 deletions internal/webhook/nodereadinessgaterule_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package webhook
import (
"context"
"fmt"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -70,6 +71,29 @@ func (w *NodeReadinessRuleWebhook) validateSpec(spec readinessv1alpha1.NodeReadi
allErrs = append(allErrs, field.Required(field.NewPath("spec", "nodeSelector"), "nodeSelector must not be empty"))
}

// Validate taint - reserved prefix validation (defense in depth, also validated by CRD)
taintField := field.NewPath("spec", "taint")
if spec.Taint.Key != "" {
if !strings.HasPrefix(spec.Taint.Key, "readiness.k8s.io/") {
allErrs = append(allErrs, field.Invalid(taintField.Child("key"), spec.Taint.Key, "taint key must start with 'readiness.k8s.io/'"))
} else {
reserved := []string{
"readiness.k8s.io/system/",
"readiness.k8s.io/core/",
"readiness.k8s.io/node/",
"readiness.k8s.io/device/",
"readiness.k8s.io/network/",
"readiness.k8s.io/storage/",
}
for _, p := range reserved {
if strings.HasPrefix(spec.Taint.Key, p) {
allErrs = append(allErrs, field.Invalid(taintField.Child("key"), spec.Taint.Key, fmt.Sprintf("reserved taint prefix '%s*' is not allowed", p)))
break
}
}
}
}

return allErrs
}

Expand Down Expand Up @@ -197,3 +221,5 @@ func (w *NodeReadinessRuleWebhook) ValidateDelete(ctx context.Context, obj runti
// No validation needed for delete operations
return nil, nil
}

// Made with Bob
Loading
Loading