-
Notifications
You must be signed in to change notification settings - Fork 233
Add feature gate for disabling the MHC controller #1151
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,6 +68,10 @@ spec: | |
| fieldRef: | ||
| apiVersion: v1 | ||
| fieldPath: metadata.namespace | ||
| - name: POD_NAME | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needed for the getting the controller ref for the recorder for the feature gate accessor... |
||
| valueFrom: | ||
| fieldRef: | ||
| fieldPath: metadata.name | ||
| - name: METRICS_PORT | ||
| value: "8080" | ||
| resources: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ package operator | |
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "os" | ||
| "time" | ||
|
|
@@ -12,9 +13,10 @@ import ( | |
| configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1" | ||
| configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" | ||
| machineclientset "github.com/openshift/client-go/machine/clientset/versioned" | ||
| "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" | ||
| "github.com/openshift/library-go/pkg/operator/events" | ||
| "github.com/openshift/library-go/pkg/operator/resource/resourceapply" | ||
| admissionregistrationv1 "k8s.io/api/admissionregistration/v1" | ||
| "k8s.io/apimachinery/pkg/api/errors" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||
| "k8s.io/apimachinery/pkg/util/wait" | ||
|
|
@@ -39,6 +41,9 @@ const ( | |
| // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s | ||
| maxRetries = 15 | ||
| maoOwnedAnnotation = "machine.openshift.io/owned" | ||
|
|
||
| releaseVersionEnvVariableName = "RELEASE_VERSION" | ||
| releaseVersionUnknownValue = "unknown" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the same value as the raw version string when it's not initialised?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't understand how/when this is used tbh, I copied it from here: https://github.com/openshift/cluster-cloud-controller-manager-operator/blob/43facae32a53c73ad567129150db6865673bdf1d/pkg/controllers/status.go#L224
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I think it's fine to leave as that for now, lets see if anything breaks! |
||
| ) | ||
|
|
||
| // Operator defines machine api operator. | ||
|
|
@@ -53,6 +58,7 @@ type Operator struct { | |
| machineClient machineclientset.Interface | ||
| dynamicClient dynamic.Interface | ||
| eventRecorder record.EventRecorder | ||
| recorder events.Recorder | ||
|
|
||
| syncHandler func(ic string) (reconcile.Result, error) | ||
|
|
||
|
|
@@ -71,8 +77,7 @@ type Operator struct { | |
| mutatingWebhookLister admissionlisterv1.MutatingWebhookConfigurationLister | ||
| mutatingWebhookListerSynced cache.InformerSynced | ||
|
|
||
| featureGateLister configlistersv1.FeatureGateLister | ||
| featureGateCacheSynced cache.InformerSynced | ||
| featureGateAccessor featuregates.FeatureGateAccess | ||
|
|
||
| // queue only ever has one item, but it has nice error handling backoff/retry semantics | ||
| queue workqueue.RateLimitingInterface | ||
|
|
@@ -91,6 +96,7 @@ func New( | |
| deployInformer appsinformersv1.DeploymentInformer, | ||
| daemonsetInformer appsinformersv1.DaemonSetInformer, | ||
| featureGateInformer configinformersv1.FeatureGateInformer, | ||
| clusterVersionInformer configinformersv1.ClusterVersionInformer, | ||
| validatingWebhookInformer admissioninformersv1.ValidatingWebhookConfigurationInformer, | ||
| mutatingWebhookInformer admissioninformersv1.MutatingWebhookConfigurationInformer, | ||
| proxyInformer configinformersv1.ProxyInformer, | ||
|
|
@@ -99,13 +105,18 @@ func New( | |
| machineClient machineclientset.Interface, | ||
| dynamicClient dynamic.Interface, | ||
|
|
||
| recorder record.EventRecorder, | ||
| eventRecorder record.EventRecorder, | ||
| recorder events.Recorder, | ||
| ) (*Operator, error) { | ||
| // we must report the version from the release payload when we report available at that level | ||
| // TODO we will report the version of the operands (so our machine api implementation version) | ||
| operandVersions := []osconfigv1.OperandVersion{} | ||
| if releaseVersion := os.Getenv("RELEASE_VERSION"); len(releaseVersion) > 0 { | ||
| releaseVersion := os.Getenv(releaseVersionEnvVariableName) | ||
| if len(releaseVersion) > 0 { | ||
| operandVersions = append(operandVersions, osconfigv1.OperandVersion{Name: "operator", Version: releaseVersion}) | ||
| } else { | ||
| klog.Infof("%s environment variable is missing, defaulting to %q", releaseVersionEnvVariableName, releaseVersionUnknownValue) | ||
| releaseVersion = releaseVersionUnknownValue | ||
| } | ||
|
|
||
| optr := &Operator{ | ||
|
|
@@ -116,7 +127,8 @@ func New( | |
| osClient: osClient, | ||
| machineClient: machineClient, | ||
| dynamicClient: dynamicClient, | ||
| eventRecorder: recorder, | ||
| eventRecorder: eventRecorder, | ||
| recorder: recorder, | ||
| queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machineapioperator"), | ||
| operandVersions: operandVersions, | ||
| } | ||
|
|
@@ -137,10 +149,36 @@ func New( | |
| if err != nil { | ||
| return nil, fmt.Errorf("error adding event handler to mutatingwebhook informer: %v", err) | ||
| } | ||
| _, err = featureGateInformer.Informer().AddEventHandler(optr.eventHandler()) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("error adding event handler to featuregates informer: %v", err) | ||
| } | ||
|
|
||
| desiredVersion := releaseVersion | ||
| missingVersion := "0.0.1-snapshot" | ||
| featureGateAccessor := featuregates.NewFeatureGateAccess( | ||
| desiredVersion, missingVersion, | ||
| clusterVersionInformer, featureGateInformer, | ||
| recorder, | ||
| ) | ||
| featureGateAccessor.SetChangeHandler(func(featureChange featuregates.FeatureChange) { | ||
| if featureChange.Previous == nil { | ||
| // When the initial featuregate is set, the previous version is nil. | ||
| // Nothing to do in this case, it's handled by the 1st sync, which only runs after the initial feature set was received. | ||
| return | ||
| } | ||
|
|
||
| klog.V(4).InfoS("FeatureGates changed", "enabled", featureChange.New.Enabled, "disabled", featureChange.New.Disabled) | ||
| prevDisableMHC := featuregates.NewFeatureGate(featureChange.Previous.Enabled, featureChange.Previous.Disabled). | ||
| Enabled(osconfigv1.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController) | ||
| newDisableMHC := featuregates.NewFeatureGate(featureChange.New.Enabled, featureChange.New.Disabled). | ||
| Enabled(osconfigv1.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController) | ||
|
|
||
| if prevDisableMHC != newDisableMHC { | ||
| klog.V(2).InfoS("Resync for modified feature gate", | ||
| "FeatureGateMachineAPIOperatorDisableMachineHealthCheckController enabled", newDisableMHC, | ||
| ) | ||
| workQueueKey := fmt.Sprintf("%s/%s", optr.namespace, optr.name) | ||
| optr.queue.Add(workQueueKey) | ||
| } | ||
| }) | ||
| optr.featureGateAccessor = featureGateAccessor | ||
|
|
||
| optr.config = config | ||
| optr.syncHandler = optr.sync | ||
|
|
@@ -160,9 +198,6 @@ func New( | |
| optr.mutatingWebhookLister = mutatingWebhookInformer.Lister() | ||
| optr.mutatingWebhookListerSynced = mutatingWebhookInformer.Informer().HasSynced | ||
|
|
||
| optr.featureGateLister = featureGateInformer.Lister() | ||
| optr.featureGateCacheSynced = featureGateInformer.Informer().HasSynced | ||
|
|
||
| return optr, nil | ||
| } | ||
|
|
||
|
|
@@ -179,12 +214,24 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { | |
| optr.validatingWebhookListerSynced, | ||
| optr.deployListerSynced, | ||
| optr.daemonsetListerSynced, | ||
| optr.proxyListerSynced, | ||
| optr.featureGateCacheSynced) { | ||
| optr.proxyListerSynced) { | ||
| klog.Error("Failed to sync caches") | ||
| return | ||
| } | ||
| klog.Info("Synced up caches") | ||
|
|
||
| ctx, cancelFeatureGateAccessor := context.WithCancel(context.Background()) | ||
| defer cancelFeatureGateAccessor() | ||
| go optr.featureGateAccessor.Run(ctx) | ||
| klog.Info("Started feature gate accessor") | ||
| select { | ||
| case <-optr.featureGateAccessor.InitialFeatureGatesObserved(): | ||
| klog.V(4).Info("FeatureGates initialized") | ||
| case <-time.After(1 * time.Minute): | ||
| klog.Error(errors.New("timed out waiting for FeatureGate detection"), "unable to start operator") | ||
| return | ||
| } | ||
|
|
||
| for i := 0; i < workers; i++ { | ||
| go wait.Until(optr.worker, time.Second, stopCh) | ||
| } | ||
|
|
@@ -353,19 +400,6 @@ func (optr *Operator) sync(key string) (reconcile.Result, error) { | |
| return optr.syncAll(operatorConfig) | ||
| } | ||
|
|
||
| func getFeatureGate(lister configlistersv1.FeatureGateLister) (*osconfigv1.FeatureGate, error) { | ||
| featureGate, err := lister.Get("cluster") | ||
| if errors.IsNotFound(err) { | ||
| // No feature gate is set, therefore cannot be external. | ||
| // This is not an error as the feature gate is an optional resource. | ||
| return nil, nil | ||
| } else if err != nil { | ||
| return nil, fmt.Errorf("could not fetch featuregate: %v", err) | ||
| } | ||
|
|
||
| return featureGate, nil | ||
| } | ||
|
|
||
| func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { | ||
| infra, err := optr.osClient.ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{}) | ||
| if err != nil { | ||
|
|
@@ -382,12 +416,7 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { | |
| return nil, err | ||
| } | ||
|
|
||
| featureGate, err := getFeatureGate(optr.featureGateLister) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| providerControllerImage, err := getProviderControllerFromImages(provider, featureGate, *images) | ||
| providerControllerImage, err := getProviderControllerFromImages(provider, *images) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -412,14 +441,25 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { | |
| return nil, err | ||
| } | ||
|
|
||
| // in case the MHC controller is disabled, leave its image empty | ||
| mhcImage := machineAPIOperatorImage | ||
| featureGates, err := optr.featureGateAccessor.CurrentFeatureGates() | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| if featureGates.Enabled(osconfigv1.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController) { | ||
| klog.V(2).Info("Disabling MHC controller") | ||
| mhcImage = "" | ||
| } | ||
|
|
||
| return &OperatorConfig{ | ||
| TargetNamespace: optr.namespace, | ||
| Proxy: clusterWideProxy, | ||
| Controllers: Controllers{ | ||
| Provider: providerControllerImage, | ||
| MachineSet: machineAPIOperatorImage, | ||
| NodeLink: machineAPIOperatorImage, | ||
| MachineHealthCheck: machineAPIOperatorImage, | ||
| MachineHealthCheck: mhcImage, | ||
| KubeRBACProxy: kubeRBACProxy, | ||
| TerminationHandler: terminationHandlerImage, | ||
| }, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
needed for the getting the controller ref for the recorder for the feature gate accessor...