diff --git a/cmd/main.go b/cmd/main.go index 4372f5f4..1e6e414d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -13,6 +13,7 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" cmacmev1 "github.com/cert-manager/cert-manager/pkg/apis/acme/v1" + cmv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" envoygatewayv1alpha1 "github.com/envoyproxy/gateway/api/v1alpha1" multiclusterproviders "go.miloapis.com/milo/pkg/multicluster-runtime" milomulticluster "go.miloapis.com/milo/pkg/multicluster-runtime/milo" @@ -65,6 +66,7 @@ func init() { utilruntime.Must(envoygatewayv1alpha1.AddToScheme(scheme)) utilruntime.Must(networkingv1alpha1.AddToScheme(scheme)) utilruntime.Must(cmacmev1.AddToScheme(scheme)) + utilruntime.Must(cmv1.AddToScheme(scheme)) utilruntime.Must(dnsv1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -121,6 +123,8 @@ func main() { // TODO(jreese) validate the config cfg := ctrl.GetConfigOrDie() + cfg.QPS = 50 + cfg.Burst = 100 deploymentCluster, err := cluster.New(cfg, func(o *cluster.Options) { o.Scheme = scheme @@ -187,6 +191,8 @@ func main() { setupLog.Error(err, "unable to load control plane kubeconfig") os.Exit(1) } + downstreamRestConfig.QPS = 50 + downstreamRestConfig.Burst = 100 downstreamCluster, err := cluster.New(downstreamRestConfig, func(o *cluster.Options) { o.Scheme = scheme @@ -439,11 +445,15 @@ func initializeClusterDiscovery( if err != nil { return nil, nil, fmt.Errorf("unable to get discovery rest config: %w", err) } + discoveryRestConfig.QPS = 50 + discoveryRestConfig.Burst = 100 projectRestConfig, err := serverConfig.Discovery.ProjectRestConfig() if err != nil { return nil, nil, fmt.Errorf("unable to get project rest config: %w", err) } + projectRestConfig.QPS = 50 + projectRestConfig.Burst = 100 discoveryManager, err := manager.New(discoveryRestConfig, manager.Options{ Client: client.Options{ diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 82e382b1..c4cef16e 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -52,8 +52,12 @@ rules: resources: - certificates verbs: + - create + - delete - get - list + - patch + - update - watch - apiGroups: - coordination.k8s.io diff --git a/config/rbac_downstream/role.yaml b/config/rbac_downstream/role.yaml index a6de2cb8..c1880d33 100644 --- a/config/rbac_downstream/role.yaml +++ b/config/rbac_downstream/role.yaml @@ -139,8 +139,12 @@ rules: resources: - certificates verbs: + - create + - delete - get - list + - patch + - update - watch - apiGroups: - gateway.envoyproxy.io diff --git a/internal/config/config.go b/internal/config/config.go index b1473cf7..42acb6bb 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -486,17 +486,6 @@ type GatewayConfig struct { // issuer name, the operator will use the value as is. ClusterIssuerMap map[string]string `json:"clusterIssuerMap,omitempty"` - // PerGatewayCertificateIssuer will result in the operator to expect a - // cert-manager Issuer to exist with the same name as the gateway. Any value - // provided for the "gateway.networking.datumapis.com/certificate-issuer" - // option will be replaced with the gateway's name. The Issuer resources will - // be managed by Kyverno policies, and not by this operator. - // - // TODO(jreese) Remove this once we've either implemented DNS validation, - // found a path to attach cert-manager generated routes to the gateway they're - // needed for, or implement our own ACME integration. - PerGatewayCertificateIssuer bool `json:"perGatewayCertificateIssuer,omitempty"` - // ListenerTLSOptions specifies the TLS options to program on generated // TLS listeners. // +default={"gateway.networking.datumapis.com/certificate-issuer": "auto"} @@ -559,6 +548,15 @@ type GatewayConfig struct { // Defaults to false. EnableDNSIntegration bool `json:"enableDNSIntegration,omitempty"` + // DefaultListenerTLSSecretName, if provided, is the name of a + // pre-provisioned TLS certificate secret to use for the default HTTPS + // listener (named "default-https"). When set, this listener references + // the shared secret instead of requesting an individual certificate + // via cert-manager. + // + // The secret must exist in every downstream gateway namespace. + DefaultListenerTLSSecretName string `json:"defaultListenerTLSSecretName,omitempty"` + // MaxConcurrentReconciles is the maximum number of concurrent gateway // reconciliations. Higher values allow the controller to process gateways // across multiple projects in parallel. @@ -567,6 +565,12 @@ type GatewayConfig struct { MaxConcurrentReconciles int `json:"maxConcurrentReconciles,omitempty"` } +// HasDefaultListenerTLSSecret returns true when a shared TLS certificate +// secret has been configured for default HTTPS listeners. +func (c *GatewayConfig) HasDefaultListenerTLSSecret() bool { + return c.DefaultListenerTLSSecretName != "" +} + // ShouldDeleteErroredChallenges returns whether the operator should automatically // delete errored ACME challenges. Defaults to true if not explicitly set. func (c *GatewayConfig) ShouldDeleteErroredChallenges() bool { diff --git a/internal/controller/challenge_controller.go b/internal/controller/challenge_controller.go index c1fdfd7c..a6d77255 100644 --- a/internal/controller/challenge_controller.go +++ b/internal/controller/challenge_controller.go @@ -81,13 +81,9 @@ func (r *ChallengeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } // isGatewayRelatedIssuer checks if the given issuer reference is for a Gateway-managed -// certificate issuer. This includes: -// - ClusterIssuers that are mapped in the ClusterIssuerMap configuration -// - Issuers (namespace-scoped) when PerGatewayCertificateIssuer mode is enabled +// certificate issuer (ClusterIssuers that are mapped in the ClusterIssuerMap configuration). func (r *ChallengeReconciler) isGatewayRelatedIssuer(ref cmmeta.ObjectReference) bool { - // Check if ClusterIssuer is in the configured map if ref.Kind == "ClusterIssuer" || ref.Kind == "" { - // Default kind is ClusterIssuer for cert-manager for _, mappedIssuer := range r.Config.Gateway.ClusterIssuerMap { if mappedIssuer == ref.Name { return true @@ -95,11 +91,6 @@ func (r *ChallengeReconciler) isGatewayRelatedIssuer(ref cmmeta.ObjectReference) } } - // In per-gateway mode, any namespace-scoped Issuer is gateway-related - if r.Config.Gateway.PerGatewayCertificateIssuer && ref.Kind == "Issuer" { - return true - } - return false } diff --git a/internal/controller/challenge_controller_test.go b/internal/controller/challenge_controller_test.go index 7464c800..ee69665b 100644 --- a/internal/controller/challenge_controller_test.go +++ b/internal/controller/challenge_controller_test.go @@ -75,7 +75,7 @@ func TestChallengeReconciler(t *testing.T) { expectDeleted: true, }, { - name: "errored challenge with Issuer in per-gateway mode is deleted", + name: "errored challenge with namespace-scoped Issuer is not deleted", challenge: &cmacmev1.Challenge{ ObjectMeta: metav1.ObjectMeta{ Namespace: upstreamNamespace.Name, @@ -95,11 +95,9 @@ func TestChallengeReconciler(t *testing.T) { }, }, config: config.NetworkServicesOperator{ - Gateway: config.GatewayConfig{ - PerGatewayCertificateIssuer: true, - }, + Gateway: config.GatewayConfig{}, }, - expectDeleted: true, + expectDeleted: false, }, { name: "errored challenge with non-gateway ClusterIssuer is not deleted", @@ -258,7 +256,6 @@ func TestChallengeReconciler(t *testing.T) { }, config: config.NetworkServicesOperator{ Gateway: config.GatewayConfig{ - PerGatewayCertificateIssuer: false, ClusterIssuerMap: map[string]string{ "letsencrypt": "letsencrypt-prod", }, @@ -368,25 +365,12 @@ func TestIsGatewayRelatedIssuer(t *testing.T) { expected: true, }, { - name: "Issuer in per-gateway mode returns true", - ref: cmmeta.ObjectReference{ - Kind: "Issuer", - Name: "gateway-issuer", - }, - config: config.GatewayConfig{ - PerGatewayCertificateIssuer: true, - }, - expected: true, - }, - { - name: "Issuer not in per-gateway mode returns false", + name: "namespace-scoped Issuer returns false", ref: cmmeta.ObjectReference{ Kind: "Issuer", Name: "gateway-issuer", }, - config: config.GatewayConfig{ - PerGatewayCertificateIssuer: false, - }, + config: config.GatewayConfig{}, expected: false, }, { diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index 17a3619f..0735e74f 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -5,11 +5,12 @@ package controller import ( "context" "fmt" - "maps" "slices" "strings" "time" + cmv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" envoygatewayv1alpha1 "github.com/envoyproxy/gateway/api/v1alpha1" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -81,6 +82,8 @@ type GatewayReconciler struct { // +kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=backendtlspolicies/status,verbs=get;update;patch // +kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=backendtlspolicies/finalizers,verbs=update +// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch;create;update;patch;delete + // +kubebuilder:rbac:groups=externaldns.k8s.io,resources=dnsendpoints,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=externaldns.k8s.io,resources=dnsendpoints/status,verbs=get;update;patch // +kubebuilder:rbac:groups=externaldns.k8s.io,resources=dnsendpoints/finalizers,verbs=update @@ -89,16 +92,23 @@ func (r *GatewayReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque logger := log.FromContext(ctx, "cluster", req.ClusterName, "namespace", req.Namespace, "name", req.Name) ctx = log.IntoContext(ctx, logger) + logger.Info("gateway reconcile dequeued") + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) if err != nil { + logger.Error(err, "failed to get cluster") return ctrl.Result{}, err } + logger.Info("got cluster, fetching gateway") + var gateway gatewayv1.Gateway if err := cl.GetClient().Get(ctx, req.NamespacedName, &gateway); err != nil { if apierrors.IsNotFound(err) { + logger.Info("gateway not found, skipping") return ctrl.Result{}, nil } + logger.Error(err, "failed to get gateway") return ctrl.Result{}, err } @@ -106,12 +116,15 @@ func (r *GatewayReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque var upstreamGatewayClass gatewayv1.GatewayClass if err := cl.GetClient().Get(ctx, types.NamespacedName{Name: string(gateway.Spec.GatewayClassName)}, &upstreamGatewayClass); err != nil { if apierrors.IsNotFound(err) { + logger.Info("gateway class not found, skipping") return ctrl.Result{}, nil } + logger.Error(err, "failed to get gateway class") return ctrl.Result{}, err } if upstreamGatewayClass.Spec.ControllerName != r.Config.Gateway.ControllerName { + logger.Info("gateway class controller name mismatch, skipping", "expected", r.Config.Gateway.ControllerName, "actual", upstreamGatewayClass.Spec.ControllerName) return ctrl.Result{}, nil } @@ -133,7 +146,9 @@ func (r *GatewayReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque } if r.prepareUpstreamGateway(&gateway) { + logger.Info("preparing upstream gateway (adding finalizer/defaults)") if err := cl.GetClient().Update(ctx, &gateway); err != nil { + logger.Error(err, "failed preparing upstream gateway") return ctrl.Result{}, fmt.Errorf("failed preparing upstream gateway: %w", err) } @@ -252,7 +267,6 @@ func (r *GatewayReconciler) ensureDownstreamGateway( desiredDownstreamGateway := r.getDesiredDownstreamGateway( ctx, - upstreamClusterName, upstreamGateway, claimedHostnames, ) @@ -271,13 +285,9 @@ func (r *GatewayReconciler) ensureDownstreamGateway( return result, nil } } else { - // Sync cert-manager annotations: add desired, remove stale - syncCertManagerAnnotations(downstreamGateway, desiredDownstreamGateway) - if !equality.Semantic.DeepEqual(downstreamGateway.Annotations, desiredDownstreamGateway.Annotations) || !equality.Semantic.DeepEqual(downstreamGateway.Spec, desiredDownstreamGateway.Spec) { - // Take care not to clobber other annotations - maps.Copy(downstreamGateway.Annotations, desiredDownstreamGateway.Annotations) + downstreamGateway.Annotations = desiredDownstreamGateway.Annotations downstreamGateway.Spec = desiredDownstreamGateway.Spec if err := downstreamClient.Update(ctx, downstreamGateway); err != nil { result.Err = fmt.Errorf("failed updating downstream gateway: %w", err) @@ -286,6 +296,18 @@ func (r *GatewayReconciler) ensureDownstreamGateway( } } + certResult := r.ensureListenerCertificates( + ctx, + upstreamGateway, + downstreamGateway, + downstreamClient, + downstreamStrategy, + claimedHostnames, + ) + if certResult.ShouldReturn() { + return certResult.Merge(result), nil + } + dnsResult := r.ensureDownstreamGatewayDNSEndpoints( ctx, downstreamGateway, @@ -352,7 +374,6 @@ func (r *GatewayReconciler) ensureDownstreamGateway( func (r *GatewayReconciler) getDesiredDownstreamGateway( ctx context.Context, - upstreamClusterName string, upstreamGateway *gatewayv1.Gateway, claimedHostnames []string, ) *gatewayv1.Gateway { @@ -361,63 +382,55 @@ func (r *GatewayReconciler) getDesiredDownstreamGateway( var listeners []gatewayv1.Listener + wildcardSuffix := "." + r.Config.Gateway.TargetDomain + for listenerIndex, l := range upstreamGateway.Spec.Listeners { - // Only process listeners with verified custom hostnames; unclaimed - // hostnames must not influence cert-manager annotations or downstream - // listener configuration. if l.Hostname != nil && !slices.Contains(claimedHostnames, string(*l.Hostname)) { logger.Info("skipping downstream gateway listener with unclaimed hostname", "upstream_listener_index", listenerIndex, "hostname", *l.Hostname) continue } - if l.TLS != nil && l.TLS.Options[certificateIssuerTLSOption] != "" { - if r.Config.Gateway.PerGatewayCertificateIssuer { - if !metav1.HasAnnotation(downstreamGateway.ObjectMeta, "cert-manager.io/issuer") { - metav1.SetMetaDataAnnotation(&downstreamGateway.ObjectMeta, "cert-manager.io/issuer", upstreamGateway.Name) - } - } else { - clusterIssuerName := string(l.TLS.Options[certificateIssuerTLSOption]) - if r.Config.Gateway.ClusterIssuerMap[clusterIssuerName] != "" { - clusterIssuerName = r.Config.Gateway.ClusterIssuerMap[clusterIssuerName] - } - if !metav1.HasAnnotation(downstreamGateway.ObjectMeta, "cert-manager.io/cluster-issuer") { - metav1.SetMetaDataAnnotation(&downstreamGateway.ObjectMeta, "cert-manager.io/cluster-issuer", clusterIssuerName) - } - - // Add labels so that secrets created by cert-manager can be propagated - // See: https://cert-manager.io/docs/reference/annotations/#cert-manageriosecret-template - if !metav1.HasAnnotation(downstreamGateway.ObjectMeta, "cert-manager.io/secret-template") { - metav1.SetMetaDataAnnotation( - &downstreamGateway.ObjectMeta, - "cert-manager.io/secret-template", - fmt.Sprintf( - `{"labels": {"%s": "%s"}}`, - downstreamclient.UpstreamOwnerClusterNameLabel, - fmt.Sprintf("cluster-%s", strings.ReplaceAll(upstreamClusterName, "/", "_")), - ), - ) - } - } + // Per-listener TLS decision: hostnames covered by the wildcard + // (*.targetDomain) reference the pre-provisioned shared secret; + // all others reference a per-listener secret populated by a + // Certificate resource created in ensureListenerCertificates. + hostnameUnderWildcard := false + if l.Hostname != nil { + h := string(*l.Hostname) + hostnameUnderWildcard = strings.HasSuffix(h, wildcardSuffix) || h == r.Config.Gateway.TargetDomain } + useSharedTLS := hostnameUnderWildcard && r.Config.Gateway.HasDefaultListenerTLSSecret() if l.Hostname != nil { listenerCopy := l.DeepCopy() if l.TLS != nil && l.TLS.Options[certificateIssuerTLSOption] != "" { - // Translate upstream TLS settings to downstream TLS settings delete(listenerCopy.TLS.Options, certificateIssuerTLSOption) tlsMode := gatewayv1.TLSModeTerminate - listenerCopy.TLS = &gatewayv1.GatewayTLSConfig{ - Mode: &tlsMode, - // TODO(jreese) investigate secret deletion when Cert (gateway) is deleted - // See: https://cert-manager.io/docs/usage/certificate/#cleaning-up-secrets-when-certificates-are-deleted - CertificateRefs: []gatewayv1.SecretObjectReference{ - { - Group: ptr.To(gatewayv1.Group("")), - Kind: ptr.To(gatewayv1.Kind("Secret")), - Name: gatewayv1.ObjectName(resourcename.GetValidDNS1123Name(fmt.Sprintf("%s-%s", upstreamGateway.Name, l.Name))), + if useSharedTLS { + listenerCopy.TLS = &gatewayv1.GatewayTLSConfig{ + Mode: &tlsMode, + CertificateRefs: []gatewayv1.SecretObjectReference{ + { + Group: ptr.To(gatewayv1.Group("")), + Kind: ptr.To(gatewayv1.Kind("Secret")), + Name: gatewayv1.ObjectName(r.Config.Gateway.DefaultListenerTLSSecretName), + }, }, - }, + } + } else { + // Secret name must match the Certificate created by + // ensureListenerCertificates for this listener. + listenerCopy.TLS = &gatewayv1.GatewayTLSConfig{ + Mode: &tlsMode, + CertificateRefs: []gatewayv1.SecretObjectReference{ + { + Group: ptr.To(gatewayv1.Group("")), + Kind: ptr.To(gatewayv1.Kind("Secret")), + Name: gatewayv1.ObjectName(listenerCertificateSecretName(upstreamGateway.Name, l.Name)), + }, + }, + } } } @@ -433,26 +446,150 @@ func (r *GatewayReconciler) getDesiredDownstreamGateway( return &downstreamGateway } -// certManagerAnnotationKeys are the annotations managed by the gateway controller -// for cert-manager integration. These are synced from the desired state and stale -// entries are removed when custom hostnames become unclaimed. -var certManagerAnnotationKeys = []string{ - "cert-manager.io/issuer", - "cert-manager.io/cluster-issuer", - "cert-manager.io/secret-template", +// listenerCertificateSecretName returns the deterministic Secret name that a +// Certificate resource will populate for a given gateway listener. +func listenerCertificateSecretName(gatewayName string, listenerName gatewayv1.SectionName) string { + return resourcename.GetValidDNS1123Name(fmt.Sprintf("%s-%s", gatewayName, listenerName)) +} + +// listenerCertificateName returns the deterministic Certificate resource name +// for a given gateway listener. +func listenerCertificateName(gatewayName string, listenerName gatewayv1.SectionName) string { + return resourcename.GetValidDNS1123Name(fmt.Sprintf("%s-%s", gatewayName, listenerName)) } -// syncCertManagerAnnotations ensures the downstream gateway's cert-manager -// annotations match the desired state: desired keys are set, and keys that -// are no longer needed are removed. -func syncCertManagerAnnotations(downstream, desired *gatewayv1.Gateway) { - for _, key := range certManagerAnnotationKeys { - if v, ok := desired.Annotations[key]; ok { - metav1.SetMetaDataAnnotation(&downstream.ObjectMeta, key, v) - } else { - delete(downstream.Annotations, key) +// ensureListenerCertificates creates, updates, or deletes cert-manager +// Certificate resources for each listener that requires an individual TLS +// certificate. Listeners whose hostnames fall under the wildcard target domain +// use the shared TLS secret and do not get a Certificate. +func (r *GatewayReconciler) ensureListenerCertificates( + ctx context.Context, + upstreamGateway *gatewayv1.Gateway, + downstreamGateway *gatewayv1.Gateway, + downstreamClient client.Client, + downstreamStrategy downstreamclient.ResourceStrategy, + claimedHostnames []string, +) (result Result) { + logger := log.FromContext(ctx) + wildcardSuffix := "." + r.Config.Gateway.TargetDomain + + desiredCerts := make(map[string]bool) + + // Only create Certificates for listeners with custom hostnames outside the + // wildcard scope. Wildcard-covered listeners use the shared TLS secret and + // don't need individual Certificates. + for _, l := range upstreamGateway.Spec.Listeners { + if l.TLS == nil || l.TLS.Options[certificateIssuerTLSOption] == "" || l.Hostname == nil { + continue + } + hostname := string(*l.Hostname) + if !slices.Contains(claimedHostnames, hostname) { + continue + } + // Skip hostnames covered by the wildcard — they use the shared secret. + if strings.HasSuffix(hostname, wildcardSuffix) || hostname == r.Config.Gateway.TargetDomain { + continue + } + + clusterIssuerName := string(l.TLS.Options[certificateIssuerTLSOption]) + if mapped := r.Config.Gateway.ClusterIssuerMap[clusterIssuerName]; mapped != "" { + clusterIssuerName = mapped + } + + certName := listenerCertificateName(upstreamGateway.Name, l.Name) + secretName := listenerCertificateSecretName(upstreamGateway.Name, l.Name) + desiredCerts[certName] = true + + cert := &cmv1.Certificate{ + ObjectMeta: metav1.ObjectMeta{ + Name: certName, + Namespace: downstreamGateway.Namespace, + }, + } + + if err := downstreamClient.Get(ctx, client.ObjectKeyFromObject(cert), cert); client.IgnoreNotFound(err) != nil { + result.Err = fmt.Errorf("failed to get Certificate %s: %w", certName, err) + return result + } + + isNew := cert.CreationTimestamp.IsZero() + if isNew { + if err := downstreamStrategy.SetControllerReference(ctx, upstreamGateway, cert); err != nil { + result.Err = fmt.Errorf("failed to set controller reference on Certificate %s: %w", certName, err) + return result + } + } + + desiredSpec := cmv1.CertificateSpec{ + SecretName: secretName, + SecretTemplate: &cmv1.CertificateSecretTemplate{ + Labels: map[string]string{ + downstreamclient.UpstreamOwnerClusterNameLabel: cert.Labels[downstreamclient.UpstreamOwnerClusterNameLabel], + }, + }, + DNSNames: []string{hostname}, + IssuerRef: cmmeta.ObjectReference{ + Name: clusterIssuerName, + Kind: "ClusterIssuer", + }, + } + + var opResult string + var err error + if isNew { + cert.Spec = desiredSpec + err = downstreamClient.Create(ctx, cert) + opResult = "created" + } else if !equality.Semantic.DeepEqual(cert.Spec, desiredSpec) { + cert.Spec = desiredSpec + err = downstreamClient.Update(ctx, cert) + opResult = "updated" + } + if err != nil { + result.Err = fmt.Errorf("failed to ensure Certificate %s: %w", certName, err) + return result + } + if opResult != "" { + logger.Info("Certificate reconciled", "certificate", certName, "operation", opResult) } } + + // Clean up Certificate resources for listeners that no longer need them. + // Two ownership patterns are checked: + // 1. Certificates created by NSO via the downstream strategy (upstream-owner labels) + // 2. Legacy Certificates created by cert-manager's gateway-shim (controller ownerRef + // to the downstream gateway, no upstream-owner labels) + var certList cmv1.CertificateList + if err := downstreamClient.List(ctx, &certList, + client.InNamespace(downstreamGateway.Namespace), + ); err != nil { + result.Err = fmt.Errorf("failed to list Certificates: %w", err) + return result + } + + for i := range certList.Items { + cert := &certList.Items[i] + if desiredCerts[cert.Name] { + continue + } + + ownedByStrategy := cert.Labels[downstreamclient.UpstreamOwnerKindLabel] == KindGateway && + cert.Labels[downstreamclient.UpstreamOwnerNameLabel] == upstreamGateway.Name && + cert.Labels[downstreamclient.UpstreamOwnerNamespaceLabel] == upstreamGateway.Namespace + ownedByGatewayShim := metav1.IsControlledBy(cert, downstreamGateway) + + if !ownedByStrategy && !ownedByGatewayShim { + continue + } + + logger.Info("deleting stale Certificate", "certificate", cert.Name) + if err := downstreamClient.Delete(ctx, cert); client.IgnoreNotFound(err) != nil { + result.Err = fmt.Errorf("failed to delete stale Certificate %s: %w", cert.Name, err) + return result + } + } + + return result } func (r *GatewayReconciler) reconcileGatewayStatus( @@ -1270,7 +1407,28 @@ func (r *GatewayReconciler) ensureDownstreamHTTPRoute( resourceResult, err := controllerutil.CreateOrUpdate(ctx, downstreamClient, resource, func() error { switch obj := resource.(type) { case *corev1.Service: - obj.Spec = desiredDownstreamResource.(*corev1.Service).Spec + desired := desiredDownstreamResource.(*corev1.Service) + obj.Spec.Type = desired.Spec.Type + obj.Spec.ClusterIP = desired.Spec.ClusterIP + obj.Spec.InternalTrafficPolicy = desired.Spec.InternalTrafficPolicy + obj.Spec.TrafficDistribution = desired.Spec.TrafficDistribution + + // Merge ports by name rather than overwriting the slice, so + // server-defaulted fields like TargetPort are preserved. + for _, dp := range desired.Spec.Ports { + found := false + for i, ep := range obj.Spec.Ports { + if ep.Name == dp.Name { + obj.Spec.Ports[i].Port = dp.Port + obj.Spec.Ports[i].Protocol = dp.Protocol + found = true + break + } + } + if !found { + obj.Spec.Ports = append(obj.Spec.Ports, dp) + } + } case *discoveryv1.EndpointSlice: desiredEndpointSlice := desiredDownstreamResource.(*discoveryv1.EndpointSlice) // Since endpointslices get duplicated for routes, add them as a controller @@ -1610,6 +1768,13 @@ func (r *GatewayReconciler) SetupWithManager(mgr mcmanager.Manager) error { downstreamHTTPRouteClusterSource, _ := downstreamHTTPRouteSource.ForCluster("", r.DownstreamCluster) + downstreamCertificateSource := mcsource.TypedKind( + &cmv1.Certificate{}, + downstreamclient.TypedEnqueueRequestForUpstreamOwner[*cmv1.Certificate](&gatewayv1.Gateway{}), + ) + + downstreamCertificateClusterSource, _ := downstreamCertificateSource.ForCluster("", r.DownstreamCluster) + builder := mcbuilder.ControllerManagedBy(mgr). For(&gatewayv1.Gateway{}). Watches( @@ -1629,7 +1794,8 @@ func (r *GatewayReconciler) SetupWithManager(mgr mcmanager.Manager) error { r.listGatewaysForHTTPRouteFilterFunc, ). WatchesRawSource(downstreamGatewayClusterSource). - WatchesRawSource(downstreamHTTPRouteClusterSource) + WatchesRawSource(downstreamHTTPRouteClusterSource). + WatchesRawSource(downstreamCertificateClusterSource) if r.Config.Gateway.EnableDNSIntegration { builder = builder. diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index 1e653acf..18072480 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -6,17 +6,21 @@ import ( "slices" "testing" + cmv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" @@ -33,6 +37,7 @@ func TestEnsureDownstreamGateway(t *testing.T) { assert.NoError(t, gatewayv1.Install(testScheme)) assert.NoError(t, discoveryv1.AddToScheme(testScheme)) assert.NoError(t, networkingv1alpha.AddToScheme(testScheme)) + assert.NoError(t, cmv1.AddToScheme(testScheme)) upstreamNamespace := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -238,6 +243,315 @@ func TestEnsureDownstreamGateway(t *testing.T) { } } +func TestEnsureDownstreamGatewayWildcardCert(t *testing.T) { + testScheme := runtime.NewScheme() + assert.NoError(t, scheme.AddToScheme(testScheme)) + assert.NoError(t, gatewayv1.Install(testScheme)) + assert.NoError(t, discoveryv1.AddToScheme(testScheme)) + assert.NoError(t, networkingv1alpha.AddToScheme(testScheme)) + assert.NoError(t, cmv1.AddToScheme(testScheme)) + + upstreamNamespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: uuid.NewUUID(), + }, + } + + defaultTLSSecretName := "wildcard-test-suite-tls" + + baseConfig := config.GatewayConfig{ + DownstreamGatewayClassName: "test-suite", + DownstreamHostnameAccountingNamespace: "default", + TargetDomain: "test-suite.com", + IPFamilies: []networkingv1alpha.IPFamily{ + networkingv1alpha.IPv4Protocol, + networkingv1alpha.IPv6Protocol, + }, + ListenerTLSOptions: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ + gatewayv1.AnnotationKey(certificateIssuerTLSOption): "test-issuer", + }, + } + + tests := []struct { + name string + defaultTLSSecretName string + upstreamGateway *gatewayv1.Gateway + existingUpstreamObjects []client.Object + existingDownstreamObjects []client.Object + assert func(t *testing.T, upstreamGateway, downstreamGateway *gatewayv1.Gateway) + }{ + { + name: "default https listener uses shared TLS secret", + defaultTLSSecretName: defaultTLSSecretName, + upstreamGateway: newGateway(config.NetworkServicesOperator{Gateway: baseConfig}, upstreamNamespace.Name, "test-gw", func(g *gatewayv1.Gateway) { + }), + assert: func(t *testing.T, upstreamGateway, downstreamGateway *gatewayv1.Gateway) { + httpsListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + gatewayutil.DefaultHTTPSListenerName, + ) + if assert.NotNil(t, httpsListener, "default HTTPS listener should exist on downstream gateway") { + if assert.NotNil(t, httpsListener.TLS, "TLS config should be set") { + if assert.Len(t, httpsListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName(defaultTLSSecretName), + httpsListener.TLS.CertificateRefs[0].Name, + "default listener should reference the shared TLS secret", + ) + } + } + } + + assert.Empty(t, downstreamGateway.Annotations["cert-manager.io/cluster-issuer"], + "cert-manager annotation should not be set; Certificates are created directly", + ) + }, + }, + { + name: "custom hostname gets individual cert with shared TLS secret enabled", + defaultTLSSecretName: defaultTLSSecretName, + upstreamGateway: newGateway(config.NetworkServicesOperator{Gateway: baseConfig}, upstreamNamespace.Name, "test-gw", func(g *gatewayv1.Gateway) { + g.Spec.Listeners = append(g.Spec.Listeners, + gatewayv1.Listener{ + Name: "https-hostname-0", + Protocol: gatewayv1.HTTPSProtocolType, + Port: DefaultHTTPSPort, + Hostname: ptr.To(gatewayv1.Hostname("custom.example.com")), + AllowedRoutes: &gatewayv1.AllowedRoutes{ + Namespaces: &gatewayv1.RouteNamespaces{ + From: ptr.To(gatewayv1.NamespacesFromSame), + }, + }, + TLS: &gatewayv1.GatewayTLSConfig{ + Mode: ptr.To(gatewayv1.TLSModeTerminate), + Options: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ + gatewayv1.AnnotationKey(certificateIssuerTLSOption): "test-issuer", + }, + }, + }, + ) + }), + existingUpstreamObjects: []client.Object{ + newDomain(upstreamNamespace.Name, "custom.example.com", func(d *networkingv1alpha.Domain) { + d.Spec.DomainName = "custom.example.com" + apimeta.SetStatusCondition(&d.Status.Conditions, metav1.Condition{ + Type: networkingv1alpha.DomainConditionVerified, + Status: metav1.ConditionTrue, + }) + }), + }, + assert: func(t *testing.T, upstreamGateway, downstreamGateway *gatewayv1.Gateway) { + httpsListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + gatewayutil.DefaultHTTPSListenerName, + ) + if assert.NotNil(t, httpsListener, "default HTTPS listener should exist") { + if assert.NotNil(t, httpsListener.TLS) { + if assert.Len(t, httpsListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName(defaultTLSSecretName), + httpsListener.TLS.CertificateRefs[0].Name, + "default listener under wildcard should still use shared TLS even with custom listeners", + ) + } + } + } + + customListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + "https-hostname-0", + ) + if assert.NotNil(t, customListener, "custom HTTPS listener should exist") { + if assert.NotNil(t, customListener.TLS) { + if assert.Len(t, customListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName("test-gw-https-hostname-0"), + customListener.TLS.CertificateRefs[0].Name, + "custom listener should reference a per-listener cert secret", + ) + } + } + } + + assert.Empty(t, downstreamGateway.Annotations["cert-manager.io/cluster-issuer"], + "cert-manager annotation should not be set; Certificates are created directly", + ) + }, + }, + { + name: "subdomain of target domain uses shared wildcard cert", + defaultTLSSecretName: defaultTLSSecretName, + upstreamGateway: newGateway(config.NetworkServicesOperator{Gateway: baseConfig}, upstreamNamespace.Name, "test-gw", func(g *gatewayv1.Gateway) { + g.Spec.Listeners = append(g.Spec.Listeners, + gatewayv1.Listener{ + Name: "https-hostname-0", + Protocol: gatewayv1.HTTPSProtocolType, + Port: DefaultHTTPSPort, + Hostname: ptr.To(gatewayv1.Hostname("canary.test-suite.com")), + AllowedRoutes: &gatewayv1.AllowedRoutes{ + Namespaces: &gatewayv1.RouteNamespaces{ + From: ptr.To(gatewayv1.NamespacesFromSame), + }, + }, + TLS: &gatewayv1.GatewayTLSConfig{ + Mode: ptr.To(gatewayv1.TLSModeTerminate), + Options: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ + gatewayv1.AnnotationKey(certificateIssuerTLSOption): "test-issuer", + }, + }, + }, + ) + }), + existingUpstreamObjects: []client.Object{ + newDomain(upstreamNamespace.Name, "canary.test-suite.com", func(d *networkingv1alpha.Domain) { + d.Spec.DomainName = "canary.test-suite.com" + apimeta.SetStatusCondition(&d.Status.Conditions, metav1.Condition{ + Type: networkingv1alpha.DomainConditionVerified, + Status: metav1.ConditionTrue, + }) + }), + }, + assert: func(t *testing.T, upstreamGateway, downstreamGateway *gatewayv1.Gateway) { + // Both default and subdomain listeners are covered by the wildcard, + // so both should reference the shared TLS secret. + httpsListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + gatewayutil.DefaultHTTPSListenerName, + ) + if assert.NotNil(t, httpsListener, "default HTTPS listener should exist") { + if assert.NotNil(t, httpsListener.TLS) { + if assert.Len(t, httpsListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName(defaultTLSSecretName), + httpsListener.TLS.CertificateRefs[0].Name, + "default listener should use shared wildcard cert", + ) + } + } + } + + customListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + "https-hostname-0", + ) + if assert.NotNil(t, customListener, "subdomain listener should exist") { + if assert.NotNil(t, customListener.TLS) { + if assert.Len(t, customListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName(defaultTLSSecretName), + customListener.TLS.CertificateRefs[0].Name, + "subdomain listener covered by wildcard should use shared cert", + ) + } + } + } + + assert.Empty(t, downstreamGateway.Annotations["cert-manager.io/cluster-issuer"], + "cert-manager annotation should not be set; Certificates are created directly", + ) + }, + }, + { + name: "without shared TLS secret all listeners get individual certs", + defaultTLSSecretName: "", + upstreamGateway: newGateway(config.NetworkServicesOperator{Gateway: baseConfig}, upstreamNamespace.Name, "test-gw", func(g *gatewayv1.Gateway) { + }), + assert: func(t *testing.T, upstreamGateway, downstreamGateway *gatewayv1.Gateway) { + httpsListener := gatewayutil.GetListenerByName( + downstreamGateway.Spec.Listeners, + gatewayutil.DefaultHTTPSListenerName, + ) + if assert.NotNil(t, httpsListener, "default HTTPS listener should exist") { + if assert.NotNil(t, httpsListener.TLS) { + if assert.Len(t, httpsListener.TLS.CertificateRefs, 1) { + assert.Equal(t, + gatewayv1.ObjectName("test-gw-default-https"), + httpsListener.TLS.CertificateRefs[0].Name, + "without wildcard config, default listener should get a per-listener cert", + ) + } + } + } + + assert.Empty(t, downstreamGateway.Annotations["cert-manager.io/cluster-issuer"], + "cert-manager annotation should not be set; Certificates are created directly", + ) + }, + }, + } + + logger := zap.New(zap.UseFlagOptions(&zap.Options{Development: true})) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + testCfg := config.NetworkServicesOperator{Gateway: *baseConfig.DeepCopy()} + testCfg.Gateway.DefaultListenerTLSSecretName = tt.defaultTLSSecretName + + tt.existingUpstreamObjects = append(tt.existingUpstreamObjects, &gatewayv1.GatewayClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: gatewayv1.GatewayClassSpec{ + ControllerName: gatewayv1.GatewayController("test"), + }, + }) + + for _, obj := range append(tt.existingUpstreamObjects, tt.existingDownstreamObjects...) { + obj.SetUID(uuid.NewUUID()) + obj.SetCreationTimestamp(metav1.Now()) + } + + fakeUpstreamClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(tt.upstreamGateway, upstreamNamespace). + WithObjects(tt.existingUpstreamObjects...). + WithStatusSubresource(tt.upstreamGateway). + WithStatusSubresource(tt.existingUpstreamObjects...). + Build() + + fakeDownstreamClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(tt.existingDownstreamObjects...). + WithStatusSubresource(&gatewayv1.Gateway{}). + WithStatusSubresource(tt.existingDownstreamObjects...). + Build() + + ctx := context.Background() + ctx = log.IntoContext(ctx, logger) + + mgr := &fakeMockManager{cl: fakeUpstreamClient} + + reconciler := &GatewayReconciler{ + mgr: mgr, + Config: testCfg, + DownstreamCluster: &fakeCluster{cl: fakeDownstreamClient}, + } + + downstreamStrategy := downstreamclient.NewMappedNamespaceResourceStrategy("test", fakeUpstreamClient, fakeDownstreamClient) + + reconciler.prepareUpstreamGateway(tt.upstreamGateway) + result, downstreamGateway := reconciler.ensureDownstreamGateway( + ctx, + "test-suite", + fakeUpstreamClient, + tt.upstreamGateway, + downstreamStrategy, + ) + assert.NoError(t, result.Err, "failed ensuring downstream gateway") + + _, err := result.Complete(ctx) + assert.NoError(t, err, "failed completing result") + + if tt.assert != nil { + updatedUpstreamGateway := &gatewayv1.Gateway{} + assert.NoError(t, fakeUpstreamClient.Get(ctx, client.ObjectKeyFromObject(tt.upstreamGateway), updatedUpstreamGateway)) + tt.assert(t, updatedUpstreamGateway, downstreamGateway) + } + }) + } +} + func TestEnsureDownstreamGatewayHTTPRoutes(t *testing.T) { testScheme := runtime.NewScheme() assert.NoError(t, scheme.AddToScheme(testScheme)) @@ -807,22 +1121,18 @@ func newGateway( return gw } -func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T) { +func TestGetDesiredDownstreamGateway_UnclaimedHostnameSkipped(t *testing.T) { logger := zap.New(zap.UseFlagOptions(&zap.Options{Development: true})) ctx := log.IntoContext(context.Background(), logger) tests := []struct { - name string - perGatewayCertIssuer bool - listeners []gatewayv1.Listener - claimedHostnames []string - expectIssuerAnnotation bool - expectClusterAnnotation bool - expectListeners int + name string + listeners []gatewayv1.Listener + claimedHostnames []string + expectListeners int }{ { - name: "unclaimed hostname with TLS option does not produce annotations", - perGatewayCertIssuer: true, + name: "unclaimed hostname is skipped", listeners: []gatewayv1.Listener{ { Name: "custom-https", @@ -836,14 +1146,11 @@ func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T }, }, }, - claimedHostnames: nil, - expectIssuerAnnotation: false, - expectClusterAnnotation: false, - expectListeners: 0, + claimedHostnames: nil, + expectListeners: 0, }, { - name: "claimed hostname with TLS option produces issuer annotation (per-gateway mode)", - perGatewayCertIssuer: true, + name: "claimed hostname produces listener", listeners: []gatewayv1.Listener{ { Name: "custom-https", @@ -857,35 +1164,11 @@ func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T }, }, }, - claimedHostnames: []string{"claimed.example.com"}, - expectIssuerAnnotation: true, - expectClusterAnnotation: false, - expectListeners: 1, + claimedHostnames: []string{"claimed.example.com"}, + expectListeners: 1, }, { - name: "claimed hostname with TLS option produces cluster-issuer annotation (cluster mode)", - perGatewayCertIssuer: false, - listeners: []gatewayv1.Listener{ - { - Name: "custom-https", - Port: 443, - Protocol: gatewayv1.HTTPSProtocolType, - Hostname: ptr.To(gatewayv1.Hostname("claimed.example.com")), - TLS: &gatewayv1.GatewayTLSConfig{ - Options: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ - certificateIssuerTLSOption: "letsencrypt", - }, - }, - }, - }, - claimedHostnames: []string{"claimed.example.com"}, - expectIssuerAnnotation: false, - expectClusterAnnotation: true, - expectListeners: 1, - }, - { - name: "mix: only claimed hostname contributes annotations", - perGatewayCertIssuer: true, + name: "mix: only claimed hostname produces listener", listeners: []gatewayv1.Listener{ { Name: "unclaimed-https", @@ -910,10 +1193,8 @@ func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T }, }, }, - claimedHostnames: []string{"claimed.example.com"}, - expectIssuerAnnotation: true, - expectClusterAnnotation: false, - expectListeners: 1, + claimedHostnames: []string{"claimed.example.com"}, + expectListeners: 1, }, } @@ -922,8 +1203,7 @@ func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T reconciler := &GatewayReconciler{ Config: config.NetworkServicesOperator{ Gateway: config.GatewayConfig{ - DownstreamGatewayClassName: "envoy", - PerGatewayCertificateIssuer: tt.perGatewayCertIssuer, + DownstreamGatewayClassName: "envoy", }, }, } @@ -933,89 +1213,14 @@ func TestGetDesiredDownstreamGateway_UnclaimedHostnameNoAnnotations(t *testing.T Spec: gatewayv1.GatewaySpec{Listeners: tt.listeners}, } - desired := reconciler.getDesiredDownstreamGateway(ctx, "test-cluster", upstream, tt.claimedHostnames) - - _, hasIssuer := desired.Annotations["cert-manager.io/issuer"] - _, hasClusterIssuer := desired.Annotations["cert-manager.io/cluster-issuer"] + desired := reconciler.getDesiredDownstreamGateway(ctx, upstream, tt.claimedHostnames) - assert.Equal(t, tt.expectIssuerAnnotation, hasIssuer, "cert-manager.io/issuer annotation presence") - assert.Equal(t, tt.expectClusterAnnotation, hasClusterIssuer, "cert-manager.io/cluster-issuer annotation presence") + assert.Empty(t, desired.Annotations, "desired gateway should have no cert-manager annotations") assert.Len(t, desired.Spec.Listeners, tt.expectListeners, "downstream listener count") }) } } -func TestSyncCertManagerAnnotations(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - existingAnnotations map[string]string - desiredAnnotations map[string]string - wantAnnotations map[string]string - }{ - { - name: "stale issuer annotation removed", - existingAnnotations: map[string]string{ - "cert-manager.io/issuer": "old-issuer", - "other-annotation": "keep-me", - }, - desiredAnnotations: map[string]string{}, - wantAnnotations: map[string]string{ - "other-annotation": "keep-me", - }, - }, - { - name: "issuer annotation updated to cluster-issuer", - existingAnnotations: map[string]string{ - "cert-manager.io/issuer": "old-issuer", - }, - desiredAnnotations: map[string]string{ - "cert-manager.io/cluster-issuer": "new-cluster-issuer", - }, - wantAnnotations: map[string]string{ - "cert-manager.io/cluster-issuer": "new-cluster-issuer", - }, - }, - { - name: "no-op when annotations match", - existingAnnotations: map[string]string{"cert-manager.io/issuer": "my-issuer"}, - desiredAnnotations: map[string]string{"cert-manager.io/issuer": "my-issuer"}, - wantAnnotations: map[string]string{"cert-manager.io/issuer": "my-issuer"}, - }, - { - name: "all three cert-manager annotations cleaned up", - existingAnnotations: map[string]string{ - "cert-manager.io/issuer": "old", - "cert-manager.io/cluster-issuer": "old", - "cert-manager.io/secret-template": "old", - "unrelated": "preserved", - }, - desiredAnnotations: map[string]string{}, - wantAnnotations: map[string]string{ - "unrelated": "preserved", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - existing := &gatewayv1.Gateway{ - ObjectMeta: metav1.ObjectMeta{Annotations: tt.existingAnnotations}, - } - desired := &gatewayv1.Gateway{ - ObjectMeta: metav1.ObjectMeta{Annotations: tt.desiredAnnotations}, - } - - syncCertManagerAnnotations(existing, desired) - - assert.Equal(t, tt.wantAnnotations, existing.Annotations) - }) - } -} - func newHTTPRoute(namespace, name string, opts ...func(*gatewayv1.HTTPRoute)) *gatewayv1.HTTPRoute { route := &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ @@ -1032,3 +1237,155 @@ func newHTTPRoute(namespace, name string, opts ...func(*gatewayv1.HTTPRoute)) *g return route } + +// TestServiceSpecClobberingCausesReconciliationStorm proves that overwriting the +// entire Service.Spec during CreateOrUpdate causes spurious updates when the +// existing Service contains server-defaulted fields that the desired spec omits. +// This is the root cause of the gateway controller reconciliation storm: every +// reconciliation produces an "updated" result, which re-enqueues the gateway, +// creating an infinite loop. +func TestServiceSpecClobberingCausesReconciliationStorm(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, scheme.AddToScheme(testScheme)) + + // This is what the NSO controller builds as the desired Service — it only + // sets the fields it cares about. + desiredService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-svc", + Namespace: "default", + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + ClusterIP: "None", + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + Protocol: corev1.ProtocolTCP, + }, + }, + InternalTrafficPolicy: ptr.To(corev1.ServiceInternalTrafficPolicyCluster), + }, + } + + // This is what the API server returns after creating the Service — it has + // server-defaulted fields like sessionAffinity, ipFamilyPolicy, ipFamilies, + // clusterIPs, and port targetPort that the controller never explicitly sets. + existingService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-svc", + Namespace: "default", + ResourceVersion: "1", + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + ClusterIP: "None", + ClusterIPs: []string{ + "None", + }, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + Protocol: corev1.ProtocolTCP, + TargetPort: intstr.FromInt32(80), + }, + }, + InternalTrafficPolicy: ptr.To(corev1.ServiceInternalTrafficPolicyCluster), + SessionAffinity: corev1.ServiceAffinityNone, + IPFamilies: []corev1.IPFamily{corev1.IPv4Protocol}, + IPFamilyPolicy: ptr.To(corev1.IPFamilyPolicySingleStack), + }, + } + + t.Run("whole_spec_overwrite_causes_perpetual_updates", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(existingService.DeepCopy()). + Build() + + svc := desiredService.DeepCopy() + result, err := controllerutil.CreateOrUpdate(context.Background(), cl, svc, func() error { + svc.Spec = desiredService.Spec + return nil + }) + require.NoError(t, err) + assert.Equal(t, controllerutil.OperationResultUpdated, result, + "BUG: whole-spec overwrite returns 'updated' even when intent is unchanged, "+ + "because server-defaulted fields (sessionAffinity, ipFamilies, clusterIPs, "+ + "targetPort, ipFamilyPolicy) are stripped") + + // On a real API server, the server re-defaults these fields on every + // write, so the next GET returns the full spec again and the cycle + // repeats forever — creating an infinite reconciliation storm. + }) + + t.Run("field_level_patching_preserves_server_defaults", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(existingService.DeepCopy()). + Build() + + svc := desiredService.DeepCopy() + result, err := controllerutil.CreateOrUpdate(context.Background(), cl, svc, func() error { + desired := desiredService + svc.Spec.Type = desired.Spec.Type + svc.Spec.ClusterIP = desired.Spec.ClusterIP + svc.Spec.InternalTrafficPolicy = desired.Spec.InternalTrafficPolicy + svc.Spec.TrafficDistribution = desired.Spec.TrafficDistribution + + // Merge ports: update matching ports by name, preserving + // server-defaulted fields like TargetPort. + for _, dp := range desired.Spec.Ports { + found := false + for i, ep := range svc.Spec.Ports { + if ep.Name == dp.Name { + svc.Spec.Ports[i].Port = dp.Port + svc.Spec.Ports[i].Protocol = dp.Protocol + found = true + break + } + } + if !found { + svc.Spec.Ports = append(svc.Spec.Ports, dp) + } + } + + return nil + }) + require.NoError(t, err) + assert.Equal(t, controllerutil.OperationResultNone, result, + "FIX: field-level patching returns 'unchanged' because server-defaulted "+ + "fields (sessionAffinity, ipFamilies, clusterIPs, targetPort, ipFamilyPolicy) are preserved") + + // Run it a second time to prove idempotency. + result, err = controllerutil.CreateOrUpdate(context.Background(), cl, svc, func() error { + desired := desiredService + svc.Spec.Type = desired.Spec.Type + svc.Spec.ClusterIP = desired.Spec.ClusterIP + svc.Spec.InternalTrafficPolicy = desired.Spec.InternalTrafficPolicy + svc.Spec.TrafficDistribution = desired.Spec.TrafficDistribution + + for _, dp := range desired.Spec.Ports { + found := false + for i, ep := range svc.Spec.Ports { + if ep.Name == dp.Name { + svc.Spec.Ports[i].Port = dp.Port + svc.Spec.Ports[i].Protocol = dp.Protocol + found = true + break + } + } + if !found { + svc.Spec.Ports = append(svc.Spec.Ports, dp) + } + } + + return nil + }) + require.NoError(t, err) + assert.Equal(t, controllerutil.OperationResultNone, result, + "idempotent: second call also returns 'unchanged'") + }) +} diff --git a/internal/controller/trafficprotectionpolicy_controller.go b/internal/controller/trafficprotectionpolicy_controller.go index 2a09b05c..ad362fd0 100644 --- a/internal/controller/trafficprotectionpolicy_controller.go +++ b/internal/controller/trafficprotectionpolicy_controller.go @@ -38,6 +38,7 @@ import ( "go.datum.net/network-services-operator/internal/config" downstreamclient "go.datum.net/network-services-operator/internal/downstreamclient" gatewaystatus "go.datum.net/network-services-operator/internal/gatewayapi/status" + gatewayutil "go.datum.net/network-services-operator/internal/util/gateway" "go.datum.net/network-services-operator/internal/util/resourcename" ) @@ -294,13 +295,18 @@ func (r *TrafficProtectionPolicyReconciler) checkHTTPSListenerCertificatesReady( ) (*certificateReadinessResult, error) { logger := log.FromContext(ctx) - // Collect unique HTTPS listeners from attachments + // Collect unique HTTPS listeners from attachments, skipping default + // listeners that use a shared TLS secret (which is pre-provisioned and + // doesn't have a per-listener Certificate resource). httpsListeners := make(map[string]struct{}) for _, attachment := range attachments { if attachment.Listener != nil { // Check if this specific listener is HTTPS for _, l := range attachment.Gateway.Spec.Listeners { if l.Name == *attachment.Listener && l.Protocol == gatewayv1.HTTPSProtocolType { + if gatewayutil.IsDefaultListener(l) && r.Config.Gateway.HasDefaultListenerTLSSecret() { + continue + } certName := resourcename.GetValidDNS1123Name(fmt.Sprintf("%s-%s", attachment.Gateway.Name, l.Name)) httpsListeners[certName] = struct{}{} } @@ -309,6 +315,9 @@ func (r *TrafficProtectionPolicyReconciler) checkHTTPSListenerCertificatesReady( // Policy targets all listeners on the gateway for _, l := range attachment.Gateway.Spec.Listeners { if l.Protocol == gatewayv1.HTTPSProtocolType { + if gatewayutil.IsDefaultListener(l) && r.Config.Gateway.HasDefaultListenerTLSSecret() { + continue + } certName := resourcename.GetValidDNS1123Name(fmt.Sprintf("%s-%s", attachment.Gateway.Name, l.Name)) httpsListeners[certName] = struct{}{} } diff --git a/internal/downstreamclient/mappednamespace.go b/internal/downstreamclient/mappednamespace.go index 372cd14c..e2dccfdc 100644 --- a/internal/downstreamclient/mappednamespace.go +++ b/internal/downstreamclient/mappednamespace.go @@ -146,17 +146,20 @@ func (c *mappedNamespaceResourceStrategy) SetControllerReference(ctx context.Con downstreamClient := c.GetClient() var anchorConfigMap corev1.ConfigMap - if err := downstreamClient.Get(ctx, client.ObjectKey{Namespace: controlled.GetNamespace(), Name: anchorName}, &anchorConfigMap); client.IgnoreNotFound(err) != nil { - return fmt.Errorf("failed listing configmaps: %w", err) - } - - if anchorConfigMap.CreationTimestamp.IsZero() { - anchorConfigMap.Name = anchorName - anchorConfigMap.Labels = anchorLabels - anchorConfigMap.Namespace = controlled.GetNamespace() + err = downstreamClient.Get(ctx, client.ObjectKey{Namespace: controlled.GetNamespace(), Name: anchorName}, &anchorConfigMap) + if apierrors.IsNotFound(err) { + anchorConfigMap = corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: anchorName, + Namespace: controlled.GetNamespace(), + Labels: anchorLabels, + }, + } if err := downstreamClient.Create(ctx, &anchorConfigMap); err != nil { return fmt.Errorf("failed creating anchor configmap: %w", err) } + } else if err != nil { + return fmt.Errorf("failed getting anchor configmap: %w", err) } if err := controllerutil.SetOwnerReference(&anchorConfigMap, controlled, downstreamClient.Scheme(), opts...); err != nil {