From 8c2d8ced41953afa6f1a0bd4e5c5db18f3b29880 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 22 Jan 2026 10:43:25 -0800 Subject: [PATCH 1/3] fix(team-operator): add retain_on_delete protection for CRDs and namespace When migrating from kustomize to Helm deployment, Pulumi would see old kustomize-managed CRD resources as orphaned and delete them, causing cascade deletion of Site custom resources. This fix: - Creates protected CRD resources with retain_on_delete=True - Uses aliases to link to old kustomize resource URNs - Sets skip_crds=True on Helm release to avoid conflicts - Adds ignore_changes=["*"] so Pulumi doesn't modify CRDs - Adds retain_on_delete=True to posit-team namespace Fixes cascade deletion issue seen in npower01-production migration. --- .../src/ptd/pulumi_resources/team_operator.py | 82 ++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/team_operator.py b/python-pulumi/src/ptd/pulumi_resources/team_operator.py index 0b36ff8..a60b5e4 100644 --- a/python-pulumi/src/ptd/pulumi_resources/team_operator.py +++ b/python-pulumi/src/ptd/pulumi_resources/team_operator.py @@ -67,6 +67,7 @@ def __init__( self._define_image() self._define_posit_team_namespace() + self._define_protected_crds() self._define_migration_resources() self._define_helm_release() @@ -84,9 +85,85 @@ def _define_posit_team_namespace(self): self.posit_team_namespace = kubernetes.core.v1.Namespace( f"{self.workload.compound_name}-{self.release}-{ptd.POSIT_TEAM_NAMESPACE}", metadata={"name": ptd.POSIT_TEAM_NAMESPACE}, - opts=pulumi.ResourceOptions(parent=self), + opts=pulumi.ResourceOptions( + parent=self, + retain_on_delete=True, # Don't delete namespace during migration + ), ) + def _define_protected_crds(self): + """Create placeholder CRD resources with retain_on_delete to prevent deletion during migration. + + When migrating from kustomize to Helm, Pulumi sees old kustomize-managed CRD resources + as orphaned and deletes them. This causes cascade deletion of Site custom resources. + + This method creates CRD resources with: + - retain_on_delete=True: Prevents Pulumi from deleting CRDs even if removed from code + - aliases: Links to old kustomize resource URNs so Pulumi recognizes them as existing + - Minimal spec: Helm will update with full spec; we just need to prevent deletion + + The CRDs are created with skip_await to avoid waiting for Helm to populate them. + """ + self.protected_crds = [] + + # Old kustomize.Directory resource name pattern + old_kustomize_name = f"{self.workload.compound_name}-{self.release}-team-operator-kustomization" + + for crd_name in KUSTOMIZE_CRDS: + # Parse CRD name: plural.group (e.g., "chronicles.core.posit.team") + parts = crd_name.split(".", 1) # Split into [plural, group] + plural = parts[0] + group = parts[1] if len(parts) > 1 else "" + + # Create a protected CRD resource + # The actual CRD spec will be managed by Helm; we just need Pulumi to not delete it + crd = kubernetes.apiextensions.v1.CustomResourceDefinition( + f"{self.workload.compound_name}-{self.release}-{crd_name}-protected", + metadata=kubernetes.meta.v1.ObjectMetaArgs( + name=crd_name, + annotations={ + # Mark as protected to prevent Helm uninstall from deleting + "helm.sh/resource-policy": "keep", + }, + ), + spec=kubernetes.apiextensions.v1.CustomResourceDefinitionSpecArgs( + group=group, + names=kubernetes.apiextensions.v1.CustomResourceDefinitionNamesArgs( + kind=plural.title().rstrip("s"), + plural=plural, + singular=plural.rstrip("s"), + ), + scope="Namespaced", + versions=[ + kubernetes.apiextensions.v1.CustomResourceDefinitionVersionArgs( + name="v1alpha1", + served=True, + storage=True, + schema=kubernetes.apiextensions.v1.CustomResourceValidationArgs( + open_apiv3_schema=kubernetes.apiextensions.v1.JSONSchemaPropsArgs( + type="object", + x_kubernetes_preserve_unknown_fields=True, + ), + ), + ), + ], + ), + opts=pulumi.ResourceOptions( + parent=self, + retain_on_delete=True, # CRITICAL: Don't delete CRDs when removed from Pulumi + ignore_changes=["*"], # Helm manages the actual spec + aliases=[ + # Alias to old kustomize.Directory child resource URN + pulumi.Alias( + name=f"{old_kustomize_name}-{crd_name}", + type_="kubernetes:apiextensions.k8s.io/v1:CustomResourceDefinition", + parent=self, + ), + ], + ), + ) + self.protected_crds.append(crd) + def _define_migration_resources(self): """Create resources to migrate from kustomize to Helm. @@ -297,7 +374,7 @@ def _define_helm_release(self): chart_version = self.cluster_cfg.team_operator_chart_version or DEFAULT_CHART_VERSION # Dependencies for the Helm release - depends = [self.posit_team_namespace] + depends = [self.posit_team_namespace] + self.protected_crds if self.migration_job: depends.append(self.migration_job) @@ -307,6 +384,7 @@ def _define_helm_release(self): version=chart_version, namespace=ptd.POSIT_TEAM_SYSTEM_NAMESPACE, create_namespace=True, + skip_crds=True, # CRDs managed separately with retain_on_delete protection values=helm_values, ) From 39c179a022b3b2078f25a8b316054fbc83a7c45b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 22 Jan 2026 11:07:09 -0800 Subject: [PATCH 2/3] fix(team-operator): simplify to namespace protection only Remove CRD protection code that was causing conflicts with existing CRDs. Keep only retain_on_delete=True on namespace. CRD protection needs more work to handle existing CRDs properly. --- .../src/ptd/pulumi_resources/team_operator.py | 77 +------------------ 1 file changed, 1 insertion(+), 76 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/team_operator.py b/python-pulumi/src/ptd/pulumi_resources/team_operator.py index a60b5e4..d027583 100644 --- a/python-pulumi/src/ptd/pulumi_resources/team_operator.py +++ b/python-pulumi/src/ptd/pulumi_resources/team_operator.py @@ -67,7 +67,6 @@ def __init__( self._define_image() self._define_posit_team_namespace() - self._define_protected_crds() self._define_migration_resources() self._define_helm_release() @@ -91,79 +90,6 @@ def _define_posit_team_namespace(self): ), ) - def _define_protected_crds(self): - """Create placeholder CRD resources with retain_on_delete to prevent deletion during migration. - - When migrating from kustomize to Helm, Pulumi sees old kustomize-managed CRD resources - as orphaned and deletes them. This causes cascade deletion of Site custom resources. - - This method creates CRD resources with: - - retain_on_delete=True: Prevents Pulumi from deleting CRDs even if removed from code - - aliases: Links to old kustomize resource URNs so Pulumi recognizes them as existing - - Minimal spec: Helm will update with full spec; we just need to prevent deletion - - The CRDs are created with skip_await to avoid waiting for Helm to populate them. - """ - self.protected_crds = [] - - # Old kustomize.Directory resource name pattern - old_kustomize_name = f"{self.workload.compound_name}-{self.release}-team-operator-kustomization" - - for crd_name in KUSTOMIZE_CRDS: - # Parse CRD name: plural.group (e.g., "chronicles.core.posit.team") - parts = crd_name.split(".", 1) # Split into [plural, group] - plural = parts[0] - group = parts[1] if len(parts) > 1 else "" - - # Create a protected CRD resource - # The actual CRD spec will be managed by Helm; we just need Pulumi to not delete it - crd = kubernetes.apiextensions.v1.CustomResourceDefinition( - f"{self.workload.compound_name}-{self.release}-{crd_name}-protected", - metadata=kubernetes.meta.v1.ObjectMetaArgs( - name=crd_name, - annotations={ - # Mark as protected to prevent Helm uninstall from deleting - "helm.sh/resource-policy": "keep", - }, - ), - spec=kubernetes.apiextensions.v1.CustomResourceDefinitionSpecArgs( - group=group, - names=kubernetes.apiextensions.v1.CustomResourceDefinitionNamesArgs( - kind=plural.title().rstrip("s"), - plural=plural, - singular=plural.rstrip("s"), - ), - scope="Namespaced", - versions=[ - kubernetes.apiextensions.v1.CustomResourceDefinitionVersionArgs( - name="v1alpha1", - served=True, - storage=True, - schema=kubernetes.apiextensions.v1.CustomResourceValidationArgs( - open_apiv3_schema=kubernetes.apiextensions.v1.JSONSchemaPropsArgs( - type="object", - x_kubernetes_preserve_unknown_fields=True, - ), - ), - ), - ], - ), - opts=pulumi.ResourceOptions( - parent=self, - retain_on_delete=True, # CRITICAL: Don't delete CRDs when removed from Pulumi - ignore_changes=["*"], # Helm manages the actual spec - aliases=[ - # Alias to old kustomize.Directory child resource URN - pulumi.Alias( - name=f"{old_kustomize_name}-{crd_name}", - type_="kubernetes:apiextensions.k8s.io/v1:CustomResourceDefinition", - parent=self, - ), - ], - ), - ) - self.protected_crds.append(crd) - def _define_migration_resources(self): """Create resources to migrate from kustomize to Helm. @@ -374,7 +300,7 @@ def _define_helm_release(self): chart_version = self.cluster_cfg.team_operator_chart_version or DEFAULT_CHART_VERSION # Dependencies for the Helm release - depends = [self.posit_team_namespace] + self.protected_crds + depends = [self.posit_team_namespace] if self.migration_job: depends.append(self.migration_job) @@ -384,7 +310,6 @@ def _define_helm_release(self): version=chart_version, namespace=ptd.POSIT_TEAM_SYSTEM_NAMESPACE, create_namespace=True, - skip_crds=True, # CRDs managed separately with retain_on_delete protection values=helm_values, ) From 745afce2df54a73187a3929a5c2dbc923a8e4b94 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Fri, 23 Jan 2026 14:22:48 -0800 Subject: [PATCH 3/3] Add team_operator_skip_crds config for safe CRD migration Add configuration option to skip CRD installation during Helm deployment, enabling safe migration from kustomize-managed CRDs. When enabled: - Sets crd.enable=false in Helm values (prevents template rendering) - Sets skip_crds=true on ReleaseArgs (skips crds/ directory) - Keeps crd.keep=true as defense-in-depth This allows the migration job to patch existing CRDs with Helm ownership labels without risk of accidental deletion during the kustomize-to-Helm transition. --- python-pulumi/src/ptd/__init__.py | 6 ++++++ .../src/ptd/pulumi_resources/team_operator.py | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/python-pulumi/src/ptd/__init__.py b/python-pulumi/src/ptd/__init__.py index d96d85d..7781939 100644 --- a/python-pulumi/src/ptd/__init__.py +++ b/python-pulumi/src/ptd/__init__.py @@ -428,6 +428,12 @@ class WorkloadClusterConfig: custom_k8s_resources: list[str] | None = None # List of subfolder names from custom_k8s_resources/ to apply # Tolerations for team-operator pods (controller and migration job) team_operator_tolerations: tuple[Toleration, ...] = () + # Skip CRD installation during Helm deployment (for safe migration from kustomize). + # When True, CRDs are not rendered by Helm templates (crd.enable=false) and the + # Helm release skips the crds/ directory. This allows the migration job to patch + # existing CRDs with Helm ownership labels without risk of accidental deletion. + # After migration, set to False to let Helm manage CRDs going forward. + team_operator_skip_crds: bool = False def load_workload_cluster_site_dict( diff --git a/python-pulumi/src/ptd/pulumi_resources/team_operator.py b/python-pulumi/src/ptd/pulumi_resources/team_operator.py index d027583..0febfcb 100644 --- a/python-pulumi/src/ptd/pulumi_resources/team_operator.py +++ b/python-pulumi/src/ptd/pulumi_resources/team_operator.py @@ -291,6 +291,15 @@ def _define_helm_release(self): for t in self.cluster_cfg.team_operator_tolerations ], }, + # CRD configuration for safe migration from kustomize to Helm. + # When skip_crds=True: crd.enable=False prevents Helm from rendering CRD templates, + # allowing migration job to patch existing CRDs without risk of deletion. + # When skip_crds=False (default): Helm manages CRDs normally. + # crd.keep=True adds helm.sh/resource-policy: keep as defense-in-depth. + "crd": { + "enable": not self.cluster_cfg.team_operator_skip_crds, + "keep": True, + }, } # OCI Helm chart from public repository @@ -311,6 +320,10 @@ def _define_helm_release(self): namespace=ptd.POSIT_TEAM_SYSTEM_NAMESPACE, create_namespace=True, values=helm_values, + # Skip CRDs at Helm level (belt-and-suspenders with crd.enable in values). + # This tells Helm CLI to skip the crds/ directory if the chart ever moves + # CRDs there. Combined with crd.enable=False, provides complete CRD skip. + skip_crds=self.cluster_cfg.team_operator_skip_crds, ) self.helm_release = kubernetes.helm.v3.Release(