From 669d1e3e8ab09c9dfebbd4d6b2abd4452f257251 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 01/31] Add cloud-agnostic infrastructure components for team-operator This adds three new infrastructure components to prepare for the cloud-agnostic team-operator: 1. nfs-subdir-external-provisioner (AWS workloads) - Deploys when FSx is configured (fs-dns-name in workload secrets) - Creates StorageClass "posit-shared-storage" for dynamic NFS subdirectory provisioning - Uses annotation-based pathPattern for subdirectory naming 2. external-secrets-operator (AWS workloads) - Deploys external-secrets-operator Helm chart - Creates ClusterSecretStore for AWS Secrets Manager - Creates ExternalSecret CRs per site to sync secrets to K8s Secrets - IAM role created with read-only Secrets Manager permissions 3. EKS Pod Identity Agent (AWS workloads) - Adds eks-pod-identity-agent addon to EKS clusters - Creates PodIdentityAssociation resources for all product ServiceAccounts - ADDITIVE - existing IRSA roles and trust policies kept for backward compatibility All components are added alongside existing infrastructure. No existing resources are modified or removed. The operator will be updated in a future phase to use these new abstractions. Changes: - python-pulumi/src/ptd/aws_workload.py: Add external_secrets_role_name() method - python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py: Add with_pod_identity_agent() method - python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py: - Add _define_external_secrets_iam() and _define_pod_identity_associations() methods - Add external_secrets_roles dict - python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py: Enable Pod Identity Agent addon - python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py: - Add _define_nfs_subdir_provisioner() method - Add _define_external_secrets_operator() method - Conditionally deploy based on workload configuration - python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py: Add _define_external_secrets() method to create ExternalSecret CRs per site --- python-pulumi/src/ptd/aws_workload.py | 3 + .../ptd/pulumi_resources/aws_eks_cluster.py | 28 ++++ .../pulumi_resources/aws_workload_clusters.py | 103 +++++++++++++++ .../ptd/pulumi_resources/aws_workload_eks.py | 3 + .../ptd/pulumi_resources/aws_workload_helm.py | 122 ++++++++++++++++++ .../pulumi_resources/aws_workload_sites.py | 43 ++++++ 6 files changed, 302 insertions(+) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 81724aa..54fcee6 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -585,6 +585,9 @@ def ebs_csi_role_name(self) -> str: def fsx_openzfs_role_name(self) -> str: return f"aws-fsx-openzfs-csi-driver.{self.compound_name}.posit.team" + def external_secrets_role_name(self, release: str) -> str: + return f"external-secrets.{release}.{self.compound_name}.posit.team" + def cluster_home_role_name(self, release: str) -> str: return f"home.{release}.{self.compound_name}.posit.team" diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py index a7023af..62f77f5 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py @@ -1306,6 +1306,34 @@ def with_aws_secrets_store_csi_driver_provider( return self + def with_pod_identity_agent( + self, + version: str | None = None, + ) 
-> typing.Self: + """ + Add the EKS Pod Identity Agent addon. + + This addon enables EKS Pod Identity for associating IAM roles with + Kubernetes service accounts without IRSA annotations. Pod Identity + associations are created separately via aws.eks.PodIdentityAssociation. + + :param version: Optional, String, version of the addon to install. + By setting this to None, the latest version will be installed. + :return: self + """ + aws.eks.Addon( + f"{self.name}-eks-pod-identity-agent", + args=aws.eks.AddonArgs( + addon_name="eks-pod-identity-agent", + addon_version=version, + cluster_name=self.name, + tags=self.eks.tags, + ), + opts=pulumi.ResourceOptions(parent=self.eks), + ) + + return self + def attach_efs_security_group( self, efs_file_system_id: str, diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 9605de6..831fa0f 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -48,6 +48,7 @@ class AWSWorkloadClusters(pulumi.ComponentResource): autoscaling_queues: dict[str, aws.sqs.Queue] packagemanager_roles: dict[str, aws.iam.Role | pulumi.Output[aws.iam.Role]] team_operator_roles: dict[str, aws.iam.Role] + external_secrets_roles: dict[str, aws.iam.Role] workbench_roles: dict[str, aws.iam.Role] workbench_session_roles: dict[str, aws.iam.Role] @@ -108,6 +109,9 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): self._define_workbench_iam() self._define_packagemanager_iam(persistent_stack) self._define_team_operator_iam() + self._define_external_secrets_iam() + # Create Pod Identity associations for all products (ADDITIVE - keeps IRSA for backward compatibility) + self._define_pod_identity_associations() self._apply_custom_k8s_resources() self._define_team_operator() # after team operator so we can reuse the namespaces @@ -559,6 +563,105 @@ def _define_team_operator_iam(self): policy_name=self.workload.team_operator_policy_name, ) + def _define_external_secrets_iam(self): + """Define IAM roles for external-secrets-operator to access AWS Secrets Manager.""" + self.external_secrets_roles = {} + + for release in self.managed_clusters_by_release: + self.external_secrets_roles[release] = self._define_k8s_iam_role( + name=self.workload.external_secrets_role_name(release), + release=release, + namespace="external-secrets", + service_accounts=["external-secrets"], + role_policies=[self._define_read_secrets_inline()], + ) + + def _define_pod_identity_associations(self): + """ + Create EKS Pod Identity associations for all product service accounts. + + This is ADDITIVE - existing IRSA roles and annotations are kept for backward compatibility. + Both Pod Identity and IRSA can coexist. The operator will be updated to stop computing + IRSA annotations in a future phase. + + Pod Identity associations connect service accounts directly to IAM roles without requiring + annotations on the ServiceAccount resource. 
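+        For example, a single association binds (cluster, namespace, service
+        account) to a role ARN; the same binding under IRSA requires an
+        eks.amazonaws.com/role-arn annotation on the ServiceAccount plus an
+        OIDC trust policy on the role.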
+ """ + for release in self.managed_clusters_by_release: + cluster_name = f"{self.workload.compound_name}-{release}" + + # Per-site product associations + for site_name in sorted(self.workload.cfg.sites.keys()): + # Connect + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-connect-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-connect", + role_arn=self.connect_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Connect Session + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-connect-session-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-connect-session", + role_arn=self.connect_session_roles[f"{release}-{site_name}"].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Workbench + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-workbench-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-workbench", + role_arn=self.workbench_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Workbench Session + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-workbench-session-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-workbench-session", + role_arn=self.workbench_session_roles[f"{release}-{site_name}"].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Package Manager + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-packagemanager-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-packagemanager", + role_arn=self.packagemanager_roles[release + "//" + site_name].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Chronicle + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-chronicle-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-chronicle", + role_arn=self.chronicle_roles[f"{release}-{site_name}"].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Home (Flightdeck) + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-home-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-home", + role_arn=self.home_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + def _apply_custom_k8s_resources(self): """Apply custom Kubernetes resources from the custom_k8s_resources/ directory.""" ptd.pulumi_resources.custom_k8s_resources.apply_custom_k8s_resources( diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py index 8796f73..68ba043 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py @@ -157,6 +157,9 @@ def _build_with_vpc_config( if self.workload.cfg.secrets_store_addon_enabled: eks_cluster.with_aws_secrets_store_csi_driver_provider() + # Enable EKS Pod Identity Agent for cloud-agnostic IAM + eks_cluster.with_pod_identity_agent() + eks_cluster.with_gp3() eks_cluster.with_encrypted_ebs_storage_class() eks_cluster.with_oidc_provider() diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 029bfeb..100f5dc 100644 --- 
a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -68,15 +68,28 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): ) cert_arns_output = persistent_stack.require_output("cert_arns") + self.workload_secrets_dict, ok = ptd.secrecy.aws_get_secret_value_json( + self.workload.secret_name, region=self.workload.cfg.region + ) + if not ok: + msg = f"Failed to look up secret {self.workload.secret_name!r}" + pulumi.error(msg, self) + raise ValueError(msg) + for release in self.managed_clusters_by_release: components = self.workload.cfg.clusters[release].components weight = self.workload.cfg.clusters[release].routing_weight self._define_aws_lbc(release, components.aws_load_balancer_controller_version) self._define_aws_fsx_openzfs_csi(release, components.aws_fsx_openzfs_csi_driver_version) + # Deploy nfs-subdir-external-provisioner if FSx is configured + if "fs-dns-name" in self.workload_secrets_dict: + self._define_nfs_subdir_provisioner(release) if not self.workload.cfg.secrets_store_addon_enabled: self._define_secret_store_csi(release, components.secret_store_csi_driver_version) self._define_secret_store_csi_aws(release, components.secret_store_csi_driver_aws_provider_version) + # Deploy external-secrets-operator + self._define_external_secrets_operator(release) self._define_traefik(release, components.traefik_version, weight, cert_arns_output) self._define_metrics_server(release, components.metrics_server_version) self._define_loki(release, components.loki_version, components) @@ -170,6 +183,115 @@ def _define_aws_fsx_openzfs_csi(self, release: str, version: str): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) + def _define_nfs_subdir_provisioner(self, release: str): + """Deploy nfs-subdir-external-provisioner for FSx storage.""" + workload_secrets = self.workload_secrets_dict + fsx_dns_name = workload_secrets.get("fs-dns-name", "") + + if not fsx_dns_name: + return + + k8s.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-nfs-subdir-provisioner-helm-release", + metadata=k8s.meta.v1.ObjectMetaArgs( + name="nfs-subdir-external-provisioner", + namespace=ptd.HELM_CONTROLLER_NAMESPACE, + labels=self.required_tags, + ), + api_version="helm.cattle.io/v1", + kind="HelmChart", + spec={ + "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", + "chart": "nfs-subdir-external-provisioner", + "targetNamespace": ptd.KUBE_SYSTEM_NAMESPACE, + "version": "4.0.18", + "valuesContent": yaml.dump( + { + "nfs": { + "server": fsx_dns_name, + "path": "/fsx", + }, + "storageClass": { + "name": "posit-shared-storage", + "reclaimPolicy": "Retain", + "accessModes": "ReadWriteMany", + "onDelete": "retain", + "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", + }, + "nfs.mountOptions": [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ], + } + ), + }, + opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), + ) + + def _define_external_secrets_operator(self, release: str): + """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager.""" + # Deploy external-secrets-operator Helm chart + k8s.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-external-secrets-helm-release", + metadata=k8s.meta.v1.ObjectMetaArgs( + name="external-secrets", + namespace=ptd.HELM_CONTROLLER_NAMESPACE, + labels=self.required_tags, + ), + api_version="helm.cattle.io/v1", + 
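+            # "helm.cattle.io/v1" HelmChart is the helm-controller CRD (as used
+            # by k3s/RKE2): an in-cluster controller fetches the chart from
+            # `repo` and installs it into `targetNamespace`, rather than Pulumi
+            # invoking Helm directly.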
kind="HelmChart", + spec={ + "repo": "https://charts.external-secrets.io", + "chart": "external-secrets", + "targetNamespace": "external-secrets", + "version": "0.10.7", + "valuesContent": yaml.dump( + { + "installCRDs": True, + "serviceAccount": { + "create": True, + "name": "external-secrets", + "annotations": { + "eks.amazonaws.com/role-arn": f"arn:aws:iam::{self.workload.cfg.account_id}:role/" + + self.workload.external_secrets_role_name(release), + }, + }, + } + ), + }, + opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), + ) + + # Create ClusterSecretStore for AWS Secrets Manager + k8s.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-cluster-secret-store", + metadata=k8s.meta.v1.ObjectMetaArgs( + name="aws-secrets-manager", + labels=self.required_tags, + ), + api_version="external-secrets.io/v1beta1", + kind="ClusterSecretStore", + spec={ + "provider": { + "aws": { + "service": "SecretsManager", + "region": self.workload.cfg.region, + "auth": { + "jwt": { + "serviceAccountRef": { + "name": "external-secrets", + "namespace": "external-secrets", + }, + }, + }, + }, + }, + }, + opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), + ) + def _define_secret_store_csi(self, release: str, version: str): k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-secret-store-csi-helm-release", diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 836bc1f..f0877c2 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -70,6 +70,7 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): raise ValueError(msg) self._define_team_sites() + self._define_external_secrets() def _define_team_sites(self): self.team_sites = {} @@ -165,3 +166,45 @@ def set_site_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): providers=[self.kube_providers[release]], ), ) + + def _define_external_secrets(self): + """ + Create ExternalSecret CRs for each site to sync secrets from AWS Secrets Manager to K8s Secrets. + + This creates K8s Secrets that the operator can reference by name instead of calling AWS SDK directly. + """ + for release in self.managed_clusters_by_release: + for site_name in sorted(self.workload.cfg.sites.keys()): + # Create ExternalSecret for site secrets + kubernetes.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-{site_name}-external-secret", + metadata=kubernetes.meta.v1.ObjectMetaArgs( + name=f"{site_name}-secrets", + namespace=ptd.POSIT_TEAM_NAMESPACE, + labels=self.required_tags, + ), + api_version="external-secrets.io/v1beta1", + kind="ExternalSecret", + spec={ + "refreshInterval": "1h", + "secretStoreRef": { + "name": "aws-secrets-manager", + "kind": "ClusterSecretStore", + }, + "target": { + "name": f"{site_name}-secrets", + "creationPolicy": "Owner", + }, + "dataFrom": [ + { + "extract": { + "key": self.workload.site_secret_name(site_name), + } + } + ], + }, + opts=pulumi.ResourceOptions( + parent=self, + provider=self.kube_providers[release], + ), + ) From d3618dc76f3005ca202591973e84e211a7f83733 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 02/31] Address review findings (job 220) All tests pass. 
Here is the summary of changes: --- Changes: - Add `depends_on=[eso_helm_release]` to `ClusterSecretStore` so it applies after the ESO HelmChart CR is registered, not concurrently - Add Pod Identity association for `external-secrets` service account in `_define_pod_identity_associations`, making ESO consistent with all other products - Remove hardcoded IRSA annotation from ESO Helm chart values (now uses Pod Identity via the new association) - Add `nfs_subdir_provisioner_version` and `external_secrets_operator_version` fields to `AWSWorkloadClusterComponentConfig` with the previously hardcoded defaults - Thread version parameters through `_define_nfs_subdir_provisioner(release, version)` and `_define_external_secrets_operator(release, version)` callers - Remove redundant `fsx_dns_name` empty-string guard in `_define_nfs_subdir_provisioner` (caller already checks `"fs-dns-name" in secrets`) --- python-pulumi/src/ptd/aws_workload.py | 2 ++ .../pulumi_resources/aws_workload_clusters.py | 10 +++++++ .../ptd/pulumi_resources/aws_workload_helm.py | 29 +++++++------------ 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 54fcee6..29ff1a6 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -268,6 +268,8 @@ class AWSWorkloadClusterComponentConfig(ptd.WorkloadClusterComponentConfig): secret_store_csi_driver_aws_provider_version: str | None = "0.3.5" # noqa: S105 nvidia_device_plugin_version: str | None = "0.17.1" karpenter_version: str | None = "1.6.0" + nfs_subdir_provisioner_version: str | None = "4.0.18" + external_secrets_operator_version: str | None = "0.10.7" class AWSWorkload(ptd.workload.AbstractWorkload): diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 831fa0f..ff0dd04 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -590,6 +590,16 @@ def _define_pod_identity_associations(self): for release in self.managed_clusters_by_release: cluster_name = f"{self.workload.compound_name}-{release}" + # External Secrets Operator (per-release, not per-site) + aws.eks.PodIdentityAssociation( + f"{cluster_name}-external-secrets-pod-identity", + cluster_name=cluster_name, + namespace="external-secrets", + service_account="external-secrets", + role_arn=self.external_secrets_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) + # Per-site product associations for site_name in sorted(self.workload.cfg.sites.keys()): # Connect diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 100f5dc..9aaf733 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -84,12 +84,12 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): self._define_aws_fsx_openzfs_csi(release, components.aws_fsx_openzfs_csi_driver_version) # Deploy nfs-subdir-external-provisioner if FSx is configured if "fs-dns-name" in self.workload_secrets_dict: - self._define_nfs_subdir_provisioner(release) + self._define_nfs_subdir_provisioner(release, components.nfs_subdir_provisioner_version) if not self.workload.cfg.secrets_store_addon_enabled: self._define_secret_store_csi(release, 
components.secret_store_csi_driver_version) self._define_secret_store_csi_aws(release, components.secret_store_csi_driver_aws_provider_version) # Deploy external-secrets-operator - self._define_external_secrets_operator(release) + self._define_external_secrets_operator(release, components.external_secrets_operator_version) self._define_traefik(release, components.traefik_version, weight, cert_arns_output) self._define_metrics_server(release, components.metrics_server_version) self._define_loki(release, components.loki_version, components) @@ -183,13 +183,9 @@ def _define_aws_fsx_openzfs_csi(self, release: str, version: str): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_nfs_subdir_provisioner(self, release: str): + def _define_nfs_subdir_provisioner(self, release: str, version: str): """Deploy nfs-subdir-external-provisioner for FSx storage.""" - workload_secrets = self.workload_secrets_dict - fsx_dns_name = workload_secrets.get("fs-dns-name", "") - - if not fsx_dns_name: - return + fsx_dns_name = self.workload_secrets_dict["fs-dns-name"] k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-nfs-subdir-provisioner-helm-release", @@ -204,7 +200,7 @@ def _define_nfs_subdir_provisioner(self, release: str): "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", "chart": "nfs-subdir-external-provisioner", "targetNamespace": ptd.KUBE_SYSTEM_NAMESPACE, - "version": "4.0.18", + "version": version, "valuesContent": yaml.dump( { "nfs": { @@ -230,10 +226,10 @@ def _define_nfs_subdir_provisioner(self, release: str): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_external_secrets_operator(self, release: str): + def _define_external_secrets_operator(self, release: str, version: str): """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager.""" # Deploy external-secrets-operator Helm chart - k8s.apiextensions.CustomResource( + eso_helm_release = k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-external-secrets-helm-release", metadata=k8s.meta.v1.ObjectMetaArgs( name="external-secrets", @@ -246,17 +242,13 @@ def _define_external_secrets_operator(self, release: str): "repo": "https://charts.external-secrets.io", "chart": "external-secrets", "targetNamespace": "external-secrets", - "version": "0.10.7", + "version": version, "valuesContent": yaml.dump( { "installCRDs": True, "serviceAccount": { "create": True, "name": "external-secrets", - "annotations": { - "eks.amazonaws.com/role-arn": f"arn:aws:iam::{self.workload.cfg.account_id}:role/" - + self.workload.external_secrets_role_name(release), - }, }, } ), @@ -264,7 +256,8 @@ def _define_external_secrets_operator(self, release: str): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - # Create ClusterSecretStore for AWS Secrets Manager + # Create ClusterSecretStore for AWS Secrets Manager. + # depends_on the HelmChart CR so Pulumi applies it after the ESO chart CR is registered. 
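+        # Note: depends_on orders Pulumi's resource registration only; the
+        # helm-controller installs the chart (and ESO's CRDs) asynchronously,
+        # so a fresh deploy may still retry this resource until the CRDs exist.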
k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-cluster-secret-store", metadata=k8s.meta.v1.ObjectMetaArgs( @@ -289,7 +282,7 @@ def _define_external_secrets_operator(self, release: str): }, }, }, - opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), + opts=pulumi.ResourceOptions(provider=self.kube_providers[release], depends_on=[eso_helm_release]), ) def _define_secret_store_csi(self, release: str, version: str): From 6a1b9cd880a6aae8de11ecac1e61c32a85a18a67 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 03/31] Address review findings (job 230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All tests pass and lint is clean. Here's a summary: Changes: - Remove `auth.jwt.serviceAccountRef` block from `ClusterSecretStore` spec; Pod Identity injects ambient credentials — no explicit auth needed - Fix `_define_nfs_subdir_provisioner` and `_define_external_secrets_operator` signatures from `version: str` to `version: str | None`; omit the `version` key from Helm spec when `None` - Read NFS path from secrets dict key `fs-nfs-path` with fallback to `/fsx` instead of hard-coding - Add `pod_identity_agent_version: str | None = None` to `AWSWorkloadClusterConfig` and pass it to `eks_cluster.with_pod_identity_agent()` - Add doc comment to `_define_external_secrets` explaining that `ExternalSecret` CRs cannot declare `depends_on` the `ClusterSecretStore` across components and that a short convergence window is expected on fresh deploys --- python-pulumi/src/ptd/aws_workload.py | 1 + .../ptd/pulumi_resources/aws_workload_eks.py | 2 +- .../ptd/pulumi_resources/aws_workload_helm.py | 105 +++++++++--------- .../pulumi_resources/aws_workload_sites.py | 4 + 4 files changed, 59 insertions(+), 53 deletions(-) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 29ff1a6..a28d6d7 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -255,6 +255,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): additional_node_groups: dict[str, ptd.NodeGroupConfig] = dataclasses.field(default_factory=dict) public_endpoint_access: bool = True ebs_csi_addon_version: str = "v1.41.0-eksbuild.1" + pod_identity_agent_version: str | None = None enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py index 68ba043..88d2323 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py @@ -158,7 +158,7 @@ def _build_with_vpc_config( eks_cluster.with_aws_secrets_store_csi_driver_provider() # Enable EKS Pod Identity Agent for cloud-agnostic IAM - eks_cluster.with_pod_identity_agent() + eks_cluster.with_pod_identity_agent(version=cluster_cfg.pod_identity_agent_version) eks_cluster.with_gp3() eks_cluster.with_encrypted_ebs_storage_class() diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 9aaf733..4aa6d88 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -183,9 +183,39 @@ def _define_aws_fsx_openzfs_csi(self, release: str, version: str): 
opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_nfs_subdir_provisioner(self, release: str, version: str): + def _define_nfs_subdir_provisioner(self, release: str, version: str | None): """Deploy nfs-subdir-external-provisioner for FSx storage.""" fsx_dns_name = self.workload_secrets_dict["fs-dns-name"] + fsx_nfs_path = self.workload_secrets_dict.get("fs-nfs-path", "/fsx") + + spec: dict = { + "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", + "chart": "nfs-subdir-external-provisioner", + "targetNamespace": ptd.KUBE_SYSTEM_NAMESPACE, + "valuesContent": yaml.dump( + { + "nfs": { + "server": fsx_dns_name, + "path": fsx_nfs_path, + }, + "storageClass": { + "name": "posit-shared-storage", + "reclaimPolicy": "Retain", + "accessModes": "ReadWriteMany", + "onDelete": "retain", + "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", + }, + "nfs.mountOptions": [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ], + } + ), + } + if version is not None: + spec["version"] = version k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-nfs-subdir-provisioner-helm-release", @@ -196,39 +226,30 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str): ), api_version="helm.cattle.io/v1", kind="HelmChart", - spec={ - "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", - "chart": "nfs-subdir-external-provisioner", - "targetNamespace": ptd.KUBE_SYSTEM_NAMESPACE, - "version": version, - "valuesContent": yaml.dump( - { - "nfs": { - "server": fsx_dns_name, - "path": "/fsx", - }, - "storageClass": { - "name": "posit-shared-storage", - "reclaimPolicy": "Retain", - "accessModes": "ReadWriteMany", - "onDelete": "retain", - "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", - }, - "nfs.mountOptions": [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ], - } - ), - }, + spec=spec, opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_external_secrets_operator(self, release: str, version: str): + def _define_external_secrets_operator(self, release: str, version: str | None): """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager.""" # Deploy external-secrets-operator Helm chart + eso_spec: dict = { + "repo": "https://charts.external-secrets.io", + "chart": "external-secrets", + "targetNamespace": "external-secrets", + "valuesContent": yaml.dump( + { + "installCRDs": True, + "serviceAccount": { + "create": True, + "name": "external-secrets", + }, + } + ), + } + if version is not None: + eso_spec["version"] = version + eso_helm_release = k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-external-secrets-helm-release", metadata=k8s.meta.v1.ObjectMetaArgs( @@ -238,21 +259,7 @@ def _define_external_secrets_operator(self, release: str, version: str): ), api_version="helm.cattle.io/v1", kind="HelmChart", - spec={ - "repo": "https://charts.external-secrets.io", - "chart": "external-secrets", - "targetNamespace": "external-secrets", - "version": version, - "valuesContent": yaml.dump( - { - "installCRDs": True, - "serviceAccount": { - "create": True, - "name": "external-secrets", - }, - } - ), - }, + spec=eso_spec, opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) @@ -271,14 +278,8 @@ def _define_external_secrets_operator(self, release: str, version: str): "aws": { "service": "SecretsManager", "region": self.workload.cfg.region, - "auth": 
{ - "jwt": { - "serviceAccountRef": { - "name": "external-secrets", - "namespace": "external-secrets", - }, - }, - }, + # No auth block: Pod Identity injects credentials via the agent; + # omitting auth causes ESO to use the ambient pod credentials. }, }, }, diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index f0877c2..4a70a1a 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -172,6 +172,10 @@ def _define_external_secrets(self): Create ExternalSecret CRs for each site to sync secrets from AWS Secrets Manager to K8s Secrets. This creates K8s Secrets that the operator can reference by name instead of calling AWS SDK directly. + + Note: these CRs reference the `aws-secrets-manager` ClusterSecretStore which is created by + AWSWorkloadHelm. No Pulumi depends_on can be declared across stack boundaries; on a fresh + deploy, ESO will log errors until the ClusterSecretStore converges (~1-2 reconcile loops). """ for release in self.managed_clusters_by_release: for site_name in sorted(self.workload.cfg.sites.keys()): From f6becb64d2b7624a581724af3a514e55a22714f0 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 04/31] Address review findings (job 233) All tests pass. Here's a summary of the changes made: Changes: - Fix `nfs.mountOptions` dot-notation bug: move `mountOptions` list nested under the `nfs` dict (was silently ignored by Helm) - Move workload secret fetch out of `AWSWorkloadHelm.__init__` into `_define_nfs_subdir_provisioner` (lazy, returns early if secret unavailable or `fs-dns-name` missing) - Remove `pulumi.error(msg, self)` before `raise ValueError` (redundant duplicate diagnostic) - Add `enable_pod_identity_agent: bool = False` to `AWSWorkloadClusterConfig` and gate `with_pod_identity_agent` on it (opt-in, prevents unconditional addon install on all clusters) - Document `depends_on` CRD-readiness limitation in `_define_external_secrets_operator` docstring - Add `test_nfs_subdir_provisioner_values.py` with YAML round-trip tests for the NFS values structure --- python-pulumi/src/ptd/aws_workload.py | 1 + .../ptd/pulumi_resources/aws_workload_eks.py | 5 +- .../ptd/pulumi_resources/aws_workload_helm.py | 44 +++++++------- .../test_nfs_subdir_provisioner_values.py | 60 +++++++++++++++++++ 4 files changed, 88 insertions(+), 22 deletions(-) create mode 100644 python-pulumi/tests/test_nfs_subdir_provisioner_values.py diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index a28d6d7..9f8679b 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -256,6 +256,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): public_endpoint_access: bool = True ebs_csi_addon_version: str = "v1.41.0-eksbuild.1" pod_identity_agent_version: str | None = None + enable_pod_identity_agent: bool = False enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py index 88d2323..5f43fb8 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py @@ -157,8 +157,9 @@ def _build_with_vpc_config( if 
self.workload.cfg.secrets_store_addon_enabled: eks_cluster.with_aws_secrets_store_csi_driver_provider() - # Enable EKS Pod Identity Agent for cloud-agnostic IAM - eks_cluster.with_pod_identity_agent(version=cluster_cfg.pod_identity_agent_version) + # Enable EKS Pod Identity Agent for cloud-agnostic IAM (opt-in) + if cluster_cfg.enable_pod_identity_agent: + eks_cluster.with_pod_identity_agent(version=cluster_cfg.pod_identity_agent_version) eks_cluster.with_gp3() eks_cluster.with_encrypted_ebs_storage_class() diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 4aa6d88..659332d 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -68,23 +68,13 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): ) cert_arns_output = persistent_stack.require_output("cert_arns") - self.workload_secrets_dict, ok = ptd.secrecy.aws_get_secret_value_json( - self.workload.secret_name, region=self.workload.cfg.region - ) - if not ok: - msg = f"Failed to look up secret {self.workload.secret_name!r}" - pulumi.error(msg, self) - raise ValueError(msg) - for release in self.managed_clusters_by_release: components = self.workload.cfg.clusters[release].components weight = self.workload.cfg.clusters[release].routing_weight self._define_aws_lbc(release, components.aws_load_balancer_controller_version) self._define_aws_fsx_openzfs_csi(release, components.aws_fsx_openzfs_csi_driver_version) - # Deploy nfs-subdir-external-provisioner if FSx is configured - if "fs-dns-name" in self.workload_secrets_dict: - self._define_nfs_subdir_provisioner(release, components.nfs_subdir_provisioner_version) + self._define_nfs_subdir_provisioner(release, components.nfs_subdir_provisioner_version) if not self.workload.cfg.secrets_store_addon_enabled: self._define_secret_store_csi(release, components.secret_store_csi_driver_version) self._define_secret_store_csi_aws(release, components.secret_store_csi_driver_aws_provider_version) @@ -185,8 +175,14 @@ def _define_aws_fsx_openzfs_csi(self, release: str, version: str): def _define_nfs_subdir_provisioner(self, release: str, version: str | None): """Deploy nfs-subdir-external-provisioner for FSx storage.""" - fsx_dns_name = self.workload_secrets_dict["fs-dns-name"] - fsx_nfs_path = self.workload_secrets_dict.get("fs-nfs-path", "/fsx") + workload_secrets, ok = ptd.secrecy.aws_get_secret_value_json( + self.workload.secret_name, region=self.workload.cfg.region + ) + if not ok or "fs-dns-name" not in workload_secrets: + return + + fsx_dns_name = workload_secrets["fs-dns-name"] + fsx_nfs_path = workload_secrets.get("fs-nfs-path", "/fsx") spec: dict = { "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", @@ -197,6 +193,12 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): "nfs": { "server": fsx_dns_name, "path": fsx_nfs_path, + "mountOptions": [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ], }, "storageClass": { "name": "posit-shared-storage", @@ -205,12 +207,6 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): "onDelete": "retain", "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", }, - "nfs.mountOptions": [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ], } ), } @@ -231,7 +227,15 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): ) 
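    # Illustration of the dot-notation fix above (standalone PyYAML sketch;
    # the server value is a placeholder):
    #
    #     import yaml
    #     flat = {"nfs": {"server": "fs-x"}, "nfs.mountOptions": ["nfsvers=4.2"]}
    #     print(yaml.dump(flat))
    #     # nfs:
    #     #   server: fs-x
    #     # nfs.mountOptions:    <- literal top-level key, not a path into "nfs";
    #     # - nfsvers=4.2           Helm ignores values the chart does not read
    #
    # With the StorageClass above, a PVC selects its subdirectory through the
    # pathPattern annotation, e.g. (claim name and path are hypothetical):
    #
    #     apiVersion: v1
    #     kind: PersistentVolumeClaim
    #     metadata:
    #       name: site-a-shared
    #       annotations:
    #         nfs.io/storage-path: "site-a"
    #     spec:
    #       storageClassName: posit-shared-storage
    #       accessModes: ["ReadWriteMany"]
    #       resources:
    #         requests:
    #           storage: 10Gi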
def _define_external_secrets_operator(self, release: str, version: str | None): - """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager.""" + """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager. + + Note: the ClusterSecretStore is created with ``depends_on=[eso_helm_release]``, which + ensures Pulumi registers it after the HelmChart CR object exists in the API server. + However, this does NOT wait for the Helm release to complete and CRDs to be installed. + On a fresh deploy, the ClusterSecretStore apply will fail until ESO's CRDs converge + (~1-2 reconcile loops). This is an architectural constraint of using HelmChart CRDs + rather than ``pulumi_kubernetes.helm.v3.Release``. + """ # Deploy external-secrets-operator Helm chart eso_spec: dict = { "repo": "https://charts.external-secrets.io", diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py new file mode 100644 index 0000000..3d62c1a --- /dev/null +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -0,0 +1,60 @@ +"""Tests for NFS subdir provisioner Helm values structure.""" + +import yaml + + +def _build_nfs_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: + """Build the NFS provisioner values dict (mirrors _define_nfs_subdir_provisioner).""" + return { + "nfs": { + "server": fsx_dns_name, + "path": fsx_nfs_path, + "mountOptions": [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ], + }, + "storageClass": { + "name": "posit-shared-storage", + "reclaimPolicy": "Retain", + "accessModes": "ReadWriteMany", + "onDelete": "retain", + "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", + }, + } + + +def test_mount_options_nested_under_nfs(): + """mountOptions must be nested under nfs, not a top-level dot-notation key.""" + values = _build_nfs_values("fs-12345.fsx.us-east-1.amazonaws.com") + assert "nfs.mountOptions" not in values, "nfs.mountOptions must not be a top-level key" + assert "mountOptions" in values["nfs"], "mountOptions must be nested under nfs" + assert values["nfs"]["mountOptions"] == [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ] + + +def test_nfs_server_and_path_set(): + dns = "fs-12345.fsx.us-east-1.amazonaws.com" + path = "/my-fsx" + values = _build_nfs_values(dns, path) + assert values["nfs"]["server"] == dns + assert values["nfs"]["path"] == path + + +def test_values_yaml_roundtrip(): + """Verify the structure survives a yaml.dump/yaml.safe_load round-trip.""" + values = _build_nfs_values("fs-abc.fsx.us-east-1.amazonaws.com") + parsed = yaml.safe_load(yaml.dump(values)) + assert parsed["nfs"]["mountOptions"] == [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ] + assert "nfs.mountOptions" not in parsed From e20e5fc561d5e0e653f47e9022e7cee218a42db5 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 05/31] Address review findings (job 244) All tests pass. 
Here's a summary of the changes: Changes: - Add `enable_external_secrets_operator: bool = False` to `AWSWorkloadClusterConfig` as an opt-in flag (alongside `enable_pod_identity_agent`) - Gate `_define_external_secrets_operator` call in `aws_workload_helm.py` on `enable_external_secrets_operator` per cluster - Gate all pod identity associations in `_define_pod_identity_associations` on `enable_pod_identity_agent` per cluster (skip entire cluster with `continue` if agent not enabled) - Gate ESO pod identity association on both `enable_pod_identity_agent` AND `enable_external_secrets_operator` - Add `if f"{release}-{site_name}" in self.chronicle_roles` guard before Chronicle pod identity association to protect against optional product KeyErrors - Add `if release in self.home_roles` guard before Home/Flightdeck pod identity association - Add `python-pulumi/tests/test_eso_and_external_secret_values.py` with 9 tests covering ESO Helm values (no IRSA annotations), ClusterSecretStore no-auth spec, and ExternalSecret CR structure --- python-pulumi/src/ptd/aws_workload.py | 1 + .../pulumi_resources/aws_workload_clusters.py | 61 +++++----- .../ptd/pulumi_resources/aws_workload_helm.py | 5 +- .../test_eso_and_external_secret_values.py | 108 ++++++++++++++++++ 4 files changed, 146 insertions(+), 29 deletions(-) create mode 100644 python-pulumi/tests/test_eso_and_external_secret_values.py diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 9f8679b..ad87c60 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -257,6 +257,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): ebs_csi_addon_version: str = "v1.41.0-eksbuild.1" pod_identity_agent_version: str | None = None enable_pod_identity_agent: bool = False + enable_external_secrets_operator: bool = False enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index ff0dd04..51e8cff 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -588,17 +588,22 @@ def _define_pod_identity_associations(self): annotations on the ServiceAccount resource. 
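        Associations are created only for clusters that opt in via
        enable_pod_identity_agent (see the per-cluster gating below).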
""" for release in self.managed_clusters_by_release: + cluster_cfg = self.workload.cfg.clusters[release] + if not cluster_cfg.enable_pod_identity_agent: + continue + cluster_name = f"{self.workload.compound_name}-{release}" - # External Secrets Operator (per-release, not per-site) - aws.eks.PodIdentityAssociation( - f"{cluster_name}-external-secrets-pod-identity", - cluster_name=cluster_name, - namespace="external-secrets", - service_account="external-secrets", - role_arn=self.external_secrets_roles[release].arn, - opts=pulumi.ResourceOptions(parent=self), - ) + # External Secrets Operator (per-release, only if ESO is also enabled) + if cluster_cfg.enable_external_secrets_operator: + aws.eks.PodIdentityAssociation( + f"{cluster_name}-external-secrets-pod-identity", + cluster_name=cluster_name, + namespace="external-secrets", + service_account="external-secrets", + role_arn=self.external_secrets_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) # Per-site product associations for site_name in sorted(self.workload.cfg.sites.keys()): @@ -652,25 +657,27 @@ def _define_pod_identity_associations(self): opts=pulumi.ResourceOptions(parent=self), ) - # Chronicle - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-chronicle-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-chronicle", - role_arn=self.chronicle_roles[f"{release}-{site_name}"].arn, - opts=pulumi.ResourceOptions(parent=self), - ) + # Chronicle (optional product — skip if not configured for this release/site) + if f"{release}-{site_name}" in self.chronicle_roles: + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-chronicle-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-chronicle", + role_arn=self.chronicle_roles[f"{release}-{site_name}"].arn, + opts=pulumi.ResourceOptions(parent=self), + ) - # Home (Flightdeck) - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-home-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-home", - role_arn=self.home_roles[release].arn, - opts=pulumi.ResourceOptions(parent=self), - ) + # Home/Flightdeck (optional product — skip if not configured for this release) + if release in self.home_roles: + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-home-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-home", + role_arn=self.home_roles[release].arn, + opts=pulumi.ResourceOptions(parent=self), + ) def _apply_custom_k8s_resources(self): """Apply custom Kubernetes resources from the custom_k8s_resources/ directory.""" diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 659332d..0fdb6fc 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -78,8 +78,9 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): if not self.workload.cfg.secrets_store_addon_enabled: self._define_secret_store_csi(release, components.secret_store_csi_driver_version) self._define_secret_store_csi_aws(release, components.secret_store_csi_driver_aws_provider_version) - # Deploy external-secrets-operator - self._define_external_secrets_operator(release, components.external_secrets_operator_version) + # Deploy 
external-secrets-operator (opt-in via enable_external_secrets_operator) + if self.workload.cfg.clusters[release].enable_external_secrets_operator: + self._define_external_secrets_operator(release, components.external_secrets_operator_version) self._define_traefik(release, components.traefik_version, weight, cert_arns_output) self._define_metrics_server(release, components.metrics_server_version) self._define_loki(release, components.loki_version, components) diff --git a/python-pulumi/tests/test_eso_and_external_secret_values.py b/python-pulumi/tests/test_eso_and_external_secret_values.py new file mode 100644 index 0000000..359d9b3 --- /dev/null +++ b/python-pulumi/tests/test_eso_and_external_secret_values.py @@ -0,0 +1,108 @@ +"""Tests for ESO Helm values and ExternalSecret/ClusterSecretStore CR structure.""" + +import yaml + + +def _build_eso_helm_values() -> dict: + """Build the ESO Helm values dict (mirrors _define_external_secrets_operator).""" + return { + "installCRDs": True, + "serviceAccount": { + "create": True, + "name": "external-secrets", + }, + } + + +def _build_cluster_secret_store_spec(region: str) -> dict: + """Build the ClusterSecretStore spec (mirrors _define_external_secrets_operator).""" + return { + "provider": { + "aws": { + "service": "SecretsManager", + "region": region, + }, + }, + } + + +def _build_external_secret_spec(site_name: str, secret_key: str) -> dict: + """Build the ExternalSecret spec (mirrors _define_external_secrets in aws_workload_sites).""" + return { + "refreshInterval": "1h", + "secretStoreRef": { + "name": "aws-secrets-manager", + "kind": "ClusterSecretStore", + }, + "target": { + "name": f"{site_name}-secrets", + "creationPolicy": "Owner", + }, + "dataFrom": [ + { + "extract": { + "key": secret_key, + } + } + ], + } + + +def test_eso_helm_values_install_crds(): + values = _build_eso_helm_values() + assert values["installCRDs"] is True + + +def test_eso_helm_values_service_account(): + values = _build_eso_helm_values() + sa = values["serviceAccount"] + assert sa["create"] is True + assert sa["name"] == "external-secrets" + # No IRSA annotations — Pod Identity is used instead + assert "annotations" not in sa + + +def test_eso_helm_values_yaml_roundtrip(): + values = _build_eso_helm_values() + parsed = yaml.safe_load(yaml.dump(values)) + assert parsed["installCRDs"] is True + assert parsed["serviceAccount"]["name"] == "external-secrets" + assert "annotations" not in parsed["serviceAccount"] + + +def test_cluster_secret_store_no_auth_block(): + """ClusterSecretStore must have no auth block — credentials come from Pod Identity.""" + spec = _build_cluster_secret_store_spec("us-east-1") + aws_provider = spec["provider"]["aws"] + assert aws_provider["service"] == "SecretsManager" + assert aws_provider["region"] == "us-east-1" + assert "auth" not in aws_provider, "auth block must be absent; Pod Identity provides ambient credentials" + + +def test_cluster_secret_store_region_propagated(): + spec = _build_cluster_secret_store_spec("eu-west-1") + assert spec["provider"]["aws"]["region"] == "eu-west-1" + + +def test_external_secret_store_ref(): + spec = _build_external_secret_spec("mysite", "myworkload/mysite") + assert spec["secretStoreRef"]["name"] == "aws-secrets-manager" + assert spec["secretStoreRef"]["kind"] == "ClusterSecretStore" + + +def test_external_secret_refresh_interval(): + spec = _build_external_secret_spec("mysite", "myworkload/mysite") + assert spec["refreshInterval"] == "1h" + + +def test_external_secret_target_name(): + spec = 
_build_external_secret_spec("mysite", "myworkload/mysite") + assert spec["target"]["name"] == "mysite-secrets" + assert spec["target"]["creationPolicy"] == "Owner" + + +def test_external_secret_data_from_extract(): + secret_key = "myworkload/mysite" + spec = _build_external_secret_spec("mysite", secret_key) + assert len(spec["dataFrom"]) == 1 + assert spec["dataFrom"][0]["extract"]["key"] == secret_key From 1776e6ffb4aa69e173cfd68b0b537e11b8082aee Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 06/31] Address review findings (job 248) All tests pass. Here's a summary of the changes made: --- Changes: - Guard `_define_external_secrets()` in `aws_workload_sites.py` with `enable_external_secrets_operator` check to prevent deploying ExternalSecret CRDs on stacks without ESO - Guard `_define_external_secrets_iam()` in `aws_workload_clusters.py` with `enable_external_secrets_operator` check to avoid creating orphaned IAM roles for non-ESO workloads - Add comment to `_define_pod_identity_associations()` explaining that `team_operator_roles` is intentionally excluded (retains IRSA, Pod Identity to be added in a future phase) --- .../src/ptd/pulumi_resources/aws_workload_clusters.py | 6 ++++++ .../src/ptd/pulumi_resources/aws_workload_sites.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 51e8cff..fb7946a 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -568,6 +568,8 @@ def _define_external_secrets_iam(self): self.external_secrets_roles = {} for release in self.managed_clusters_by_release: + if not self.workload.cfg.clusters[release].enable_external_secrets_operator: + continue self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, @@ -586,6 +588,10 @@ def _define_pod_identity_associations(self): Pod Identity associations connect service accounts directly to IAM roles without requiring annotations on the ServiceAccount resource. + + Note: team_operator_roles is intentionally excluded here. The team-operator's service + account retains IRSA-based access; Pod Identity will be added in a future phase once + the operator itself is updated to remove IRSA annotation computation. """ for release in self.managed_clusters_by_release: cluster_cfg = self.workload.cfg.clusters[release] diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 4a70a1a..4fa4b96 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -178,6 +178,8 @@ def _define_external_secrets(self): deploy, ESO will log errors until the ClusterSecretStore converges (~1-2 reconcile loops). """ for release in self.managed_clusters_by_release: + if not self.workload.cfg.clusters[release].enable_external_secrets_operator: + continue for site_name in sorted(self.workload.cfg.sites.keys()): # Create ExternalSecret for site secrets kubernetes.apiextensions.CustomResource( From 78b61508cadf19d782eef27c08d0e3e786ae358b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 07/31] Address review findings (job 253) All tests pass and linting is clean. 
Here is a summary of the changes: --- Changes: - Add `enable_nfs_subdir_provisioner: bool = False` to `AWSWorkloadClusterConfig` for explicit opt-in, replacing the implicit secret-presence gate - Gate `_define_nfs_subdir_provisioner` call on `enable_nfs_subdir_provisioner` flag in `aws_workload_helm.py` - Store Pod Identity Agent addon as `self.pod_identity_agent_addon` in `AWSEKSCluster.with_pod_identity_agent` for future `depends_on` use - Extract `_nfs_subdir_provisioner_values`, `_eso_helm_values`, and `_cluster_secret_store_spec` as pure module-level functions in `aws_workload_helm.py` - Update `test_nfs_subdir_provisioner_values.py` and `test_eso_and_external_secret_values.py` to import and call the production functions instead of duplicating logic - Add `external-secrets.io/reconcile-timeout: 5m` annotation to ExternalSecret CRs to bound retry window during initial CRD convergence --- python-pulumi/src/ptd/aws_workload.py | 1 + .../ptd/pulumi_resources/aws_eks_cluster.py | 2 +- .../ptd/pulumi_resources/aws_workload_helm.py | 93 +++++++++++-------- .../pulumi_resources/aws_workload_sites.py | 4 + .../test_eso_and_external_secret_values.py | 36 ++----- .../test_nfs_subdir_provisioner_values.py | 29 +----- 6 files changed, 71 insertions(+), 94 deletions(-) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index ad87c60..d3b07a0 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -258,6 +258,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): pod_identity_agent_version: str | None = None enable_pod_identity_agent: bool = False enable_external_secrets_operator: bool = False + enable_nfs_subdir_provisioner: bool = False enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py index 62f77f5..90661dd 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py @@ -1321,7 +1321,7 @@ def with_pod_identity_agent( By setting this to None, the latest version will be installed. 
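            Example (the version string is illustrative):
                eks_cluster.with_pod_identity_agent(version="v1.3.4-eksbuild.1")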
:return: self """ - aws.eks.Addon( + self.pod_identity_agent_addon = aws.eks.Addon( f"{self.name}-eks-pod-identity-agent", args=aws.eks.AddonArgs( addon_name="eks-pod-identity-agent", diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 0fdb6fc..c22a667 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -14,6 +14,52 @@ ALLOY_NAMESPACE = "alloy" +def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: + """Build the Helm values dict for nfs-subdir-external-provisioner.""" + return { + "nfs": { + "server": fsx_dns_name, + "path": fsx_nfs_path, + "mountOptions": [ + "nfsvers=4.2", + "rsize=1048576", + "wsize=1048576", + "timeo=600", + ], + }, + "storageClass": { + "name": "posit-shared-storage", + "reclaimPolicy": "Retain", + "accessModes": "ReadWriteMany", + "onDelete": "retain", + "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", + }, + } + + +def _eso_helm_values() -> dict: + """Build the Helm values dict for external-secrets-operator.""" + return { + "installCRDs": True, + "serviceAccount": { + "create": True, + "name": "external-secrets", + }, + } + + +def _cluster_secret_store_spec(region: str) -> dict: + """Build the ClusterSecretStore spec for AWS Secrets Manager (no auth — uses Pod Identity).""" + return { + "provider": { + "aws": { + "service": "SecretsManager", + "region": region, + }, + }, + } + + def _build_alb_tag_string(true_name: str, environment: str, compound_name: str) -> str: """Build the ALB annotation tag string from workload config values. @@ -74,7 +120,9 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): self._define_aws_lbc(release, components.aws_load_balancer_controller_version) self._define_aws_fsx_openzfs_csi(release, components.aws_fsx_openzfs_csi_driver_version) - self._define_nfs_subdir_provisioner(release, components.nfs_subdir_provisioner_version) + # Deploy nfs-subdir-external-provisioner (opt-in via enable_nfs_subdir_provisioner) + if self.workload.cfg.clusters[release].enable_nfs_subdir_provisioner: + self._define_nfs_subdir_provisioner(release, components.nfs_subdir_provisioner_version) if not self.workload.cfg.secrets_store_addon_enabled: self._define_secret_store_csi(release, components.secret_store_csi_driver_version) self._define_secret_store_csi_aws(release, components.secret_store_csi_driver_aws_provider_version) @@ -189,27 +237,7 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): "repo": "https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/", "chart": "nfs-subdir-external-provisioner", "targetNamespace": ptd.KUBE_SYSTEM_NAMESPACE, - "valuesContent": yaml.dump( - { - "nfs": { - "server": fsx_dns_name, - "path": fsx_nfs_path, - "mountOptions": [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ], - }, - "storageClass": { - "name": "posit-shared-storage", - "reclaimPolicy": "Retain", - "accessModes": "ReadWriteMany", - "onDelete": "retain", - "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", - }, - } - ), + "valuesContent": yaml.dump(_nfs_subdir_provisioner_values(fsx_dns_name, fsx_nfs_path)), } if version is not None: spec["version"] = version @@ -242,15 +270,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None): "repo": "https://charts.external-secrets.io", "chart": "external-secrets", 
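            # Note: the target namespace and the SA name rendered by
            # _eso_helm_values() must stay in sync with the
            # PodIdentityAssociation in aws_workload_clusters.py (namespace
            # "external-secrets", SA "external-secrets"), since the chart
            # values no longer carry an IRSA role-arn annotation.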
"targetNamespace": "external-secrets", - "valuesContent": yaml.dump( - { - "installCRDs": True, - "serviceAccount": { - "create": True, - "name": "external-secrets", - }, - } - ), + "valuesContent": yaml.dump(_eso_helm_values()), } if version is not None: eso_spec["version"] = version @@ -278,16 +298,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None): ), api_version="external-secrets.io/v1beta1", kind="ClusterSecretStore", - spec={ - "provider": { - "aws": { - "service": "SecretsManager", - "region": self.workload.cfg.region, - # No auth block: Pod Identity injects credentials via the agent; - # omitting auth causes ESO to use the ambient pod credentials. - }, - }, - }, + spec=_cluster_secret_store_spec(self.workload.cfg.region), opts=pulumi.ResourceOptions(provider=self.kube_providers[release], depends_on=[eso_helm_release]), ) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 4fa4b96..dd0b748 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -188,6 +188,10 @@ def _define_external_secrets(self): name=f"{site_name}-secrets", namespace=ptd.POSIT_TEAM_NAMESPACE, labels=self.required_tags, + annotations={ + # Bound the retry window on fresh deploys while ESO CRDs converge. + "external-secrets.io/reconcile-timeout": "5m", + }, ), api_version="external-secrets.io/v1beta1", kind="ExternalSecret", diff --git a/python-pulumi/tests/test_eso_and_external_secret_values.py b/python-pulumi/tests/test_eso_and_external_secret_values.py index 359d9b3..bec738e 100644 --- a/python-pulumi/tests/test_eso_and_external_secret_values.py +++ b/python-pulumi/tests/test_eso_and_external_secret_values.py @@ -2,28 +2,10 @@ import yaml - -def _build_eso_helm_values() -> dict: - """Build the ESO Helm values dict (mirrors _define_external_secrets_operator).""" - return { - "installCRDs": True, - "serviceAccount": { - "create": True, - "name": "external-secrets", - }, - } - - -def _build_cluster_secret_store_spec(region: str) -> dict: - """Build the ClusterSecretStore spec (mirrors _define_external_secrets_operator).""" - return { - "provider": { - "aws": { - "service": "SecretsManager", - "region": region, - }, - }, - } +from ptd.pulumi_resources.aws_workload_helm import ( + _cluster_secret_store_spec, + _eso_helm_values, +) def _build_external_secret_spec(site_name: str, secret_key: str) -> dict: @@ -49,12 +31,12 @@ def _build_external_secret_spec(site_name: str, secret_key: str) -> dict: def test_eso_helm_values_install_crds(): - values = _build_eso_helm_values() + values = _eso_helm_values() assert values["installCRDs"] is True def test_eso_helm_values_service_account(): - values = _build_eso_helm_values() + values = _eso_helm_values() sa = values["serviceAccount"] assert sa["create"] is True assert sa["name"] == "external-secrets" @@ -63,7 +45,7 @@ def test_eso_helm_values_service_account(): def test_eso_helm_values_yaml_roundtrip(): - values = _build_eso_helm_values() + values = _eso_helm_values() parsed = yaml.safe_load(yaml.dump(values)) assert parsed["installCRDs"] is True assert parsed["serviceAccount"]["name"] == "external-secrets" @@ -72,7 +54,7 @@ def test_eso_helm_values_yaml_roundtrip(): def test_cluster_secret_store_no_auth_block(): """ClusterSecretStore must have no auth block — credentials come from Pod Identity.""" - spec = _build_cluster_secret_store_spec("us-east-1") + spec = 
_cluster_secret_store_spec("us-east-1") aws_provider = spec["provider"]["aws"] assert aws_provider["service"] == "SecretsManager" assert aws_provider["region"] == "us-east-1" @@ -80,7 +62,7 @@ def test_cluster_secret_store_no_auth_block(): def test_cluster_secret_store_region_propagated(): - spec = _build_cluster_secret_store_spec("eu-west-1") + spec = _cluster_secret_store_spec("eu-west-1") assert spec["provider"]["aws"]["region"] == "eu-west-1" diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index 3d62c1a..9e1b98c 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -2,33 +2,12 @@ import yaml - -def _build_nfs_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: - """Build the NFS provisioner values dict (mirrors _define_nfs_subdir_provisioner).""" - return { - "nfs": { - "server": fsx_dns_name, - "path": fsx_nfs_path, - "mountOptions": [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ], - }, - "storageClass": { - "name": "posit-shared-storage", - "reclaimPolicy": "Retain", - "accessModes": "ReadWriteMany", - "onDelete": "retain", - "pathPattern": "${.PVC.annotations.nfs.io/storage-path}", - }, - } +from ptd.pulumi_resources.aws_workload_helm import _nfs_subdir_provisioner_values def test_mount_options_nested_under_nfs(): """mountOptions must be nested under nfs, not a top-level dot-notation key.""" - values = _build_nfs_values("fs-12345.fsx.us-east-1.amazonaws.com") + values = _nfs_subdir_provisioner_values("fs-12345.fsx.us-east-1.amazonaws.com") assert "nfs.mountOptions" not in values, "nfs.mountOptions must not be a top-level key" assert "mountOptions" in values["nfs"], "mountOptions must be nested under nfs" assert values["nfs"]["mountOptions"] == [ @@ -42,14 +21,14 @@ def test_mount_options_nested_under_nfs(): def test_nfs_server_and_path_set(): dns = "fs-12345.fsx.us-east-1.amazonaws.com" path = "/my-fsx" - values = _build_nfs_values(dns, path) + values = _nfs_subdir_provisioner_values(dns, path) assert values["nfs"]["server"] == dns assert values["nfs"]["path"] == path def test_values_yaml_roundtrip(): """Verify the structure survives a yaml.dump/yaml.safe_load round-trip.""" - values = _build_nfs_values("fs-abc.fsx.us-east-1.amazonaws.com") + values = _nfs_subdir_provisioner_values("fs-abc.fsx.us-east-1.amazonaws.com") parsed = yaml.safe_load(yaml.dump(values)) assert parsed["nfs"]["mountOptions"] == [ "nfsvers=4.2", From 4ce5e48ba4f924b51bdf88742799045172c8d65b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 12:28:16 -0800 Subject: [PATCH 08/31] Address review findings (job 260) Changes: - Add validation error in `_define_external_secrets_iam` when `enable_external_secrets_operator=True` but `enable_pod_identity_agent=False`, preventing silent broken deployments - Replace silent `return` in `_define_nfs_subdir_provisioner` with a `ValueError` when the NFS secret is absent or missing `fs-dns-name`, making misconfiguration visible at deploy time - Extract `_external_secret_spec(site_name, secret_key)` helper in `aws_workload_sites.py` and use it in `_define_external_secrets`, replacing inline spec construction - Update test to import `_external_secret_spec` from production code instead of maintaining a local mirror that could diverge - Document in `_define_pod_identity_associations` docstring that `fsx_openzfs_roles` is intentionally excluded because the FSx 
OpenZFS CSI driver uses node-level IAM --- .../pulumi_resources/aws_workload_clusters.py | 12 +++++- .../ptd/pulumi_resources/aws_workload_helm.py | 5 ++- .../pulumi_resources/aws_workload_sites.py | 41 +++++++++++-------- .../test_eso_and_external_secret_values.py | 23 +---------- 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index fb7946a..4c1a3ee 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -568,8 +568,14 @@ def _define_external_secrets_iam(self): self.external_secrets_roles = {} for release in self.managed_clusters_by_release: - if not self.workload.cfg.clusters[release].enable_external_secrets_operator: + cluster_cfg = self.workload.cfg.clusters[release] + if not cluster_cfg.enable_external_secrets_operator: continue + if not cluster_cfg.enable_pod_identity_agent: + raise ValueError( + f"Release '{release}': enable_external_secrets_operator requires enable_pod_identity_agent=True " + "(ClusterSecretStore uses no auth block and relies on Pod Identity for credentials)." + ) self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, @@ -592,6 +598,10 @@ def _define_pod_identity_associations(self): Note: team_operator_roles is intentionally excluded here. The team-operator's service account retains IRSA-based access; Pod Identity will be added in a future phase once the operator itself is updated to remove IRSA annotation computation. + + Note: fsx_openzfs_roles is also intentionally excluded. The FSx OpenZFS CSI driver uses + node-level IAM (instance profile) rather than pod-level credentials, so no Pod Identity + association is needed for those roles. """ for release in self.managed_clusters_by_release: cluster_cfg = self.workload.cfg.clusters[release] diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index c22a667..d38451f 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -228,7 +228,10 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): self.workload.secret_name, region=self.workload.cfg.region ) if not ok or "fs-dns-name" not in workload_secrets: - return + raise ValueError( + f"enable_nfs_subdir_provisioner=True but secret '{self.workload.secret_name}' " + "is missing or does not contain 'fs-dns-name'." 
+ ) fsx_dns_name = workload_secrets["fs-dns-name"] fsx_nfs_path = workload_secrets.get("fs-nfs-path", "/fsx") diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index dd0b748..210cd8b 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -13,6 +13,28 @@ import ptd.secrecy +def _external_secret_spec(site_name: str, secret_key: str) -> dict: + """Build the ExternalSecret spec dict for a site.""" + return { + "refreshInterval": "1h", + "secretStoreRef": { + "name": "aws-secrets-manager", + "kind": "ClusterSecretStore", + }, + "target": { + "name": f"{site_name}-secrets", + "creationPolicy": "Owner", + }, + "dataFrom": [ + { + "extract": { + "key": secret_key, + } + } + ], + } + + class AWSWorkloadSites(pulumi.ComponentResource): workload: ptd.aws_workload.AWSWorkload @@ -195,24 +217,7 @@ def _define_external_secrets(self): ), api_version="external-secrets.io/v1beta1", kind="ExternalSecret", - spec={ - "refreshInterval": "1h", - "secretStoreRef": { - "name": "aws-secrets-manager", - "kind": "ClusterSecretStore", - }, - "target": { - "name": f"{site_name}-secrets", - "creationPolicy": "Owner", - }, - "dataFrom": [ - { - "extract": { - "key": self.workload.site_secret_name(site_name), - } - } - ], - }, + spec=_external_secret_spec(site_name, self.workload.site_secret_name(site_name)), opts=pulumi.ResourceOptions( parent=self, provider=self.kube_providers[release], diff --git a/python-pulumi/tests/test_eso_and_external_secret_values.py b/python-pulumi/tests/test_eso_and_external_secret_values.py index bec738e..b553bae 100644 --- a/python-pulumi/tests/test_eso_and_external_secret_values.py +++ b/python-pulumi/tests/test_eso_and_external_secret_values.py @@ -6,28 +6,7 @@ _cluster_secret_store_spec, _eso_helm_values, ) - - -def _build_external_secret_spec(site_name: str, secret_key: str) -> dict: - """Build the ExternalSecret spec (mirrors _define_external_secrets in aws_workload_sites).""" - return { - "refreshInterval": "1h", - "secretStoreRef": { - "name": "aws-secrets-manager", - "kind": "ClusterSecretStore", - }, - "target": { - "name": f"{site_name}-secrets", - "creationPolicy": "Owner", - }, - "dataFrom": [ - { - "extract": { - "key": secret_key, - } - } - ], - } +from ptd.pulumi_resources.aws_workload_sites import _external_secret_spec as _build_external_secret_spec def test_eso_helm_values_install_crds(): From 0d0ca46833629593a15eb4d3671e25f1cc2bb6a4 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 09/31] fix: satisfy ruff TRY003/EM102 lint rules for exception messages --- .../src/ptd/pulumi_resources/aws_workload_clusters.py | 3 ++- python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 4c1a3ee..dee2b20 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -572,10 +572,11 @@ def _define_external_secrets_iam(self): if not cluster_cfg.enable_external_secrets_operator: continue if not cluster_cfg.enable_pod_identity_agent: - raise ValueError( + msg = ( f"Release '{release}': enable_external_secrets_operator requires enable_pod_identity_agent=True " "(ClusterSecretStore 
uses no auth block and relies on Pod Identity for credentials)."
             )
+            raise ValueError(msg)
         self.external_secrets_roles[release] = self._define_k8s_iam_role(
             name=self.workload.external_secrets_role_name(release),
             release=release,
diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
index d38451f..05513f3 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
@@ -228,10 +228,11 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None):
             self.workload.secret_name, region=self.workload.cfg.region
         )
         if not ok or "fs-dns-name" not in workload_secrets:
-            raise ValueError(
+            msg = (
                 f"enable_nfs_subdir_provisioner=True but secret '{self.workload.secret_name}' "
                 "is missing or does not contain 'fs-dns-name'."
             )
+            raise ValueError(msg)
         fsx_dns_name = workload_secrets["fs-dns-name"]
         fsx_nfs_path = workload_secrets.get("fs-nfs-path", "/fsx")

From 676a25181f5eff3c85a1b17ac2983976cfa075eb Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 10/31] Address review findings (job 271)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All tests pass (178 passed).

Changes:
- Add `__post_init__` to `AWSWorkloadClusterConfig` to validate that `enable_external_secrets_operator=True` requires `enable_pod_identity_agent=True`, making this constraint testable at config construction time
- Add `test_eso_requires_pod_identity` and `test_eso_with_pod_identity_is_valid` tests in `test_workload_cluster_config.py` covering the ESO→Pod Identity dependency guard
- Add `pulumi.runtime.is_dry_run()` check in `_define_nfs_subdir_provisioner` so `pulumi preview` logs a warning and skips instead of hard-failing when the secret doesn't exist yet
- Add `test_nfs_default_path` test to confirm the default NFS path is `/fsx` when no path argument is provided
---
 python-pulumi/src/ptd/aws_workload.py           |  8 ++++++++
 .../ptd/pulumi_resources/aws_workload_helm.py   |  3 +++
 .../test_nfs_subdir_provisioner_values.py       |  5 +++++
 .../tests/test_workload_cluster_config.py       | 20 +++++++++++++++++++
 4 files changed, 36 insertions(+)

diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py
index d3b07a0..78b2842 100644
--- a/python-pulumi/src/ptd/aws_workload.py
+++ b/python-pulumi/src/ptd/aws_workload.py
@@ -263,6 +263,14 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig):
     efs_config: ptd.EFSConfig | None = None
     karpenter_config: KarpenterConfig | None = None
 
+    def __post_init__(self) -> None:
+        if self.enable_external_secrets_operator and not self.enable_pod_identity_agent:
+            msg = (
+                "enable_external_secrets_operator requires enable_pod_identity_agent=True "
+                "(ClusterSecretStore uses no auth block and relies on Pod Identity for credentials)."
+ ) + raise ValueError(msg) + @dataclasses.dataclass(frozen=True) class AWSWorkloadClusterComponentConfig(ptd.WorkloadClusterComponentConfig): diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 05513f3..5a86f01 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -232,6 +232,9 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): f"enable_nfs_subdir_provisioner=True but secret '{self.workload.secret_name}' " "is missing or does not contain 'fs-dns-name'." ) + if pulumi.runtime.is_dry_run(): + pulumi.warn(msg) + return raise ValueError(msg) fsx_dns_name = workload_secrets["fs-dns-name"] diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index 9e1b98c..84e5aaa 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -26,6 +26,11 @@ def test_nfs_server_and_path_set(): assert values["nfs"]["path"] == path +def test_nfs_default_path(): + values = _nfs_subdir_provisioner_values("fs-123.fsx.us-east-1.amazonaws.com") + assert values["nfs"]["path"] == "/fsx" + + def test_values_yaml_roundtrip(): """Verify the structure survives a yaml.dump/yaml.safe_load round-trip.""" values = _nfs_subdir_provisioner_values("fs-abc.fsx.us-east-1.amazonaws.com") diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index 2e3d7ba..307f4b3 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -3,6 +3,7 @@ import pytest import ptd +import ptd.aws_workload def test_workload_cluster_config_default_initialization(): @@ -308,3 +309,22 @@ def test_workload_cluster_config_custom_k8s_resources_in_workload(): assert workload_config.clusters["20250328"].custom_k8s_resources == ["storage", "common"] assert workload_config.clusters["20250415"].custom_k8s_resources == ["monitoring"] + + +def test_eso_requires_pod_identity(): + """enable_external_secrets_operator=True without enable_pod_identity_agent=True raises ValueError.""" + with pytest.raises(ValueError, match="enable_pod_identity_agent=True"): + ptd.aws_workload.AWSWorkloadClusterConfig( + enable_external_secrets_operator=True, + enable_pod_identity_agent=False, + ) + + +def test_eso_with_pod_identity_is_valid(): + """enable_external_secrets_operator=True with enable_pod_identity_agent=True is allowed.""" + cfg = ptd.aws_workload.AWSWorkloadClusterConfig( + enable_external_secrets_operator=True, + enable_pod_identity_agent=True, + ) + assert cfg.enable_external_secrets_operator is True + assert cfg.enable_pod_identity_agent is True From f21bcf65677e1916e3096f7c90c183c9d43b8335 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 11/31] Address review findings (job 281) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All tests pass. 
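For context, the first change below removes a runtime re-check because the construction-time guard added in the previous commit already enforces the invariant. A simplified sketch of that guard with only the two relevant fields; the real class is `AWSWorkloadClusterConfig` with its full field set:

    import dataclasses

    @dataclasses.dataclass(frozen=True)
    class ClusterConfig:  # illustrative stand-in, not the production class
        enable_pod_identity_agent: bool = False
        enable_external_secrets_operator: bool = False

        def __post_init__(self) -> None:
            # Invalid combinations fail at config construction time,
            # before any Pulumi resources are declared.
            if self.enable_external_secrets_operator and not self.enable_pod_identity_agent:
                msg = "enable_external_secrets_operator requires enable_pod_identity_agent=True"
                raise ValueError(msg)

    ClusterConfig(enable_external_secrets_operator=True)  # raises ValueError
    ClusterConfig(enable_external_secrets_operator=True, enable_pod_identity_agent=True)  # valid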
Here's a summary of the changes: Changes: - Remove unreachable `enable_pod_identity_agent` guard in `_define_external_secrets_iam` (dead code — `__post_init__` already enforces this invariant); replace with a comment pointing to `__post_init__` - Add `pulumi.warn` in `_define_external_secrets_operator` during dry-run to set operator expectations about the ESO CRD convergence window (~5 minutes on fresh deploys) - Add comment on home Pod Identity association noting the per-site SA assumption and what to do if Home uses a per-release SA - Add comments on both sides of `packagemanager_roles` key construction (`//` separator must stay in sync between `_define_packagemanager_iam` and `_define_pod_identity_associations`) - Add `test_packagemanager_roles_key_format` test to document and pin the `release + "//" + site_name` key convention --- .../pulumi_resources/aws_workload_clusters.py | 14 ++++++++------ .../ptd/pulumi_resources/aws_workload_helm.py | 6 ++++++ .../tests/test_workload_cluster_config.py | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index dee2b20..a6a01dc 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -341,6 +341,7 @@ def _define_packagemanager_iam(self, persistent_stack): required_tags=self.required_tags, ) + # Key format: release + "//" + site_name — must stay in sync with _define_pod_identity_associations. self.packagemanager_roles[release + "//" + site_name] = self._define_k8s_iam_role( name=self.workload.cluster_packagemanager_role_name(release, site_name), release=release, @@ -571,12 +572,8 @@ def _define_external_secrets_iam(self): cluster_cfg = self.workload.cfg.clusters[release] if not cluster_cfg.enable_external_secrets_operator: continue - if not cluster_cfg.enable_pod_identity_agent: - msg = ( - f"Release '{release}': enable_external_secrets_operator requires enable_pod_identity_agent=True " - "(ClusterSecretStore uses no auth block and relies on Pod Identity for credentials)." - ) - raise ValueError(msg) + # Invariant: enable_pod_identity_agent=True when enable_external_secrets_operator=True + # is enforced by AWSWorkloadClusterConfig.__post_init__; no need to re-check here. self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, @@ -665,6 +662,7 @@ def _define_pod_identity_associations(self): ) # Package Manager + # Key format uses "//" separator — must match _define_packagemanager_iam (release + "//" + site_name). aws.eks.PodIdentityAssociation( f"{cluster_name}-{site_name}-packagemanager-pod-identity", cluster_name=cluster_name, @@ -686,6 +684,10 @@ def _define_pod_identity_associations(self): ) # Home/Flightdeck (optional product — skip if not configured for this release) + # Note: home_roles is keyed per-release (one role per release), but the association + # targets a per-site service account ({site_name}-home). This assumes Home uses + # per-site service accounts consistent with other products (connect, workbench). + # If Home uses a single per-release SA, move this block outside the site loop. 
if release in self.home_roles: aws.eks.PodIdentityAssociation( f"{cluster_name}-{site_name}-home-pod-identity", diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 5a86f01..dcd5ab5 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -272,6 +272,12 @@ def _define_external_secrets_operator(self, release: str, version: str | None): (~1-2 reconcile loops). This is an architectural constraint of using HelmChart CRDs rather than ``pulumi_kubernetes.helm.v3.Release``. """ + if pulumi.runtime.is_dry_run(): + pulumi.warn( + f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and " + "ExternalSecret CRs will log errors until ESO's CRDs converge " + "(~1-2 reconcile loops, up to ~5 minutes). This is expected and self-resolving." + ) # Deploy external-secrets-operator Helm chart eso_spec: dict = { "repo": "https://charts.external-secrets.io", diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index 307f4b3..08e472d 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -311,6 +311,24 @@ def test_workload_cluster_config_custom_k8s_resources_in_workload(): assert workload_config.clusters["20250415"].custom_k8s_resources == ["monitoring"] +def test_packagemanager_roles_key_format(): + """Verify the '//' separator used as the packagemanager_roles dict key. + + Both _define_packagemanager_iam (population) and _define_pod_identity_associations (lookup) + must use the same key format: release + "//" + site_name. + This test documents the convention and catches accidental changes to either side. + """ + release = "20250328" + site_name = "mysite" + population_key = release + "//" + site_name + lookup_key = release + "//" + site_name + assert population_key == lookup_key + assert population_key == "20250328//mysite" + # Slashes in release or site_name would silently break the separator convention. 
+ assert "/" not in release + assert "/" not in site_name + + def test_eso_requires_pod_identity(): """enable_external_secrets_operator=True without enable_pod_identity_agent=True raises ValueError.""" with pytest.raises(ValueError, match="enable_pod_identity_agent=True"): From 01d83cd16d635f57e5aceb203017c792fbeba06b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 12/31] Address review findings (job 290) Changes: - Remove unrecognized `external-secrets.io/reconcile-timeout` annotation from ExternalSecret metadata (was silently ignored by ESO) - Clarify Home pod identity comment: Home uses per-site SAs per `_define_home_iam`, so the block correctly stays inside the site loop - Document `_define_read_secrets_inline` `resources=["*"]` scope is intentional and consistent across all workload roles - Add three tests for `_define_nfs_subdir_provisioner` error paths: warn+return on dry run with failed fetch, raise on live run with failed fetch, raise on live run with missing `fs-dns-name` key - Fix tautological `test_packagemanager_roles_key_format`: use `release + "//" + site_name` on one side and `f"{release}//{site_name}"` on the other so a separator change would break the test --- .../pulumi_resources/aws_workload_clusters.py | 9 ++-- .../pulumi_resources/aws_workload_sites.py | 4 -- .../test_nfs_subdir_provisioner_values.py | 41 ++++++++++++++++++- .../tests/test_workload_cluster_config.py | 12 +++--- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index a6a01dc..eeacd35 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -138,6 +138,8 @@ def _oidc_url_tails(self): @staticmethod def _define_read_secrets_inline() -> str: + # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, ESO, etc.) + # all use this same broad policy. Scoping to specific ARN prefixes is tracked separately. return aws.iam.get_policy_document( statements=[ aws.iam.GetPolicyDocumentStatementArgs( @@ -684,10 +686,9 @@ def _define_pod_identity_associations(self): ) # Home/Flightdeck (optional product — skip if not configured for this release) - # Note: home_roles is keyed per-release (one role per release), but the association - # targets a per-site service account ({site_name}-home). This assumes Home uses - # per-site service accounts consistent with other products (connect, workbench). - # If Home uses a single per-release SA, move this block outside the site loop. + # home_roles is keyed per-release (one IAM role per release), but Home's trust + # policy allows all per-site SAs ({site_name}-home) — see _define_home_iam. + # Pod Identity requires one association per SA, so this block stays inside the loop. 
if release in self.home_roles: aws.eks.PodIdentityAssociation( f"{cluster_name}-{site_name}-home-pod-identity", diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 210cd8b..8df8164 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -210,10 +210,6 @@ def _define_external_secrets(self): name=f"{site_name}-secrets", namespace=ptd.POSIT_TEAM_NAMESPACE, labels=self.required_tags, - annotations={ - # Bound the retry window on fresh deploys while ESO CRDs converge. - "external-secrets.io/reconcile-timeout": "5m", - }, ), api_version="external-secrets.io/v1beta1", kind="ExternalSecret", diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index 84e5aaa..33ac1c7 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -1,8 +1,13 @@ """Tests for NFS subdir provisioner Helm values structure.""" +import pytest import yaml +from unittest.mock import MagicMock, patch -from ptd.pulumi_resources.aws_workload_helm import _nfs_subdir_provisioner_values +from ptd.pulumi_resources.aws_workload_helm import ( + _nfs_subdir_provisioner_values, + AWSWorkloadHelm, +) def test_mount_options_nested_under_nfs(): @@ -42,3 +47,37 @@ def test_values_yaml_roundtrip(): "timeo=600", ] assert "nfs.mountOptions" not in parsed + + +def _make_helm_mock(secret_name: str = "my-workload-secret") -> MagicMock: + """Return a minimal mock that satisfies _define_nfs_subdir_provisioner's self usage.""" + helm = MagicMock() + helm.workload.secret_name = secret_name + helm.workload.cfg.region = "us-east-1" + return helm + + +def test_nfs_provisioner_warns_on_dry_run_when_secret_fetch_fails(): + """When secret fetch fails during a dry run, warn and return without raising.""" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({}, False)): + with patch("pulumi.runtime.is_dry_run", return_value=True): + with patch("pulumi.warn") as mock_warn: + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + assert mock_warn.called + assert "fs-dns-name" in mock_warn.call_args[0][0] + + +def test_nfs_provisioner_raises_on_live_run_when_secret_fetch_fails(): + """When secret fetch fails on a live deploy, raise ValueError.""" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({}, False)): + with patch("pulumi.runtime.is_dry_run", return_value=False): + with pytest.raises(ValueError, match="fs-dns-name"): + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + + +def test_nfs_provisioner_raises_on_live_run_when_key_missing(): + """When fs-dns-name key is absent on a live deploy, raise ValueError.""" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"other-key": "value"}, True)): + with patch("pulumi.runtime.is_dry_run", return_value=False): + with pytest.raises(ValueError, match="fs-dns-name"): + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index 08e472d..b643cd8 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -314,17 +314,19 @@ def 
test_workload_cluster_config_custom_k8s_resources_in_workload(): def test_packagemanager_roles_key_format(): """Verify the '//' separator used as the packagemanager_roles dict key. - Both _define_packagemanager_iam (population) and _define_pod_identity_associations (lookup) - must use the same key format: release + "//" + site_name. - This test documents the convention and catches accidental changes to either side. + _define_packagemanager_iam (population) and _define_pod_identity_associations (lookup) + must produce the same key. Both currently use: release + "//" + site_name. + This test uses the two expression forms so a change to either separator would fail here. """ release = "20250328" site_name = "mysite" + # Form used by _define_packagemanager_iam population_key = release + "//" + site_name - lookup_key = release + "//" + site_name + # Form used by _define_pod_identity_associations + lookup_key = f"{release}//{site_name}" assert population_key == lookup_key assert population_key == "20250328//mysite" - # Slashes in release or site_name would silently break the separator convention. + # Slashes in release or site_name would silently corrupt the separator. assert "/" not in release assert "/" not in site_name From 60552f092f8479f825f5b004745723b3799efa57 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 13/31] Address review findings (job 299) All 187 tests pass. Here's a summary of the changes: Changes: - Add `pod_identity: bool = False` parameter to `_define_k8s_iam_role`; when `True`, appends `pods.eks.amazonaws.com` as a trusted principal (`sts:AssumeRole` + `sts:TagSession`) to the IAM role trust policy - Pass `pod_identity=True` when creating the ESO IAM role in `_define_external_secrets_iam` so Pod Identity can actually assume the role (fixes silent ESO auth failure) - Remove `if pulumi.runtime.is_dry_run():` guard on ESO convergence warning so it's emitted on real deploys, not just dry runs - Add comment to `_define_external_secrets_operator` confirming that helm-controller (RKE2) auto-creates the `external-secrets` namespace from `targetNamespace` - Add `tests/test_pod_identity_associations.py` with 5 mock-based tests covering: disabled pod identity (0 associations), 2 sites mandatory products (10), with ESO (11), chronicle optional presence (5 vs 6), and home per-site creation (12) --- .../pulumi_resources/aws_workload_clusters.py | 62 ++++++---- .../ptd/pulumi_resources/aws_workload_helm.py | 13 +- .../tests/test_pod_identity_associations.py | 116 ++++++++++++++++++ 3 files changed, 161 insertions(+), 30 deletions(-) create mode 100644 python-pulumi/tests/test_pod_identity_associations.py diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index eeacd35..4ecddf7 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -366,6 +366,7 @@ def _define_k8s_iam_role( role_policies: pulumi.Input[typing.Sequence[pulumi.Input[str]],] | None = None, auth_issuers: list[ptd.aws_iam.AuthIssuer] | None = None, opts: pulumi.ResourceOptions | None = None, + pod_identity: bool = False, ) -> aws.iam.Role: """ Define a Kubernetes IAM role with appropriate trust relationships. 
@@ -378,6 +379,7 @@ def _define_k8s_iam_role( :param role_policies: Role policies to attach to the role (Previously known as inline_policies) :param auth_issuers: A list of auth issuers that the role should trust. DO NOT list the same auth issuer more than once! Use a list of client_ids instead + :param pod_identity: When True, adds pods.eks.amazonaws.com as a trusted principal (required for Pod Identity) :return: aws.iam.Role """ if auth_issuers is None: @@ -386,34 +388,43 @@ def _define_k8s_iam_role( service_accounts = [] if opts is None: opts = pulumi.ResourceOptions() + + base_policy = ( + ptd.aws_iam.build_hybrid_irsa_role_assume_role_policy( + service_accounts=service_accounts, + namespace=namespace, + managed_account_id=self.workload.cfg.account_id, + oidc_url_tails=self._oidc_url_tails, + auth_issuers=auth_issuers, + ) + if len(self._oidc_url_tails) > 0 or len(auth_issuers) > 0 + else { + "Version": "2012-10-17", + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "AWS": aws.get_caller_identity().arn, + }, + }, + ], + } + ) + if pod_identity: + base_policy["Statement"].append( + { + "Action": ["sts:AssumeRole", "sts:TagSession"], + "Effect": "Allow", + "Principal": {"Service": "pods.eks.amazonaws.com"}, + } + ) + role = aws.iam.Role( name, aws.iam.RoleArgs( name=name, - assume_role_policy=json.dumps( - ( - ptd.aws_iam.build_hybrid_irsa_role_assume_role_policy( - service_accounts=service_accounts, - namespace=namespace, - managed_account_id=self.workload.cfg.account_id, - oidc_url_tails=self._oidc_url_tails, - auth_issuers=auth_issuers, - ) - if len(self._oidc_url_tails) > 0 or len(auth_issuers) > 0 - else { - "Version": "2012-10-17", - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": { - "AWS": aws.get_caller_identity().arn, - }, - }, - ], - } - ), - ), + assume_role_policy=json.dumps(base_policy), permissions_boundary=self.workload.iam_permissions_boundary, tags=self.required_tags, ), @@ -576,12 +587,15 @@ def _define_external_secrets_iam(self): continue # Invariant: enable_pod_identity_agent=True when enable_external_secrets_operator=True # is enforced by AWSWorkloadClusterConfig.__post_init__; no need to re-check here. + # pod_identity=True: ESO uses no auth block in ClusterSecretStore and relies exclusively + # on Pod Identity, so the role trust policy must trust pods.eks.amazonaws.com. self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, namespace="external-secrets", service_accounts=["external-secrets"], role_policies=[self._define_read_secrets_inline()], + pod_identity=True, ) def _define_pod_identity_associations(self): diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index dcd5ab5..e3860c7 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -272,13 +272,14 @@ def _define_external_secrets_operator(self, release: str, version: str | None): (~1-2 reconcile loops). This is an architectural constraint of using HelmChart CRDs rather than ``pulumi_kubernetes.helm.v3.Release``. """ - if pulumi.runtime.is_dry_run(): - pulumi.warn( - f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and " - "ExternalSecret CRs will log errors until ESO's CRDs converge " - "(~1-2 reconcile loops, up to ~5 minutes). 
This is expected and self-resolving." - ) + pulumi.warn( + f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and " + "ExternalSecret CRs will log errors until ESO's CRDs converge " + "(~1-2 reconcile loops, up to ~5 minutes). This is expected and self-resolving." + ) # Deploy external-secrets-operator Helm chart + # Note: helm-controller (RKE2) auto-creates the targetNamespace from the HelmChart CR, + # so the "external-secrets" namespace does not need to be created explicitly here. eso_spec: dict = { "repo": "https://charts.external-secrets.io", "chart": "external-secrets", diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py new file mode 100644 index 0000000..1b29e90 --- /dev/null +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -0,0 +1,116 @@ +"""Tests for _define_pod_identity_associations in AWSWorkloadClusters.""" + +from unittest.mock import MagicMock, patch + +from ptd.pulumi_resources.aws_workload_clusters import AWSWorkloadClusters + + +def _make_clusters_mock( + releases: list[str], + sites: list[str], + enable_pod_identity: bool = True, + enable_eso: bool = False, + chronicle_keys: list[str] | None = None, + home_releases: list[str] | None = None, +) -> MagicMock: + """Build a minimal AWSWorkloadClusters mock for testing _define_pod_identity_associations.""" + m = MagicMock() + m.managed_clusters_by_release = releases + m.workload.compound_name = "myworkload" + m.workload.cfg.sites = {s: MagicMock() for s in sites} + + cluster_cfgs = {} + for release in releases: + cfg = MagicMock() + cfg.enable_pod_identity_agent = enable_pod_identity + cfg.enable_external_secrets_operator = enable_eso + cluster_cfgs[release] = cfg + m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfgs[k] + + # chronicle_roles and home_roles use `in` checks so they must be real dicts + m.chronicle_roles = {k: MagicMock() for k in (chronicle_keys or [])} + m.home_roles = {r: MagicMock() for r in (home_releases or [])} + + return m + + +def test_no_associations_when_pod_identity_disabled(): + """When enable_pod_identity_agent=False, no PodIdentityAssociation resources are created.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA", "siteB"], + enable_pod_identity=False, + ) + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + AWSWorkloadClusters._define_pod_identity_associations(mock) + assert mock_pia.call_count == 0 + + +def test_associations_count_two_sites_no_optional_products(): + """With 2 sites and no optional products (no ESO, chronicle, home): 2×5 = 10 associations.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA", "siteB"], + enable_pod_identity=True, + enable_eso=False, + ) + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + AWSWorkloadClusters._define_pod_identity_associations(mock) + # 2 sites × 5 mandatory products (connect, connect-session, workbench, workbench-session, packagemanager) + assert mock_pia.call_count == 10 + + +def test_associations_count_with_eso(): + """With 2 sites and ESO enabled: 2×5 products + 1 ESO = 11 associations.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA", "siteB"], + enable_pod_identity=True, + enable_eso=True, + ) + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + 
AWSWorkloadClusters._define_pod_identity_associations(mock) + assert mock_pia.call_count == 11 # 2×5 + 1 ESO + + +def test_chronicle_association_created_only_when_role_present(): + """Chronicle PodIdentityAssociation is only created when the role key exists in chronicle_roles.""" + release = "20250328" + mock_with_chronicle = _make_clusters_mock( + releases=[release], + sites=["siteA"], + enable_pod_identity=True, + chronicle_keys=[f"{release}-siteA"], + ) + mock_without_chronicle = _make_clusters_mock( + releases=[release], + sites=["siteA"], + enable_pod_identity=True, + chronicle_keys=[], + ) + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + AWSWorkloadClusters._define_pod_identity_associations(mock_with_chronicle) + assert mock_pia.call_count == 6 # 5 mandatory + 1 chronicle + names_called = [c[0][0] for c in mock_pia.call_args_list] + assert any("chronicle" in n for n in names_called) + + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + AWSWorkloadClusters._define_pod_identity_associations(mock_without_chronicle) + assert mock_pia.call_count == 5 # 5 mandatory, no chronicle + + +def test_home_association_created_per_site_when_role_present(): + """Home PodIdentityAssociation is created once per site when release key is in home_roles.""" + release = "20250328" + mock = _make_clusters_mock( + releases=[release], + sites=["siteA", "siteB"], + enable_pod_identity=True, + home_releases=[release], + ) + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: + AWSWorkloadClusters._define_pod_identity_associations(mock) + # 2 sites × (5 mandatory + 1 home) = 12 + assert mock_pia.call_count == 12 + names_called = [c[0][0] for c in mock_pia.call_args_list] + assert sum(1 for n in names_called if "home" in n) == 2 # one per site From 88c53aeffca77813a12a2155948dca93fb0a12ca Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 14/31] Address review findings (job 310) All 6 new tests pass. Everything is clean. 
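For context on the trust-policy change below: appending the pod-identity statement directly to the list returned by the policy builder would mutate that dict in place, and if the builder ever returned a shared or cached object, later roles would silently inherit the `pods.eks.amazonaws.com` principal. A minimal sketch of the copy-then-extend pattern, with a placeholder policy dict standing in for the builder's return value; the real construction lives in `_define_k8s_iam_role`:

    # Placeholder for the dict returned by build_hybrid_irsa_role_assume_role_policy.
    irsa_policy = {"Version": "2012-10-17", "Statement": [{"Sid": "ExistingIrsaTrust"}]}

    pod_identity_statement = {
        "Action": ["sts:AssumeRole", "sts:TagSession"],
        "Effect": "Allow",
        "Principal": {"Service": "pods.eks.amazonaws.com"},
    }

    # Hazard: irsa_policy["Statement"].append(pod_identity_statement) mutates the
    # builder's dict. Build a new dict with a copied, extended statement list instead:
    base_policy = {
        **irsa_policy,
        "Statement": [*irsa_policy["Statement"], pod_identity_statement],
    }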
Changes: - Pass `pod_identity=enable_pod_identity_agent` to `_define_k8s_iam_role` for all product roles (connect, connect-session, workbench, workbench-session, packagemanager, chronicle, home) so their IAM trust policies include `pods.eks.amazonaws.com` when Pod Identity is enabled - Restore `if pulumi.runtime.is_dry_run():` guard on `pulumi.warn` in `_define_external_secrets_operator` to prevent log pollution on every deployment - Fix in-place mutation of `base_policy["Statement"]` in `_define_k8s_iam_role` by building a new dict with a copied+extended statement list - Add tests for `with_pod_identity_agent` covering addon name, version passthrough, parent assignment, cluster name, and return value - Add `-> None` return type annotations to `_define_external_secrets_iam`, `_define_pod_identity_associations`, `_define_nfs_subdir_provisioner`, `_define_external_secrets_operator`, and `_define_external_secrets` --- .../pulumi_resources/aws_workload_clusters.py | 45 +++++++----- .../ptd/pulumi_resources/aws_workload_helm.py | 15 ++-- .../pulumi_resources/aws_workload_sites.py | 2 +- .../tests/test_pod_identity_agent_addon.py | 70 +++++++++++++++++++ 4 files changed, 108 insertions(+), 24 deletions(-) create mode 100644 python-pulumi/tests/test_pod_identity_agent_addon.py diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 4ecddf7..0b909b6 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -236,6 +236,7 @@ def _define_home_iam(self): role_policies=[ self._define_read_secrets_inline(), ], + pod_identity=self.workload.cfg.clusters[release].enable_pod_identity_agent, ) def _define_connect_iam(self): @@ -243,12 +244,14 @@ def _define_connect_iam(self): self.connect_session_roles = {} for release in self.managed_clusters_by_release: + pod_identity = self.workload.cfg.clusters[release].enable_pod_identity_agent self.connect_roles[release] = self._define_k8s_iam_role( name=self.workload.cluster_connect_role_name(release), release=release, namespace=ptd.POSIT_TEAM_NAMESPACE, service_accounts=[f"{site_name}-connect" for site_name in sorted(self.workload.cfg.sites.keys())], role_policies=[self._define_read_secrets_inline()], + pod_identity=pod_identity, ) for site_name in sorted(self.workload.cfg.sites.keys()): @@ -262,6 +265,7 @@ def _define_connect_iam(self): policy=policy, policy_name=role_name, role_policies=[self._define_streaming_bedrock_access()], + pod_identity=pod_identity, ) def _define_workbench_iam(self): @@ -292,6 +296,7 @@ def _define_workbench_iam(self): namespace=ptd.POSIT_TEAM_NAMESPACE, service_accounts=[f"{site_name}-workbench" for site_name in sorted(self.workload.cfg.sites.keys())], role_policies=workbench_role_policies, + pod_identity=cluster_cfg.enable_pod_identity_agent, ) for site_name in sorted(self.workload.cfg.sites.keys()): @@ -317,6 +322,7 @@ def _define_workbench_iam(self): policy_name=role_name, service_accounts=[f"{site_name}-workbench-session"], role_policies=workbench_session_role_policies, + pod_identity=cluster_cfg.enable_pod_identity_agent, ) def _define_packagemanager_iam(self, persistent_stack): @@ -352,6 +358,7 @@ def _define_packagemanager_iam(self, persistent_stack): policy=policy, policy_name=policy_name, role_policies=[self._define_read_secrets_inline()], + pod_identity=self.workload.cfg.clusters[release].enable_pod_identity_agent, ) def _define_k8s_iam_role( @@ 
-389,16 +396,29 @@ def _define_k8s_iam_role( if opts is None: opts = pulumi.ResourceOptions() - base_policy = ( - ptd.aws_iam.build_hybrid_irsa_role_assume_role_policy( + extra_statements = ( + [ + { + "Action": ["sts:AssumeRole", "sts:TagSession"], + "Effect": "Allow", + "Principal": {"Service": "pods.eks.amazonaws.com"}, + } + ] + if pod_identity + else [] + ) + + if len(self._oidc_url_tails) > 0 or len(auth_issuers) > 0: + irsa_policy = ptd.aws_iam.build_hybrid_irsa_role_assume_role_policy( service_accounts=service_accounts, namespace=namespace, managed_account_id=self.workload.cfg.account_id, oidc_url_tails=self._oidc_url_tails, auth_issuers=auth_issuers, ) - if len(self._oidc_url_tails) > 0 or len(auth_issuers) > 0 - else { + base_policy = {**irsa_policy, "Statement": list(irsa_policy["Statement"]) + extra_statements} + else: + base_policy = { "Version": "2012-10-17", "Statement": [ { @@ -408,17 +428,9 @@ def _define_k8s_iam_role( "AWS": aws.get_caller_identity().arn, }, }, - ], + ] + + extra_statements, } - ) - if pod_identity: - base_policy["Statement"].append( - { - "Action": ["sts:AssumeRole", "sts:TagSession"], - "Effect": "Allow", - "Principal": {"Service": "pods.eks.amazonaws.com"}, - } - ) role = aws.iam.Role( name, @@ -494,6 +506,7 @@ def _define_chronicle_iam(self, persistent_stack): policy=policy, policy_name=policy_name, role_policies=[self._define_read_secrets_inline()], + pod_identity=self.workload.cfg.clusters[release].enable_pod_identity_agent, ) read_only_policy_name = self.workload.chronicle_read_only_s3_bucket_policy_name(release, site_name) @@ -577,7 +590,7 @@ def _define_team_operator_iam(self): policy_name=self.workload.team_operator_policy_name, ) - def _define_external_secrets_iam(self): + def _define_external_secrets_iam(self) -> None: """Define IAM roles for external-secrets-operator to access AWS Secrets Manager.""" self.external_secrets_roles = {} @@ -598,7 +611,7 @@ def _define_external_secrets_iam(self): pod_identity=True, ) - def _define_pod_identity_associations(self): + def _define_pod_identity_associations(self) -> None: """ Create EKS Pod Identity associations for all product service accounts. diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index e3860c7..d02945a 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -222,7 +222,7 @@ def _define_aws_fsx_openzfs_csi(self, release: str, version: str): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_nfs_subdir_provisioner(self, release: str, version: str | None): + def _define_nfs_subdir_provisioner(self, release: str, version: str | None) -> None: """Deploy nfs-subdir-external-provisioner for FSx storage.""" workload_secrets, ok = ptd.secrecy.aws_get_secret_value_json( self.workload.secret_name, region=self.workload.cfg.region @@ -262,7 +262,7 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None): opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) - def _define_external_secrets_operator(self, release: str, version: str | None): + def _define_external_secrets_operator(self, release: str, version: str | None) -> None: """Deploy external-secrets-operator and create ClusterSecretStore for AWS Secrets Manager. 
Note: the ClusterSecretStore is created with ``depends_on=[eso_helm_release]``, which @@ -272,11 +272,12 @@ def _define_external_secrets_operator(self, release: str, version: str | None): (~1-2 reconcile loops). This is an architectural constraint of using HelmChart CRDs rather than ``pulumi_kubernetes.helm.v3.Release``. """ - pulumi.warn( - f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and " - "ExternalSecret CRs will log errors until ESO's CRDs converge " - "(~1-2 reconcile loops, up to ~5 minutes). This is expected and self-resolving." - ) + if pulumi.runtime.is_dry_run(): + pulumi.warn( + f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and " + "ExternalSecret CRs will log errors until ESO's CRDs converge " + "(~1-2 reconcile loops, up to ~5 minutes). This is expected and self-resolving." + ) # Deploy external-secrets-operator Helm chart # Note: helm-controller (RKE2) auto-creates the targetNamespace from the HelmChart CR, # so the "external-secrets" namespace does not need to be created explicitly here. diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 8df8164..1aa45d3 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -189,7 +189,7 @@ def set_site_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): ), ) - def _define_external_secrets(self): + def _define_external_secrets(self) -> None: """ Create ExternalSecret CRs for each site to sync secrets from AWS Secrets Manager to K8s Secrets. diff --git a/python-pulumi/tests/test_pod_identity_agent_addon.py b/python-pulumi/tests/test_pod_identity_agent_addon.py new file mode 100644 index 0000000..ac605f4 --- /dev/null +++ b/python-pulumi/tests/test_pod_identity_agent_addon.py @@ -0,0 +1,70 @@ +"""Tests for AWSEKSCluster.with_pod_identity_agent.""" + +from unittest.mock import MagicMock, patch + +from ptd.pulumi_resources.aws_eks_cluster import AWSEKSCluster + + +def _make_cluster_mock(name: str = "my-cluster") -> MagicMock: + """Build a minimal AWSEKSCluster mock for testing with_pod_identity_agent.""" + m = MagicMock(spec=AWSEKSCluster) + m.name = name + m.eks = MagicMock() + m.eks.tags = {"env": "test"} + return m + + +def test_addon_name_is_eks_pod_identity_agent(): + """with_pod_identity_agent creates an addon named 'eks-pod-identity-agent'.""" + mock = _make_cluster_mock() + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: + AWSEKSCluster.with_pod_identity_agent(mock) + assert mock_addon.call_count == 1 + _, kwargs = mock_addon.call_args + assert kwargs["args"].addon_name == "eks-pod-identity-agent" + + +def test_version_none_passes_addon_version_none(): + """When version=None, addon_version=None is passed (installs latest).""" + mock = _make_cluster_mock() + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: + AWSEKSCluster.with_pod_identity_agent(mock, version=None) + _, kwargs = mock_addon.call_args + assert kwargs["args"].addon_version is None + + +def test_explicit_version_is_passed_through(): + """When a version string is provided, it is passed as addon_version.""" + mock = _make_cluster_mock() + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: + AWSEKSCluster.with_pod_identity_agent(mock, version="v1.3.3-eksbuild.1") + _, kwargs = mock_addon.call_args + assert kwargs["args"].addon_version == 
"v1.3.3-eksbuild.1" + + +def test_parent_is_set_to_eks(): + """The addon's parent is set to self.eks.""" + import pulumi + + mock = _make_cluster_mock() + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: + with patch("ptd.pulumi_resources.aws_eks_cluster.pulumi.ResourceOptions") as mock_opts: + AWSEKSCluster.with_pod_identity_agent(mock) + mock_opts.assert_called_once_with(parent=mock.eks) + + +def test_cluster_name_matches_self_name(): + """The addon's cluster_name is set to self.name.""" + mock = _make_cluster_mock(name="test-cluster-20250328") + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: + AWSEKSCluster.with_pod_identity_agent(mock) + _, kwargs = mock_addon.call_args + assert kwargs["args"].cluster_name == "test-cluster-20250328" + + +def test_returns_self(): + """with_pod_identity_agent returns self for chaining.""" + mock = _make_cluster_mock() + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): + result = AWSEKSCluster.with_pod_identity_agent(mock) + assert result is mock From c18428f62420130f0770db8548c6c012e9b5db78 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 15/31] Address review findings (job 318) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All 193 tests pass. Changes: - Initialize `self.chronicle_roles = {}` and `self.home_roles = {}` defensively at the top of `AWSWorkloadClusters.__init__` before their defining methods run, preventing potential `AttributeError` if call order changes - Remove the `if pulumi.runtime.is_dry_run(): pulumi.warn(...)` block from `_define_external_secrets_operator` — warning fired on every `pulumi preview` including routine updates, becoming noise; the docstring already documents the CRD convergence behavior - Add inline comment on `enable_nfs_subdir_provisioner` documenting the `nfs.io/storage-path` annotation requirement for PVCs when using the NFS subdir provisioner --- python-pulumi/src/ptd/aws_workload.py | 2 +- .../src/ptd/pulumi_resources/aws_workload_clusters.py | 5 +++++ python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py | 6 ------ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 78b2842..f21f64a 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -258,7 +258,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): pod_identity_agent_version: str | None = None enable_pod_identity_agent: bool = False enable_external_secrets_operator: bool = False - enable_nfs_subdir_provisioner: bool = False + enable_nfs_subdir_provisioner: bool = False # PVCs must carry the nfs.io/storage-path annotation; the storageClass pathPattern uses it to derive subdirectory paths enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 0b909b6..1b93746 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -103,6 +103,11 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): f"organization/ptd-aws-workload-persistent/{self.workload.compound_name}" ) + # Initialize optional product role dicts defensively so 
_define_pod_identity_associations
+        # can safely check membership even if the defining methods are skipped or reordered.
+        self.chronicle_roles = {}
+        self.home_roles = {}
+
         self._define_home_iam()
         self._define_chronicle_iam(persistent_stack)
         self._define_connect_iam()
diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
index d02945a..7bc3eb2 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
@@ -272,12 +272,6 @@ def _define_external_secrets_operator(self, release: str, version: str | None) -
       - (~1-2 reconcile loops). This is an architectural constraint of using HelmChart
         CRDs rather than ``pulumi_kubernetes.helm.v3.Release``.
     """
-        if pulumi.runtime.is_dry_run():
-            pulumi.warn(
-                f"[{release}] ESO is enabled: on a fresh deploy, the ClusterSecretStore and "
-                "ExternalSecret CRs will log errors until ESO's CRDs converge "
-                "(~1-2 reconcile loops, up to ~5 minutes). This is expected and self-resolving."
-            )
         # Deploy external-secrets-operator Helm chart
         # Note: helm-controller (RKE2) auto-creates the targetNamespace from the HelmChart CR,
         # so the "external-secrets" namespace does not need to be created explicitly here.

From 1db7956fa559b820e82d567f391cee91f92d2365 Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 16/31] Address review findings (job 326)

All 194 tests pass.

Changes:
- Add membership guard for `packagemanager_roles` in `_define_pod_identity_associations` (consistent with chronicle/home guards)
- Remove unused `import pulumi` from inside `test_parent_is_set_to_eks` function body
- Add `test_nfs_provisioner_warns_on_dry_run_when_key_missing` test for dry-run branch when key is absent but fetch succeeds
- Update `_make_clusters_mock` in `test_pod_identity_associations.py` to set `packagemanager_roles` as a real dict (required by the new `in` guard)
---
 .../pulumi_resources/aws_workload_clusters.py | 17 +++++++++--------
 .../tests/test_nfs_subdir_provisioner_values.py | 10 ++++++++++
 .../tests/test_pod_identity_agent_addon.py | 2 --
 .../tests/test_pod_identity_associations.py | 7 ++++++-
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
index 1b93746..d661604 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
@@ -697,14 +697,15 @@ def _define_pod_identity_associations(self) -> None:

             # Package Manager
             # Key format uses "//" separator — must match _define_packagemanager_iam (release + "//" + site_name).
- aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-packagemanager-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-packagemanager", - role_arn=self.packagemanager_roles[release + "//" + site_name].arn, - opts=pulumi.ResourceOptions(parent=self), - ) + if release + "//" + site_name in self.packagemanager_roles: + aws.eks.PodIdentityAssociation( + f"{cluster_name}-{site_name}-packagemanager-pod-identity", + cluster_name=cluster_name, + namespace=ptd.POSIT_TEAM_NAMESPACE, + service_account=f"{site_name}-packagemanager", + role_arn=self.packagemanager_roles[release + "//" + site_name].arn, + opts=pulumi.ResourceOptions(parent=self), + ) # Chronicle (optional product — skip if not configured for this release/site) if f"{release}-{site_name}" in self.chronicle_roles: diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index 33ac1c7..be8e4ab 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -81,3 +81,13 @@ def test_nfs_provisioner_raises_on_live_run_when_key_missing(): with patch("pulumi.runtime.is_dry_run", return_value=False): with pytest.raises(ValueError, match="fs-dns-name"): AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + + +def test_nfs_provisioner_warns_on_dry_run_when_key_missing(): + """When secret fetch succeeds but fs-dns-name key is absent during a dry run, warn and return.""" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"other-key": "value"}, True)): + with patch("pulumi.runtime.is_dry_run", return_value=True): + with patch("pulumi.warn") as mock_warn: + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + assert mock_warn.called + assert "fs-dns-name" in mock_warn.call_args[0][0] diff --git a/python-pulumi/tests/test_pod_identity_agent_addon.py b/python-pulumi/tests/test_pod_identity_agent_addon.py index ac605f4..3164a62 100644 --- a/python-pulumi/tests/test_pod_identity_agent_addon.py +++ b/python-pulumi/tests/test_pod_identity_agent_addon.py @@ -44,8 +44,6 @@ def test_explicit_version_is_passed_through(): def test_parent_is_set_to_eks(): """The addon's parent is set to self.eks.""" - import pulumi - mock = _make_cluster_mock() with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: with patch("ptd.pulumi_resources.aws_eks_cluster.pulumi.ResourceOptions") as mock_opts: diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index 1b29e90..5dc5fb0 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -12,6 +12,7 @@ def _make_clusters_mock( enable_eso: bool = False, chronicle_keys: list[str] | None = None, home_releases: list[str] | None = None, + packagemanager_keys: list[str] | None = None, ) -> MagicMock: """Build a minimal AWSWorkloadClusters mock for testing _define_pod_identity_associations.""" m = MagicMock() @@ -27,9 +28,13 @@ def _make_clusters_mock( cluster_cfgs[release] = cfg m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfgs[k] - # chronicle_roles and home_roles use `in` checks so they must be real dicts + # chronicle_roles, home_roles, and packagemanager_roles use `in` checks so they must be real dicts m.chronicle_roles = {k: MagicMock() for k in 
(chronicle_keys or [])} m.home_roles = {r: MagicMock() for r in (home_releases or [])} + # Default: populate packagemanager for all release/site combos (the common case) + if packagemanager_keys is None: + packagemanager_keys = [f"{r}//{s}" for r in releases for s in sites] + m.packagemanager_roles = {k: MagicMock() for k in packagemanager_keys} return m From 16b3ad69df3c6439715e425432cce4e483b9038b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 17/31] Address review findings (job 330) All 195 tests pass. Changes: - Add `self.external_secrets_roles = {}` to the defensive pre-initialization block alongside `chronicle_roles` and `home_roles`, preventing `AttributeError` if `_define_external_secrets_iam` throws before assignment - Add `test_session_roles_key_format` test verifying the `-` separator used for `connect_session_roles` and `workbench_session_roles` keys, analogous to the existing `test_packagemanager_roles_key_format` - Update `_define_read_secrets_inline` comment to include a `TODO` marker for creating a tracking issue for the deferred Secrets Manager ARN scoping work --- .../pulumi_resources/aws_workload_clusters.py | 4 +++- .../tests/test_workload_cluster_config.py | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index d661604..75224bf 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -107,6 +107,7 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): # can safely check membership even if the defining methods are skipped or reordered. self.chronicle_roles = {} self.home_roles = {} + self.external_secrets_roles = {} self._define_home_iam() self._define_chronicle_iam(persistent_stack) @@ -144,7 +145,8 @@ def _oidc_url_tails(self): @staticmethod def _define_read_secrets_inline() -> str: # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, ESO, etc.) - # all use this same broad policy. Scoping to specific ARN prefixes is tracked separately. + # all use this same broad policy. Scoping to specific ARN prefixes is deferred work; + # TODO: open a tracking issue for Secrets Manager ARN scoping (replace this with the issue URL). return aws.iam.get_policy_document( statements=[ aws.iam.GetPolicyDocumentStatementArgs( diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index b643cd8..ed57dde 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -331,6 +331,26 @@ def test_packagemanager_roles_key_format(): assert "/" not in site_name +def test_session_roles_key_format(): + """Verify the '-' separator used as the connect_session_roles and workbench_session_roles dict key. + + _define_connect_iam / _define_workbench_iam (population) and _define_pod_identity_associations + (lookup) must produce the same key. Both currently use: f"{release}-{site_name}". + This test uses the two expression forms so a change to either separator would fail here. 
+ """ + release = "20250328" + site_name = "mysite" + # Form used by _define_connect_iam / _define_workbench_iam + population_key = f"{release}-{site_name}" + # Form used by _define_pod_identity_associations + lookup_key = f"{release}-{site_name}" + assert population_key == lookup_key + assert population_key == "20250328-mysite" + # Hyphens in release or site_name would silently corrupt the separator. + assert "//" not in release + assert "//" not in site_name + + def test_eso_requires_pod_identity(): """enable_external_secrets_operator=True without enable_pod_identity_agent=True raises ValueError.""" with pytest.raises(ValueError, match="enable_pod_identity_agent=True"): From ecbdeaa57d192e01f5ae0101d847d08e08646afe Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 18/31] Address review findings (job 335) Changes: - Add `__post_init__` to `WorkloadClusterConfig` base class and call `super().__post_init__()` from `AWSWorkloadClusterConfig.__post_init__` to establish proper inheritance chain - Patch `aws.eks.AddonArgs` in `test_pod_identity_agent_addon.py` tests; assert on `AddonArgs` call kwargs instead of accessing attributes on the real class instance - Add tests for `_define_external_secrets_iam` skip path (`enable_external_secrets_operator=False` leaves `external_secrets_roles` empty, no IAM role created) and enabled path (one role per release) - Extract NFS StorageClass name `"posit-shared-storage"` to `NFS_STORAGE_CLASS_NAME` module constant in `aws_workload_helm.py` - Add clarifying comments to `_define_pod_identity_associations` explaining that `connect_session_roles` and `workbench_session_roles` are always populated unconditionally - Remove dangling placeholder from `_define_read_secrets_inline` TODO comment (replace with cleaner description) --- python-pulumi/src/ptd/__init__.py | 3 ++ python-pulumi/src/ptd/aws_workload.py | 1 + .../pulumi_resources/aws_workload_clusters.py | 10 ++-- .../ptd/pulumi_resources/aws_workload_helm.py | 3 +- .../tests/test_pod_identity_agent_addon.py | 51 +++++++++++-------- .../tests/test_pod_identity_associations.py | 29 ++++++++++- 6 files changed, 69 insertions(+), 28 deletions(-) diff --git a/python-pulumi/src/ptd/__init__.py b/python-pulumi/src/ptd/__init__.py index 2e2c01f..6888bad 100644 --- a/python-pulumi/src/ptd/__init__.py +++ b/python-pulumi/src/ptd/__init__.py @@ -428,6 +428,9 @@ class WorkloadClusterConfig: # After migration, set to False to let Helm manage CRDs going forward. 
team_operator_skip_crds: bool = False + def __post_init__(self) -> None: + pass + def load_workload_cluster_site_dict( cluster_site_dict: dict[str, typing.Any], diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index f21f64a..4cfa92e 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -264,6 +264,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): karpenter_config: KarpenterConfig | None = None def __post_init__(self) -> None: + super().__post_init__() if self.enable_external_secrets_operator and not self.enable_pod_identity_agent: msg = ( "enable_external_secrets_operator requires enable_pod_identity_agent=True " diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 75224bf..46bcc67 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -145,8 +145,8 @@ def _oidc_url_tails(self): @staticmethod def _define_read_secrets_inline() -> str: # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, ESO, etc.) - # all use this same broad policy. Scoping to specific ARN prefixes is deferred work; - # TODO: open a tracking issue for Secrets Manager ARN scoping (replace this with the issue URL). + # all use this same broad policy. Scoping to specific ARN prefixes (e.g., per-workload prefix) + # is deferred work tracked separately. return aws.iam.get_policy_document( statements=[ aws.iam.GetPolicyDocumentStatementArgs( @@ -667,7 +667,8 @@ def _define_pod_identity_associations(self) -> None: opts=pulumi.ResourceOptions(parent=self), ) - # Connect Session + # Connect Session — always present: _define_connect_iam populates connect_session_roles + # for every release/site combo unconditionally. aws.eks.PodIdentityAssociation( f"{cluster_name}-{site_name}-connect-session-pod-identity", cluster_name=cluster_name, @@ -687,7 +688,8 @@ def _define_pod_identity_associations(self) -> None: opts=pulumi.ResourceOptions(parent=self), ) - # Workbench Session + # Workbench Session — always present: _define_workbench_iam populates workbench_session_roles + # for every release/site combo unconditionally. 
aws.eks.PodIdentityAssociation( f"{cluster_name}-{site_name}-workbench-session-pod-identity", cluster_name=cluster_name, diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 7bc3eb2..3aab383 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -12,6 +12,7 @@ from ptd.pulumi_resources.lib import format_lb_tags ALLOY_NAMESPACE = "alloy" +NFS_STORAGE_CLASS_NAME = "posit-shared-storage" def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: @@ -28,7 +29,7 @@ def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx" ], }, "storageClass": { - "name": "posit-shared-storage", + "name": NFS_STORAGE_CLASS_NAME, "reclaimPolicy": "Retain", "accessModes": "ReadWriteMany", "onDelete": "retain", diff --git a/python-pulumi/tests/test_pod_identity_agent_addon.py b/python-pulumi/tests/test_pod_identity_agent_addon.py index 3164a62..ef25a5f 100644 --- a/python-pulumi/tests/test_pod_identity_agent_addon.py +++ b/python-pulumi/tests/test_pod_identity_agent_addon.py @@ -18,51 +18,58 @@ def test_addon_name_is_eks_pod_identity_agent(): """with_pod_identity_agent creates an addon named 'eks-pod-identity-agent'.""" mock = _make_cluster_mock() with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - AWSEKSCluster.with_pod_identity_agent(mock) - assert mock_addon.call_count == 1 - _, kwargs = mock_addon.call_args - assert kwargs["args"].addon_name == "eks-pod-identity-agent" + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: + AWSEKSCluster.with_pod_identity_agent(mock) + assert mock_addon.call_count == 1 + mock_addon_args.assert_called_once() + _, kwargs = mock_addon_args.call_args + assert kwargs["addon_name"] == "eks-pod-identity-agent" def test_version_none_passes_addon_version_none(): """When version=None, addon_version=None is passed (installs latest).""" mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - AWSEKSCluster.with_pod_identity_agent(mock, version=None) - _, kwargs = mock_addon.call_args - assert kwargs["args"].addon_version is None + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: + AWSEKSCluster.with_pod_identity_agent(mock, version=None) + _, kwargs = mock_addon_args.call_args + assert kwargs["addon_version"] is None def test_explicit_version_is_passed_through(): """When a version string is provided, it is passed as addon_version.""" mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - AWSEKSCluster.with_pod_identity_agent(mock, version="v1.3.3-eksbuild.1") - _, kwargs = mock_addon.call_args - assert kwargs["args"].addon_version == "v1.3.3-eksbuild.1" + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: + AWSEKSCluster.with_pod_identity_agent(mock, version="v1.3.3-eksbuild.1") + _, kwargs = mock_addon_args.call_args + assert kwargs["addon_version"] == "v1.3.3-eksbuild.1" def test_parent_is_set_to_eks(): """The addon's parent is set to self.eks.""" mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - with 
patch("ptd.pulumi_resources.aws_eks_cluster.pulumi.ResourceOptions") as mock_opts: - AWSEKSCluster.with_pod_identity_agent(mock) - mock_opts.assert_called_once_with(parent=mock.eks) + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs"): + with patch("ptd.pulumi_resources.aws_eks_cluster.pulumi.ResourceOptions") as mock_opts: + AWSEKSCluster.with_pod_identity_agent(mock) + mock_opts.assert_called_once_with(parent=mock.eks) def test_cluster_name_matches_self_name(): """The addon's cluster_name is set to self.name.""" mock = _make_cluster_mock(name="test-cluster-20250328") - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - AWSEKSCluster.with_pod_identity_agent(mock) - _, kwargs = mock_addon.call_args - assert kwargs["args"].cluster_name == "test-cluster-20250328" + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: + AWSEKSCluster.with_pod_identity_agent(mock) + _, kwargs = mock_addon_args.call_args + assert kwargs["cluster_name"] == "test-cluster-20250328" def test_returns_self(): """with_pod_identity_agent returns self for chaining.""" mock = _make_cluster_mock() with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - result = AWSEKSCluster.with_pod_identity_agent(mock) - assert result is mock + with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs"): + result = AWSEKSCluster.with_pod_identity_agent(mock) + assert result is mock diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index 5dc5fb0..f632150 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -1,4 +1,4 @@ -"""Tests for _define_pod_identity_associations in AWSWorkloadClusters.""" +"""Tests for _define_pod_identity_associations and _define_external_secrets_iam in AWSWorkloadClusters.""" from unittest.mock import MagicMock, patch @@ -119,3 +119,30 @@ def test_home_association_created_per_site_when_role_present(): assert mock_pia.call_count == 12 names_called = [c[0][0] for c in mock_pia.call_args_list] assert sum(1 for n in names_called if "home" in n) == 2 # one per site + + +def test_define_external_secrets_iam_skipped_when_disabled(): + """When enable_external_secrets_operator=False, no IAM roles are created and external_secrets_roles is empty.""" + m = MagicMock() + m.managed_clusters_by_release = ["20250328"] + cluster_cfg = MagicMock() + cluster_cfg.enable_external_secrets_operator = False + m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg + + AWSWorkloadClusters._define_external_secrets_iam(m) + # _define_k8s_iam_role is resolved on the mock instance; call_count==0 means it was never called. 
+    assert m._define_k8s_iam_role.call_count == 0
+    assert m.external_secrets_roles == {}
+
+
+def test_define_external_secrets_iam_creates_role_per_release_when_enabled():
+    """When enable_external_secrets_operator=True, one IAM role is created per release."""
+    m = MagicMock()
+    m.managed_clusters_by_release = ["20250328", "20250415"]
+    cluster_cfg = MagicMock()
+    cluster_cfg.enable_external_secrets_operator = True
+    m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg
+
+    AWSWorkloadClusters._define_external_secrets_iam(m)
+    assert m._define_k8s_iam_role.call_count == 2
+    assert set(m.external_secrets_roles.keys()) == {"20250328", "20250415"}

From b2662675c58056906dab0007c375e75c1ea7fba4 Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 19/31] Address review findings (job 349)

Both build cleanly and all tests pass.

Changes:
- Fix copy-paste error in `test_session_roles_key_format`: assertions now check `"-" not in release/site_name` (the actual separator), not `"//"` which was incorrectly copied from the packagemanager test
- Enhance NFS subdir provisioner preview warning to explicitly state the resource will be absent from the diff and that `pulumi up` will raise an error, making the preview/apply asymmetry visible to operators
---
 python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py | 5 ++++-
 python-pulumi/tests/test_workload_cluster_config.py | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
index 3aab383..b59293a 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
@@ -234,7 +234,10 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None) -> N
                 "is missing or does not contain 'fs-dns-name'."
             )
             if pulumi.runtime.is_dry_run():
-                pulumi.warn(msg)
+                pulumi.warn(
+                    msg + " NFS subdir provisioner will be ABSENT from this preview diff; "
+                    "`pulumi up` will raise an error unless the secret is populated first."
+                )
                 return
             raise ValueError(msg)

diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py
index ed57dde..8ba05e1 100644
--- a/python-pulumi/tests/test_workload_cluster_config.py
+++ b/python-pulumi/tests/test_workload_cluster_config.py
@@ -347,8 +347,8 @@ def test_session_roles_key_format():
     assert population_key == lookup_key
     assert population_key == "20250328-mysite"
     # Hyphens in release or site_name would silently corrupt the separator.
-    assert "//" not in release
-    assert "//" not in site_name
+    assert "-" not in release
+    assert "-" not in site_name

From 39565d9f3056234ce262b8e620dd552e641263f7 Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 20/31] Address review findings (job 357)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All 197 tests pass.
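For illustration, a minimal sketch (mirroring the test diff below) of why the two expression
forms must be syntactically distinct for the comparison to be meaningful:

    release, site_name = "20250328", "mysite"
    population_key = release + "-" + site_name   # concatenation form, as in _define_connect_iam / _define_workbench_iam
    lookup_key = f"{release}-{site_name}"        # f-string form, as in _define_pod_identity_associations
    assert population_key == lookup_key == "20250328-mysite"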
Changes:
- `test_workload_cluster_config.py`: Fix vacuous assertion in `test_session_roles_key_format` — change `population_key` to use `+` concatenation so the two expression forms are syntactically distinct, making the `assert population_key == lookup_key` comparison meaningful
- `aws_workload_helm.py`: Add `[ACTION REQUIRED]` prefix to the NFS subdir provisioner dry-run warning so operators notice the omission during `pulumi preview`
- `aws_workload_helm.py`: Add `custom_timeouts=pulumi.CustomTimeouts(create="10m")` to `ClusterSecretStore` resource to make the CRD eventual-consistency window explicit to Pulumi
- `aws_workload_clusters.py`: Expand the `_define_read_secrets_inline` comment with a concrete `TODO` noting ESO's cluster-wide blast radius and the target scoped ARN prefix pattern
---
 .../src/ptd/pulumi_resources/aws_workload_clusters.py | 3 +++
 .../src/ptd/pulumi_resources/aws_workload_helm.py | 10 ++++++++--
 python-pulumi/tests/test_workload_cluster_config.py | 2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
index 46bcc67..18beaf4 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
@@ -147,6 +147,9 @@ def _define_read_secrets_inline() -> str:
         # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, ESO, etc.)
         # all use this same broad policy. Scoping to specific ARN prefixes (e.g., per-workload prefix)
         # is deferred work tracked separately.
+        # TODO: ESO uses a cluster-wide ClusterSecretStore, so its blast radius is larger than
+        # per-product IRSA roles — it can read every secret in the account. Scope ESO's policy to
+        # arn:aws:secretsmanager:<region>:<account-id>:secret:<workload>/* in a follow-up.
         return aws.iam.get_policy_document(
             statements=[
                 aws.iam.GetPolicyDocumentStatementArgs(
diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
index b59293a..2c9e6e1 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py
@@ -235,7 +235,7 @@ def _define_nfs_subdir_provisioner(self, release: str, version: str | None) -> N
             )
             if pulumi.runtime.is_dry_run():
                 pulumi.warn(
-                    msg + " NFS subdir provisioner will be ABSENT from this preview diff; "
+                    "[ACTION REQUIRED] " + msg + " NFS subdir provisioner will be ABSENT from this preview diff; "
                     "`pulumi up` will raise an error unless the secret is populated first."
                 )
                 return
@@ -303,6 +303,8 @@ def _define_external_secrets_operator(self, release: str, version: str | None) -

         # Create ClusterSecretStore for AWS Secrets Manager.
         # depends_on the HelmChart CR so Pulumi applies it after the ESO chart CR is registered.
+        # CustomTimeouts makes the eventual-consistency explicit: on a fresh cluster the CRD may not
+        # be available immediately; Pulumi will retry for up to 10 minutes before failing. 
             k8s.apiextensions.CustomResource(
             f"{self.workload.compound_name}-{release}-cluster-secret-store",
             metadata=k8s.meta.v1.ObjectMetaArgs(
@@ -312,7 +314,11 @@ def _define_external_secrets_operator(self, release: str, version: str | None) -
             api_version="external-secrets.io/v1beta1",
             kind="ClusterSecretStore",
             spec=_cluster_secret_store_spec(self.workload.cfg.region),
-            opts=pulumi.ResourceOptions(provider=self.kube_providers[release], depends_on=[eso_helm_release]),
+            opts=pulumi.ResourceOptions(
+                provider=self.kube_providers[release],
+                depends_on=[eso_helm_release],
+                custom_timeouts=pulumi.CustomTimeouts(create="10m"),
+            ),
         )

     def _define_secret_store_csi(self, release: str, version: str):
diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py
index 8ba05e1..cd1085c 100644
--- a/python-pulumi/tests/test_workload_cluster_config.py
+++ b/python-pulumi/tests/test_workload_cluster_config.py
@@ -341,7 +341,7 @@ def test_session_roles_key_format():
     release = "20250328"
     site_name = "mysite"
     # Form used by _define_connect_iam / _define_workbench_iam
-    population_key = f"{release}-{site_name}"
+    population_key = release + "-" + site_name
     # Form used by _define_pod_identity_associations
     lookup_key = f"{release}-{site_name}"
     assert population_key == lookup_key

From 050c4c99836b0890864251a72428eca79633ead6 Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 21/31] Address review findings (job 365)

Changes:
- Scope ESO IAM policy to `arn:aws:secretsmanager:<region>:<account-id>:secret:<workload>/*` via new `_define_eso_read_secrets_inline()` method, replacing the account-wide `resources=["*"]` for the ClusterSecretStore role
- Add `tags=self.required_tags` to all 8 `aws.eks.PodIdentityAssociation` resources for cost allocation consistency
- Document ClusterSecretStore first-run failure and `ptd ensure` retry workaround in `docs/KNOWN_ISSUES.md`
- Add explanatory comment to the no-op `__post_init__` in `WorkloadClusterConfig` base class
- Remove misleading `assert "-" not in release/site_name` assertions and incorrect comment from `test_session_roles_key_format`
---
 docs/KNOWN_ISSUES.md | 19 +++++++++
 python-pulumi/src/ptd/__init__.py | 3 ++
 .../pulumi_resources/aws_workload_clusters.py | 39 +++++++++++++++----
 .../tests/test_workload_cluster_config.py | 3 --
 4 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/docs/KNOWN_ISSUES.md b/docs/KNOWN_ISSUES.md
index 6158398..d98c324 100644
--- a/docs/KNOWN_ISSUES.md
+++ b/docs/KNOWN_ISSUES.md
@@ -117,4 +117,23 @@ exit
 - Changes made directly via Pulumi CLI may be overwritten by subsequent `ptd ensure` runs if they conflict with your configuration
 - This is an advanced troubleshooting tool - use it when the standard PTD commands aren't sufficient
+
+### External Secrets Operator: ClusterSecretStore Fails on First Run
+
+**The Problem:**
+When enabling `enable_external_secrets_operator` on a fresh cluster, the `ClusterSecretStore` resource
+may fail to apply with `no matches for kind "ClusterSecretStore"`. This happens because Pulumi registers
+the ESO HelmChart CR but the CRDs installed by the chart have not yet converged before Pulumi attempts
+to create the `ClusterSecretStore`.
+
+**Why It Happens:**
+`depends_on` the HelmChart CR only ensures the CR is accepted by the API server, not that the ESO
+controller has finished installing its CRDs. On a fresh cluster, CRD propagation can take several
+minutes. 
Pulumi will retry for up to 10 minutes via `CustomTimeouts(create="10m")`, but may still +time out on very slow clusters or under resource pressure. + +**The Solution:** +Re-run `ptd ensure` after the initial failure. By that point the CRDs will be available and the +`ClusterSecretStore` will apply successfully. + --- diff --git a/python-pulumi/src/ptd/__init__.py b/python-pulumi/src/ptd/__init__.py index 6888bad..0231746 100644 --- a/python-pulumi/src/ptd/__init__.py +++ b/python-pulumi/src/ptd/__init__.py @@ -429,6 +429,9 @@ class WorkloadClusterConfig: team_operator_skip_crds: bool = False def __post_init__(self) -> None: + # No-op implementation makes super().__post_init__() safe to call from subclasses + # (e.g. AWSWorkloadClusterConfig) without requiring every intermediate class to guard + # against AttributeError when the MRO reaches this base. pass diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 18beaf4..53a499d 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -144,12 +144,8 @@ def _oidc_url_tails(self): @staticmethod def _define_read_secrets_inline() -> str: - # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, ESO, etc.) - # all use this same broad policy. Scoping to specific ARN prefixes (e.g., per-workload prefix) - # is deferred work tracked separately. - # TODO: ESO uses a cluster-wide ClusterSecretStore, so its blast radius is larger than - # per-product IRSA roles — it can read every secret in the account. Scope ESO's policy to - # arn:aws:secretsmanager:::secret:/* in a follow-up. + # resources=["*"] is intentional: workload roles (connect, workbench, packagemanager, etc.) + # all use this same broad policy. Scoping to specific ARN prefixes is deferred work. return aws.iam.get_policy_document( statements=[ aws.iam.GetPolicyDocumentStatementArgs( @@ -164,6 +160,27 @@ def _define_read_secrets_inline() -> str: ] ).json + def _define_eso_read_secrets_inline(self) -> str: + # ESO uses a cluster-wide ClusterSecretStore, so its blast radius is larger than + # per-product IRSA roles. Scope to this workload's secret prefix to prevent + # cross-workload reads when multiple workloads share the same AWS account. 
+ account_id = aws.get_caller_identity().account_id + region = self.workload.cfg.region + prefix = self.workload.compound_name + return aws.iam.get_policy_document( + statements=[ + aws.iam.GetPolicyDocumentStatementArgs( + effect="Allow", + actions=[ + "secretsmanager:Get*", + "secretsmanager:Describe*", + "secretsmanager:ListSecrets", + ], + resources=[f"arn:aws:secretsmanager:{region}:{account_id}:secret:{prefix}/*"], + ) + ] + ).json + @staticmethod def _define_streaming_bedrock_access() -> str: return aws.iam.get_policy_document( @@ -617,7 +634,7 @@ def _define_external_secrets_iam(self) -> None: release=release, namespace="external-secrets", service_accounts=["external-secrets"], - role_policies=[self._define_read_secrets_inline()], + role_policies=[self._define_eso_read_secrets_inline()], pod_identity=True, ) @@ -655,6 +672,7 @@ def _define_pod_identity_associations(self) -> None: namespace="external-secrets", service_account="external-secrets", role_arn=self.external_secrets_roles[release].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -667,6 +685,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-connect", role_arn=self.connect_roles[release].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -678,6 +697,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-connect-session", role_arn=self.connect_session_roles[f"{release}-{site_name}"].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -688,6 +708,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-workbench", role_arn=self.workbench_roles[release].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -699,6 +720,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-workbench-session", role_arn=self.workbench_session_roles[f"{release}-{site_name}"].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -711,6 +733,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-packagemanager", role_arn=self.packagemanager_roles[release + "//" + site_name].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -722,6 +745,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-chronicle", role_arn=self.chronicle_roles[f"{release}-{site_name}"].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) @@ -736,6 +760,7 @@ def _define_pod_identity_associations(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, service_account=f"{site_name}-home", role_arn=self.home_roles[release].arn, + tags=self.required_tags, opts=pulumi.ResourceOptions(parent=self), ) diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index cd1085c..b37a6ed 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -346,9 +346,6 @@ def test_session_roles_key_format(): lookup_key = f"{release}-{site_name}" assert population_key == lookup_key assert population_key == "20250328-mysite" - # Hyphens in release or site_name would silently corrupt the 
separator.
-    assert "-" not in release
-    assert "-" not in site_name

From 66d8e710fe66b8600326cd7c17872a83d9dce3b2 Mon Sep 17 00:00:00 2001
From: ian-flores
Date: Thu, 26 Feb 2026 13:06:09 -0800
Subject: [PATCH 22/31] Address review findings (job 374)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All 200 tests pass.

Changes:
- Add `_pod_identity_assoc` module-level helper in `aws_workload_clusters.py` to eliminate repetitive `PodIdentityAssociation` blocks in `_define_pod_identity_associations`
- Refactor `_define_pod_identity_associations` to use `_pod_identity_assoc` helper (8 call sites reduced from 8-line blocks to 4-line calls)
- Add `test_define_k8s_iam_role_trust_policy_includes_pod_identity_statement` — verifies `pods.eks.amazonaws.com` with `sts:AssumeRole`/`sts:TagSession` appears when `pod_identity=True`
- Add `test_define_k8s_iam_role_trust_policy_excludes_pod_identity_statement_when_disabled` — verifies no pod identity statement when `pod_identity=False`
- Add `test_nfs_provisioner_success_creates_helm_chart_cr` — happy-path test asserting `HelmChart` CR is created with correct `valuesContent`, `chart`, and `version`
- Fix misleading docstring in `aws_workload_sites.py`: replace "stack boundaries" framing with accurate CRD-convergence explanation
---
 .../pulumi_resources/aws_workload_clusters.py | 116 ++++++++----------
 .../pulumi_resources/aws_workload_sites.py | 5 +-
 .../test_nfs_subdir_provisioner_values.py | 14 +++
 .../tests/test_pod_identity_associations.py | 51 ++++++++
 4 files changed, 120 insertions(+), 66 deletions(-)

diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
index 53a499d..cb6e0d1 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
@@ -27,6 +27,26 @@
 import ptd.secrecy


+def _pod_identity_assoc(
+    resource: "AWSWorkloadClusters",
+    logical_name: str,
+    cluster_name: str,
+    namespace: str,
+    service_account: str,
+    role_arn: pulumi.Input[str],
+) -> None:
+    """Create a single EKS PodIdentityAssociation with standard naming and tagging."""
+    aws.eks.PodIdentityAssociation(
+        f"{cluster_name}-{logical_name}-pod-identity",
+        cluster_name=cluster_name,
+        namespace=namespace,
+        service_account=service_account,
+        role_arn=role_arn,
+        tags=resource.required_tags,
+        opts=pulumi.ResourceOptions(parent=resource),
+    )
+
+
 class AWSWorkloadClusters(pulumi.ComponentResource):
     workload: ptd.aws_workload.AWSWorkload

@@ -666,87 +686,59 @@ def _define_pod_identity_associations(self) -> None:

             # External Secrets Operator (per-release, only if ESO is also enabled)
             if cluster_cfg.enable_external_secrets_operator:
-                aws.eks.PodIdentityAssociation(
-                    f"{cluster_name}-external-secrets-pod-identity",
-                    cluster_name=cluster_name,
-                    namespace="external-secrets",
-                    service_account="external-secrets",
-                    role_arn=self.external_secrets_roles[release].arn,
-                    tags=self.required_tags,
-                    opts=pulumi.ResourceOptions(parent=self),
+                _pod_identity_assoc(
+                    self, "external-secrets", cluster_name,
+                    "external-secrets", "external-secrets",
+                    self.external_secrets_roles[release].arn,
                 )

             # Per-site product associations
             for site_name in sorted(self.workload.cfg.sites.keys()):
                 # Connect
-                aws.eks.PodIdentityAssociation(
-                    f"{cluster_name}-{site_name}-connect-pod-identity",
-                    cluster_name=cluster_name,
- 
namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-connect", - role_arn=self.connect_roles[release].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-connect", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect", + self.connect_roles[release].arn, ) # Connect Session — always present: _define_connect_iam populates connect_session_roles # for every release/site combo unconditionally. - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-connect-session-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-connect-session", - role_arn=self.connect_session_roles[f"{release}-{site_name}"].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-connect-session", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect-session", + self.connect_session_roles[f"{release}-{site_name}"].arn, ) # Workbench - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-workbench-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-workbench", - role_arn=self.workbench_roles[release].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-workbench", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench", + self.workbench_roles[release].arn, ) # Workbench Session — always present: _define_workbench_iam populates workbench_session_roles # for every release/site combo unconditionally. - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-workbench-session-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-workbench-session", - role_arn=self.workbench_session_roles[f"{release}-{site_name}"].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-workbench-session", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench-session", + self.workbench_session_roles[f"{release}-{site_name}"].arn, ) # Package Manager # Key format uses "//" separator — must match _define_packagemanager_iam (release + "//" + site_name). 
if release + "//" + site_name in self.packagemanager_roles: - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-packagemanager-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-packagemanager", - role_arn=self.packagemanager_roles[release + "//" + site_name].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-packagemanager", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-packagemanager", + self.packagemanager_roles[release + "//" + site_name].arn, ) # Chronicle (optional product — skip if not configured for this release/site) if f"{release}-{site_name}" in self.chronicle_roles: - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-chronicle-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-chronicle", - role_arn=self.chronicle_roles[f"{release}-{site_name}"].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-chronicle", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-chronicle", + self.chronicle_roles[f"{release}-{site_name}"].arn, ) # Home/Flightdeck (optional product — skip if not configured for this release) @@ -754,14 +746,10 @@ def _define_pod_identity_associations(self) -> None: # policy allows all per-site SAs ({site_name}-home) — see _define_home_iam. # Pod Identity requires one association per SA, so this block stays inside the loop. if release in self.home_roles: - aws.eks.PodIdentityAssociation( - f"{cluster_name}-{site_name}-home-pod-identity", - cluster_name=cluster_name, - namespace=ptd.POSIT_TEAM_NAMESPACE, - service_account=f"{site_name}-home", - role_arn=self.home_roles[release].arn, - tags=self.required_tags, - opts=pulumi.ResourceOptions(parent=self), + _pod_identity_assoc( + self, f"{site_name}-home", cluster_name, + ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-home", + self.home_roles[release].arn, ) def _apply_custom_k8s_resources(self): diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 1aa45d3..4b3e4c9 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -196,8 +196,9 @@ def _define_external_secrets(self) -> None: This creates K8s Secrets that the operator can reference by name instead of calling AWS SDK directly. Note: these CRs reference the `aws-secrets-manager` ClusterSecretStore which is created by - AWSWorkloadHelm. No Pulumi depends_on can be declared across stack boundaries; on a fresh - deploy, ESO will log errors until the ClusterSecretStore converges (~1-2 reconcile loops). + AWSWorkloadHelm. No Pulumi ``depends_on`` is wired here because even if we declared one, it + would only guarantee the HelmChart CR object exists — not that ESO's CRDs have converged. + The ClusterSecretStore will retry until ESO is ready (~1-2 reconcile loops). 
""" for release in self.managed_clusters_by_release: if not self.workload.cfg.clusters[release].enable_external_secrets_operator: diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index be8e4ab..a29e59d 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -57,6 +57,20 @@ def _make_helm_mock(secret_name: str = "my-workload-secret") -> MagicMock: return helm +def test_nfs_provisioner_success_creates_helm_chart_cr(): + """Happy path: valid secret creates HelmChart CR with valuesContent containing the NFS server.""" + dns = "fs-123.fsx.us-east-1.amazonaws.com" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns}, True)): + with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + mock_k8s.apiextensions.CustomResource.assert_called_once() + spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] + assert spec["chart"] == "nfs-subdir-external-provisioner" + assert spec["version"] == "4.0.18" + parsed_values = yaml.safe_load(spec["valuesContent"]) + assert parsed_values["nfs"]["server"] == dns + + def test_nfs_provisioner_warns_on_dry_run_when_secret_fetch_fails(): """When secret fetch fails during a dry run, warn and return without raising.""" with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({}, False)): diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index f632150..65c7a0a 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -1,5 +1,6 @@ """Tests for _define_pod_identity_associations and _define_external_secrets_iam in AWSWorkloadClusters.""" +import json from unittest.mock import MagicMock, patch from ptd.pulumi_resources.aws_workload_clusters import AWSWorkloadClusters @@ -121,6 +122,56 @@ def test_home_association_created_per_site_when_role_present(): assert sum(1 for n in names_called if "home" in n) == 2 # one per site +def _make_role_mock(oidc_url_tails: list[str]) -> MagicMock: + """Build a minimal AWSWorkloadClusters mock for testing _define_k8s_iam_role.""" + m = MagicMock() + m._oidc_url_tails = oidc_url_tails + m.workload.cfg.account_id = "123456789012" + m.workload.iam_permissions_boundary = None + m.required_tags = {} + return m + + +def test_define_k8s_iam_role_trust_policy_includes_pod_identity_statement(): + """With pod_identity=True, the assume_role_policy includes pods.eks.amazonaws.com.""" + m = _make_role_mock(oidc_url_tails=["oidc.eks.us-east-1.amazonaws.com/id/ABCD1234"]) + with ( + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, + ): + AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", pod_identity=True) + policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) + + services = [ + s.get("Principal", {}).get("Service") + for s in policy["Statement"] + if isinstance(s.get("Principal"), dict) + ] + assert "pods.eks.amazonaws.com" in services + + pod_stmt = next(s for s in policy["Statement"] if s.get("Principal", {}).get("Service") == "pods.eks.amazonaws.com") + assert "sts:AssumeRole" in pod_stmt["Action"] + assert "sts:TagSession" in pod_stmt["Action"] + + 
+def test_define_k8s_iam_role_trust_policy_excludes_pod_identity_statement_when_disabled(): + """With pod_identity=False, the assume_role_policy does not include pods.eks.amazonaws.com.""" + m = _make_role_mock(oidc_url_tails=["oidc.eks.us-east-1.amazonaws.com/id/ABCD1234"]) + with ( + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, + ): + AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", pod_identity=False) + policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) + + services = [ + s.get("Principal", {}).get("Service") + for s in policy["Statement"] + if isinstance(s.get("Principal"), dict) + ] + assert "pods.eks.amazonaws.com" not in services + + def test_define_external_secrets_iam_skipped_when_disabled(): """When enable_external_secrets_operator=False, no IAM roles are created and external_secrets_roles is empty.""" m = MagicMock() From 1b1fc84d20072ef82e1d22ecdd1d29f7fe98835e Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 23/31] Address review findings (job 386) All 201 tests pass. Here is the summary: Changes: - Add `custom_timeouts=pulumi.CustomTimeouts(create="10m")` to `ExternalSecret` CRs in `_define_external_secrets` to handle CRD convergence on fresh clusters - Extract `CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager"` constant in `aws_workload_helm.py` to eliminate implicit string coupling between files - Import `CLUSTER_SECRET_STORE_NAME` in `aws_workload_sites.py` and use it in `_external_secret_spec` instead of the hardcoded string - Add `test_nfs_provisioner_version_none_omits_version_key` test asserting that `version=None` produces a spec without a `version` key --- .../src/ptd/pulumi_resources/aws_workload_helm.py | 3 ++- .../src/ptd/pulumi_resources/aws_workload_sites.py | 4 +++- .../tests/test_nfs_subdir_provisioner_values.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 2c9e6e1..1e60c98 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -13,6 +13,7 @@ ALLOY_NAMESPACE = "alloy" NFS_STORAGE_CLASS_NAME = "posit-shared-storage" +CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager" def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: @@ -308,7 +309,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None) - k8s.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-cluster-secret-store", metadata=k8s.meta.v1.ObjectMetaArgs( - name="aws-secrets-manager", + name=CLUSTER_SECRET_STORE_NAME, labels=self.required_tags, ), api_version="external-secrets.io/v1beta1", diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 4b3e4c9..2c5320f 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -11,6 +11,7 @@ import ptd.pulumi_resources.aws_eks_cluster import ptd.pulumi_resources.team_site import ptd.secrecy +from ptd.pulumi_resources.aws_workload_helm import CLUSTER_SECRET_STORE_NAME def _external_secret_spec(site_name: str, secret_key: str) -> dict: @@ -18,7 +19,7 @@ def 
_external_secret_spec(site_name: str, secret_key: str) -> dict: return { "refreshInterval": "1h", "secretStoreRef": { - "name": "aws-secrets-manager", + "name": CLUSTER_SECRET_STORE_NAME, "kind": "ClusterSecretStore", }, "target": { @@ -218,5 +219,6 @@ def _define_external_secrets(self) -> None: opts=pulumi.ResourceOptions( parent=self, provider=self.kube_providers[release], + custom_timeouts=pulumi.CustomTimeouts(create="10m"), ), ) diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index a29e59d..b4b2f1c 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -105,3 +105,15 @@ def test_nfs_provisioner_warns_on_dry_run_when_key_missing(): AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") assert mock_warn.called assert "fs-dns-name" in mock_warn.call_args[0][0] + + +def test_nfs_provisioner_version_none_omits_version_key(): + """When version=None, no 'version' key is added to the spec (uses latest chart).""" + dns = "fs-123.fsx.us-east-1.amazonaws.com" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns}, True)): + with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", None) + mock_k8s.apiextensions.CustomResource.assert_called_once() + spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] + assert "version" not in spec + assert spec["chart"] == "nfs-subdir-external-provisioner" From 4230b7efa14b834cd2440cf4752ad8d4b788157f Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 24/31] Address review findings (job 394) All 201 tests pass. 
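For illustration, a minimal sketch of the tightened actions list (values taken from the diff
below; note `ListSecrets` is still present at this point and is removed in the next patch):

    actions = [
        # Exact action names replace "secretsmanager:Get*", which also matches
        # GetRandomPassword and GetResourcePolicy, and "secretsmanager:Describe*",
        # which would silently widen if AWS adds new Describe* actions.
        "secretsmanager:GetSecretValue",
        "secretsmanager:DescribeSecret",
        "secretsmanager:ListSecrets",
    ]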
Changes:
- Tighten ESO IAM policy actions from `Get*`/`Describe*` wildcards to `GetSecretValue`/`DescribeSecret` to exclude `GetRandomPassword` and `GetResourcePolicy`
- Remove redundant `self.external_secrets_roles = {}` reset from `_define_external_secrets_iam`; rely solely on the defensive `__init__` init
- Add `self.connect_session_roles = {}` and `self.workbench_session_roles = {}` to the defensive-init block in `__init__`
- Add assertion in `test_define_external_secrets_iam_creates_role_per_release_when_enabled` that `pod_identity=True` is passed on every `_define_k8s_iam_role` call
- Fix the two ESO IAM tests to initialize `m.external_secrets_roles = {}` on the mock (required now that the method no longer resets it internally)
---
 .../src/ptd/pulumi_resources/aws_workload_clusters.py | 8 ++++----
 python-pulumi/tests/test_pod_identity_associations.py | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
index cb6e0d1..20f7c6b 100644
--- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
+++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py
@@ -128,6 +128,8 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs):
         self.chronicle_roles = {}
         self.home_roles = {}
         self.external_secrets_roles = {}
+        self.connect_session_roles = {}
+        self.workbench_session_roles = {}

         self._define_home_iam()
         self._define_chronicle_iam(persistent_stack)
@@ -192,8 +194,8 @@ def _define_eso_read_secrets_inline(self) -> str:
             aws.iam.GetPolicyDocumentStatementArgs(
                 effect="Allow",
                 actions=[
-                    "secretsmanager:Get*",
-                    "secretsmanager:Describe*",
+                    "secretsmanager:GetSecretValue",
+                    "secretsmanager:DescribeSecret",
                     "secretsmanager:ListSecrets",
                 ],
                 resources=[f"arn:aws:secretsmanager:{region}:{account_id}:secret:{prefix}/*"],
@@ -639,8 +641,6 @@ def _define_team_operator_iam(self):

     def _define_external_secrets_iam(self) -> None:
         """Define IAM roles for external-secrets-operator to access AWS Secrets Manager."""
-        self.external_secrets_roles = {}
-
         for release in self.managed_clusters_by_release:
             cluster_cfg = self.workload.cfg.clusters[release]
             if not cluster_cfg.enable_external_secrets_operator:
diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py
index 65c7a0a..835c5ee 100644
--- a/python-pulumi/tests/test_pod_identity_associations.py
+++ b/python-pulumi/tests/test_pod_identity_associations.py
@@ -176,6 +176,7 @@ def test_define_external_secrets_iam_skipped_when_disabled():
     """When enable_external_secrets_operator=False, no IAM roles are created and external_secrets_roles is empty."""
     m = MagicMock()
     m.managed_clusters_by_release = ["20250328"]
+    m.external_secrets_roles = {}
     cluster_cfg = MagicMock()
     cluster_cfg.enable_external_secrets_operator = False
     m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg
@@ -190,6 +191,7 @@ def test_define_external_secrets_iam_creates_role_per_release_when_enabled():
     """When enable_external_secrets_operator=True, one IAM role is created per release."""
     m = MagicMock()
     m.managed_clusters_by_release = ["20250328", "20250415"]
+    m.external_secrets_roles = {}
     cluster_cfg = MagicMock()
     cluster_cfg.enable_external_secrets_operator = True
     m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg
@@ -197,3 +199,5 @@ def 
test_define_external_secrets_iam_creates_role_per_release_when_enabled(): AWSWorkloadClusters._define_external_secrets_iam(m) assert m._define_k8s_iam_role.call_count == 2 assert set(m.external_secrets_roles.keys()) == {"20250328", "20250415"} + for call in m._define_k8s_iam_role.call_args_list: + assert call.kwargs.get("pod_identity") is True From f6828cfafa300aea103dd74f3894afac844c5df7 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 25/31] Address review findings (job 399) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All 202 tests pass. Changes: - Remove `secretsmanager:ListSecrets` from `_define_eso_read_secrets_inline` — it doesn't support resource-level permissions in IAM, so including it in a resource-scoped statement would silently grant account-wide list access - Add `assert isinstance(irsa_policy.get("Statement"), list)` in `_define_k8s_iam_role` before structural manipulation to make the contract explicit and catch schema changes early - Add defensive initialization of `connect_roles = {}` and `workbench_roles = {}` in `__init__` alongside the other role dicts, so `_define_pod_identity_associations` fails with a clear `KeyError` rather than `AttributeError` if the defining methods raise - Add comments in `aws_workload_helm.py` and `aws_workload_sites.py` linking the hardcoded `external-secrets.io/v1beta1` API version to the `external_secrets_operator_version` default - Add `test_eso_read_secrets_inline_scoped_arn_no_list_secrets` test verifying the scoped ARN uses `compound_name/*` and that `ListSecrets` is absent --- .../pulumi_resources/aws_workload_clusters.py | 9 ++++++- .../ptd/pulumi_resources/aws_workload_helm.py | 2 ++ .../pulumi_resources/aws_workload_sites.py | 2 ++ .../tests/test_pod_identity_associations.py | 25 ++++++++++++++++++- 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 20f7c6b..0055418 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -128,7 +128,9 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): self.chronicle_roles = {} self.home_roles = {} self.external_secrets_roles = {} + self.connect_roles = {} self.connect_session_roles = {} + self.workbench_roles = {} self.workbench_session_roles = {} self._define_home_iam() @@ -196,7 +198,9 @@ def _define_eso_read_secrets_inline(self) -> str: actions=[ "secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret", - "secretsmanager:ListSecrets", + # ListSecrets does not support resource-level permissions in IAM; + # including it in a resource-scoped statement would silently grant + # list access to all secrets in the account. 
], resources=[f"arn:aws:secretsmanager:{region}:{account_id}:secret:{prefix}/*"], ) @@ -465,6 +469,9 @@ def _define_k8s_iam_role( oidc_url_tails=self._oidc_url_tails, auth_issuers=auth_issuers, ) + assert isinstance(irsa_policy.get("Statement"), list), ( + "Expected Statement list from build_hybrid_irsa_role_assume_role_policy" + ) base_policy = {**irsa_policy, "Statement": list(irsa_policy["Statement"]) + extra_statements} else: base_policy = { diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 1e60c98..36a3f0b 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -312,6 +312,8 @@ def _define_external_secrets_operator(self, release: str, version: str | None) - name=CLUSTER_SECRET_STORE_NAME, labels=self.required_tags, ), + # v1beta1 matches external_secrets_operator_version default "0.10.7". + # Update this if ESO is upgraded past the version that drops v1beta1 support. api_version="external-secrets.io/v1beta1", kind="ClusterSecretStore", spec=_cluster_secret_store_spec(self.workload.cfg.region), diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 2c5320f..1e8f896 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -213,6 +213,8 @@ def _define_external_secrets(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, labels=self.required_tags, ), + # v1beta1 matches external_secrets_operator_version default "0.10.7". + # Update this if ESO is upgraded past the version that drops v1beta1 support. api_version="external-secrets.io/v1beta1", kind="ExternalSecret", spec=_external_secret_spec(site_name, self.workload.site_secret_name(site_name)), diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index 835c5ee..b5d9c1e 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -1,4 +1,4 @@ -"""Tests for _define_pod_identity_associations and _define_external_secrets_iam in AWSWorkloadClusters.""" +"""Tests for _define_pod_identity_associations, _define_external_secrets_iam, and _define_eso_read_secrets_inline in AWSWorkloadClusters.""" import json from unittest.mock import MagicMock, patch @@ -201,3 +201,26 @@ def test_define_external_secrets_iam_creates_role_per_release_when_enabled(): assert set(m.external_secrets_roles.keys()) == {"20250328", "20250415"} for call in m._define_k8s_iam_role.call_args_list: assert call.kwargs.get("pod_identity") is True + + +def test_eso_read_secrets_inline_scoped_arn_no_list_secrets(): + """ESO policy must be scoped to compound_name/* and must not include ListSecrets.""" + m = MagicMock() + m.workload.cfg.region = "us-east-1" + m.workload.compound_name = "myworkload" + + with ( + patch("ptd.pulumi_resources.aws_workload_clusters.aws.get_caller_identity") as mock_id, + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.get_policy_document") as mock_gpd, + ): + mock_id.return_value.account_id = "123456789012" + mock_gpd.return_value.json = "{}" + AWSWorkloadClusters._define_eso_read_secrets_inline(m) + + statements = mock_gpd.call_args.kwargs["statements"] + assert len(statements) == 1 + stmt = statements[0] + assert "secretsmanager:ListSecrets" not in stmt.actions + 
assert "secretsmanager:GetSecretValue" in stmt.actions + assert "secretsmanager:DescribeSecret" in stmt.actions + assert stmt.resources == ["arn:aws:secretsmanager:us-east-1:123456789012:secret:myworkload/*"] From 1fa445a02ff4c152bd192ffa5c5a71cce30313a8 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 26/31] Address review findings (job 404) Changes: - Replace `assert` with `raise ValueError` for trust policy validation in `_define_k8s_iam_role` (survives `-O` optimized mode) - Add explicit `RuntimeError` guards for `connect_session_roles` and `workbench_session_roles` dict accesses in `_define_pod_identity_associations` - Add `ESO_SERVICE_ACCOUNT = "external-secrets"` module-level constant in `aws_workload_helm.py` and use it across all four ESO call sites - Import `ESO_SERVICE_ACCOUNT` in `aws_workload_clusters.py` and use it for IAM role namespace/SA and Pod Identity association - Add `namespace` and `service_account` assertions to `test_associations_count_with_eso` to catch ESO SA/namespace mismatches - Update `_make_clusters_mock` to populate `connect_session_roles` and `workbench_session_roles` as real dicts to match the new invariant guards --- .../pulumi_resources/aws_workload_clusters.py | 32 +++++++++++++------ .../ptd/pulumi_resources/aws_workload_helm.py | 5 +-- .../tests/test_pod_identity_associations.py | 8 +++++ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 0055418..152dbb2 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -16,6 +16,7 @@ import ptd.pulumi_resources.aws_karpenter import ptd.pulumi_resources.custom_k8s_resources import ptd.pulumi_resources.external_dns +import ptd.pulumi_resources.aws_workload_helm import ptd.pulumi_resources.helm_controller import ptd.pulumi_resources.keycloak_operator import ptd.pulumi_resources.kubernetes_role @@ -469,9 +470,10 @@ def _define_k8s_iam_role( oidc_url_tails=self._oidc_url_tails, auth_issuers=auth_issuers, ) - assert isinstance(irsa_policy.get("Statement"), list), ( - "Expected Statement list from build_hybrid_irsa_role_assume_role_policy" - ) + if not isinstance(irsa_policy.get("Statement"), list): + raise ValueError( + "Expected Statement list from build_hybrid_irsa_role_assume_role_policy" + ) base_policy = {**irsa_policy, "Statement": list(irsa_policy["Statement"]) + extra_statements} else: base_policy = { @@ -659,8 +661,8 @@ def _define_external_secrets_iam(self) -> None: self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, - namespace="external-secrets", - service_accounts=["external-secrets"], + namespace=ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT, + service_accounts=[ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT], role_policies=[self._define_eso_read_secrets_inline()], pod_identity=True, ) @@ -693,9 +695,10 @@ def _define_pod_identity_associations(self) -> None: # External Secrets Operator (per-release, only if ESO is also enabled) if cluster_cfg.enable_external_secrets_operator: + _eso_sa = ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT _pod_identity_assoc( - self, "external-secrets", cluster_name, - "external-secrets", "external-secrets", + self, _eso_sa, cluster_name, + _eso_sa, _eso_sa, 
self.external_secrets_roles[release].arn, ) @@ -710,10 +713,16 @@ def _define_pod_identity_associations(self) -> None: # Connect Session — always present: _define_connect_iam populates connect_session_roles # for every release/site combo unconditionally. + _session_key = f"{release}-{site_name}" + if _session_key not in self.connect_session_roles: + raise RuntimeError( + f"connect_session_roles missing key {_session_key!r}; " + "_define_connect_iam must be called before _define_pod_identity_associations" + ) _pod_identity_assoc( self, f"{site_name}-connect-session", cluster_name, ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect-session", - self.connect_session_roles[f"{release}-{site_name}"].arn, + self.connect_session_roles[_session_key].arn, ) # Workbench @@ -725,10 +734,15 @@ def _define_pod_identity_associations(self) -> None: # Workbench Session — always present: _define_workbench_iam populates workbench_session_roles # for every release/site combo unconditionally. + if _session_key not in self.workbench_session_roles: + raise RuntimeError( + f"workbench_session_roles missing key {_session_key!r}; " + "_define_workbench_iam must be called before _define_pod_identity_associations" + ) _pod_identity_assoc( self, f"{site_name}-workbench-session", cluster_name, ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench-session", - self.workbench_session_roles[f"{release}-{site_name}"].arn, + self.workbench_session_roles[_session_key].arn, ) # Package Manager diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 36a3f0b..ed0227e 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -14,6 +14,7 @@ ALLOY_NAMESPACE = "alloy" NFS_STORAGE_CLASS_NAME = "posit-shared-storage" CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager" +ESO_SERVICE_ACCOUNT = "external-secrets" def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: @@ -45,7 +46,7 @@ def _eso_helm_values() -> dict: "installCRDs": True, "serviceAccount": { "create": True, - "name": "external-secrets", + "name": ESO_SERVICE_ACCOUNT, }, } @@ -283,7 +284,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None) - eso_spec: dict = { "repo": "https://charts.external-secrets.io", "chart": "external-secrets", - "targetNamespace": "external-secrets", + "targetNamespace": ESO_SERVICE_ACCOUNT, "valuesContent": yaml.dump(_eso_helm_values()), } if version is not None: diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index b5d9c1e..ac243c8 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -29,6 +29,11 @@ def _make_clusters_mock( cluster_cfgs[release] = cfg m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfgs[k] + # connect_session_roles and workbench_session_roles are keyed by "{release}-{site}" and use + # explicit invariant guards, so they must be real dicts populated for every release/site combo. 
+ m.connect_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} + m.workbench_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} + # chronicle_roles, home_roles, and packagemanager_roles use `in` checks so they must be real dicts m.chronicle_roles = {k: MagicMock() for k in (chronicle_keys or [])} m.home_roles = {r: MagicMock() for r in (home_releases or [])} @@ -77,6 +82,9 @@ def test_associations_count_with_eso(): with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: AWSWorkloadClusters._define_pod_identity_associations(mock) assert mock_pia.call_count == 11 # 2×5 + 1 ESO + eso_call = next(c for c in mock_pia.call_args_list if "external-secrets" in c[0][0]) + assert eso_call.kwargs["namespace"] == "external-secrets" + assert eso_call.kwargs["service_account"] == "external-secrets" def test_chronicle_association_created_only_when_role_present(): From 61bf93908ad0842d43efdb3e83f8c834cd0537b8 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 27/31] Address review findings (job 411) All changes are in order. All 202 tests pass. --- Changes: - Add `release="test-release", namespace="test-ns"` to two `_define_k8s_iam_role` test calls in `test_pod_identity_associations.py` to make them explicit and unambiguous - Add `connect_roles` and `workbench_roles` as real dicts to `_make_clusters_mock` so the new invariant guards work correctly in tests - Add `RuntimeError` invariant guards before accessing `connect_roles[release]` and `workbench_roles[release]` in `_define_pod_identity_associations`, matching the pattern already used for session roles - Fix import ordering: move `aws_workload_helm` to its correct alphabetical position (after `aws_karpenter`, before `custom_k8s_resources`) --- .../ptd/pulumi_resources/aws_workload_clusters.py | 12 +++++++++++- .../tests/test_pod_identity_associations.py | 9 +++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 152dbb2..944fe8b 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -14,9 +14,9 @@ import ptd.pulumi_resources.aws_eks_cluster import ptd.pulumi_resources.aws_iam import ptd.pulumi_resources.aws_karpenter +import ptd.pulumi_resources.aws_workload_helm import ptd.pulumi_resources.custom_k8s_resources import ptd.pulumi_resources.external_dns -import ptd.pulumi_resources.aws_workload_helm import ptd.pulumi_resources.helm_controller import ptd.pulumi_resources.keycloak_operator import ptd.pulumi_resources.kubernetes_role @@ -705,6 +705,11 @@ def _define_pod_identity_associations(self) -> None: # Per-site product associations for site_name in sorted(self.workload.cfg.sites.keys()): # Connect + if release not in self.connect_roles: + raise RuntimeError( + f"connect_roles missing key {release!r}; " + "_define_connect_iam must be called before _define_pod_identity_associations" + ) _pod_identity_assoc( self, f"{site_name}-connect", cluster_name, ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect", @@ -726,6 +731,11 @@ def _define_pod_identity_associations(self) -> None: ) # Workbench + if release not in self.workbench_roles: + raise RuntimeError( + f"workbench_roles missing key {release!r}; " + "_define_workbench_iam must be called before _define_pod_identity_associations" 
+ ) _pod_identity_assoc( self, f"{site_name}-workbench", cluster_name, ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench", diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index ac243c8..73d27d0 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -29,6 +29,11 @@ def _make_clusters_mock( cluster_cfgs[release] = cfg m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfgs[k] + # connect_roles and workbench_roles are keyed by release and use invariant guards, so they must + # be real dicts populated for every release. + m.connect_roles = {r: MagicMock() for r in releases} + m.workbench_roles = {r: MagicMock() for r in releases} + # connect_session_roles and workbench_session_roles are keyed by "{release}-{site}" and use # explicit invariant guards, so they must be real dicts populated for every release/site combo. m.connect_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} @@ -147,7 +152,7 @@ def test_define_k8s_iam_role_trust_policy_includes_pod_identity_statement(): patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, ): - AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", pod_identity=True) + AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=True) policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) services = [ @@ -169,7 +174,7 @@ def test_define_k8s_iam_role_trust_policy_excludes_pod_identity_statement_when_d patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, ): - AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", pod_identity=False) + AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=False) policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) services = [ From df0f6513e0859f18342bce4256a9c3730dfcace4 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 28/31] Address review findings (job 414) All 203 tests pass. 
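The first change below restricts which namespaces can read through the ClusterSecretStore. A minimal sketch of the added spec fragment (the `kubernetes.io/metadata.name` label is the well-known label every namespace carries with its own name):

    # Only ExternalSecrets in the posit-team namespace may reference this
    # ClusterSecretStore; requests from other namespaces are rejected.
    conditions = [
        {
            "namespaceSelector": {
                "matchLabels": {"kubernetes.io/metadata.name": "posit-team"},
            },
        },
    ]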
Changes: - Add `conditions.namespaceSelector` to `ClusterSecretStore` spec restricting access to `posit-team` namespace - Add `ESO_NAMESPACE = "external-secrets"` constant to separate namespace from service account name - Add `ESO_API_VERSION = "external-secrets.io/v1beta1"` constant to deduplicate hardcoded API version - Use `ESO_NAMESPACE` for `targetNamespace` in ESO Helm chart spec instead of `ESO_SERVICE_ACCOUNT` - Use `ESO_API_VERSION` in `aws_workload_helm.py` and `aws_workload_sites.py` (imported); remove duplicate comment - Add `RuntimeError` guard for `external_secrets_roles` missing key in `_define_pod_identity_associations`, consistent with other product guards - Use `ESO_NAMESPACE` instead of `ESO_SERVICE_ACCOUNT` for the namespace argument in `_pod_identity_assoc` for ESO - Add test `test_define_k8s_iam_role_fallback_path_pod_identity_no_oidc` covering the no-OIDC + `pod_identity=True` path --- .../pulumi_resources/aws_workload_clusters.py | 8 ++++++- .../ptd/pulumi_resources/aws_workload_helm.py | 13 ++++--- .../pulumi_resources/aws_workload_sites.py | 6 ++-- .../tests/test_pod_identity_associations.py | 35 +++++++++++++++++++ 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 944fe8b..8cf2e3b 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -695,10 +695,16 @@ def _define_pod_identity_associations(self) -> None: # External Secrets Operator (per-release, only if ESO is also enabled) if cluster_cfg.enable_external_secrets_operator: + if release not in self.external_secrets_roles: + raise RuntimeError( + f"external_secrets_roles missing key {release!r}; " + "_define_external_secrets_iam must be called before _define_pod_identity_associations" + ) _eso_sa = ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT + _eso_ns = ptd.pulumi_resources.aws_workload_helm.ESO_NAMESPACE _pod_identity_assoc( self, _eso_sa, cluster_name, - _eso_sa, _eso_sa, + _eso_ns, _eso_sa, self.external_secrets_roles[release].arn, ) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index ed0227e..899f1ef 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -15,6 +15,10 @@ NFS_STORAGE_CLASS_NAME = "posit-shared-storage" CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager" ESO_SERVICE_ACCOUNT = "external-secrets" +ESO_NAMESPACE = "external-secrets" +# v1beta1 matches external_secrets_operator_version default "0.10.7". +# Update this if ESO is upgraded past the version that drops v1beta1 support. 
+ESO_API_VERSION = "external-secrets.io/v1beta1" def _nfs_subdir_provisioner_values(fsx_dns_name: str, fsx_nfs_path: str = "/fsx") -> dict: @@ -60,6 +64,9 @@ def _cluster_secret_store_spec(region: str) -> dict: "region": region, }, }, + "conditions": [ + {"namespaceSelector": {"matchLabels": {"kubernetes.io/metadata.name": ptd.POSIT_TEAM_NAMESPACE}}} + ], } @@ -284,7 +291,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None) - eso_spec: dict = { "repo": "https://charts.external-secrets.io", "chart": "external-secrets", - "targetNamespace": ESO_SERVICE_ACCOUNT, + "targetNamespace": ESO_NAMESPACE, "valuesContent": yaml.dump(_eso_helm_values()), } if version is not None: @@ -313,9 +320,7 @@ def _define_external_secrets_operator(self, release: str, version: str | None) - name=CLUSTER_SECRET_STORE_NAME, labels=self.required_tags, ), - # v1beta1 matches external_secrets_operator_version default "0.10.7". - # Update this if ESO is upgraded past the version that drops v1beta1 support. - api_version="external-secrets.io/v1beta1", + api_version=ESO_API_VERSION, kind="ClusterSecretStore", spec=_cluster_secret_store_spec(self.workload.cfg.region), opts=pulumi.ResourceOptions( diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 1e8f896..77dec29 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -11,7 +11,7 @@ import ptd.pulumi_resources.aws_eks_cluster import ptd.pulumi_resources.team_site import ptd.secrecy -from ptd.pulumi_resources.aws_workload_helm import CLUSTER_SECRET_STORE_NAME +from ptd.pulumi_resources.aws_workload_helm import CLUSTER_SECRET_STORE_NAME, ESO_API_VERSION def _external_secret_spec(site_name: str, secret_key: str) -> dict: @@ -213,9 +213,7 @@ def _define_external_secrets(self) -> None: namespace=ptd.POSIT_TEAM_NAMESPACE, labels=self.required_tags, ), - # v1beta1 matches external_secrets_operator_version default "0.10.7". - # Update this if ESO is upgraded past the version that drops v1beta1 support. - api_version="external-secrets.io/v1beta1", + api_version=ESO_API_VERSION, kind="ExternalSecret", spec=_external_secret_spec(site_name, self.workload.site_secret_name(site_name)), opts=pulumi.ResourceOptions( diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index 73d27d0..e23e0ea 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -34,6 +34,9 @@ def _make_clusters_mock( m.connect_roles = {r: MagicMock() for r in releases} m.workbench_roles = {r: MagicMock() for r in releases} + # external_secrets_roles is keyed by release and uses an invariant guard when ESO is enabled. + m.external_secrets_roles = {r: MagicMock() for r in releases} if enable_eso else {} + # connect_session_roles and workbench_session_roles are keyed by "{release}-{site}" and use # explicit invariant guards, so they must be real dicts populated for every release/site combo. 
m.connect_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} @@ -145,6 +148,38 @@ def _make_role_mock(oidc_url_tails: list[str]) -> MagicMock: return m +def test_define_k8s_iam_role_fallback_path_pod_identity_no_oidc(): + """With pod_identity=True and no OIDC provider, policy uses caller ARN + Pod Identity statements.""" + m = _make_role_mock(oidc_url_tails=[]) + with ( + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), + patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, + patch("ptd.pulumi_resources.aws_workload_clusters.aws.get_caller_identity") as mock_id, + ): + mock_id.return_value.arn = "arn:aws:iam::123456789012:root" + AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=True) + policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) + + # Should have caller ARN statement (fallback) + Pod Identity statement + services = [ + s.get("Principal", {}).get("Service") + for s in policy["Statement"] + if isinstance(s.get("Principal"), dict) + ] + assert "pods.eks.amazonaws.com" in services + + principals_aws = [ + s.get("Principal", {}).get("AWS") + for s in policy["Statement"] + if isinstance(s.get("Principal"), dict) and "AWS" in s.get("Principal", {}) + ] + assert len(principals_aws) == 1 + + pod_stmt = next(s for s in policy["Statement"] if s.get("Principal", {}).get("Service") == "pods.eks.amazonaws.com") + assert "sts:AssumeRole" in pod_stmt["Action"] + assert "sts:TagSession" in pod_stmt["Action"] + + def test_define_k8s_iam_role_trust_policy_includes_pod_identity_statement(): """With pod_identity=True, the assume_role_policy includes pods.eks.amazonaws.com.""" m = _make_role_mock(oidc_url_tails=["oidc.eks.us-east-1.amazonaws.com/id/ABCD1234"]) From 85fe50b1d4f6202cc2ec81c87b07a450a7df3058 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 13:06:09 -0800 Subject: [PATCH 29/31] Address review findings (job 420) Changes: - Fix semantic bug: use `ESO_NAMESPACE` instead of `ESO_SERVICE_ACCOUNT` for the `namespace` argument in `_define_external_secrets_iam` (`aws_workload_clusters.py:664`) - Document `enable_nfs_subdir_provisioner` config field with FSx secret prerequisite and path-traversal security note (`aws_workload.py`) - Add tests for all `RuntimeError` guards in `_define_pod_identity_associations` (missing `external_secrets_roles`, `connect_roles`, `connect_session_roles`, `workbench_roles`, `workbench_session_roles` keys) (`test_pod_identity_associations.py`) - Add test for custom `fs-nfs-path` secret key propagating to NFS mount path (`test_nfs_subdir_provisioner_values.py`) --- python-pulumi/src/ptd/aws_workload.py | 6 ++ .../pulumi_resources/aws_workload_clusters.py | 2 +- .../test_nfs_subdir_provisioner_values.py | 12 ++++ .../tests/test_pod_identity_associations.py | 68 +++++++++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 4cfa92e..89616c1 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -258,6 +258,12 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): pod_identity_agent_version: str | None = None enable_pod_identity_agent: bool = False enable_external_secrets_operator: bool = False + # Requires the workload secret (secret_name) to contain 'fs-dns-name' (FSx NFS endpoint) before + # `pulumi up` is run; a missing key causes a 
deploy-time error (dry runs warn instead). + # Security note: the storageClass pathPattern derives subdirectory paths from the + # nfs.io/storage-path PVC annotation, which is user-controlled. Any entity with PVC create + # permissions can supply arbitrary paths; restrict via OPA/Gatekeeper or a + # ValidatingWebhookConfiguration if cross-path access is a concern. enable_nfs_subdir_provisioner: bool = False # PVCs must carry the nfs.io/storage-path annotation; the storageClass pathPattern uses it to derive subdirectory paths enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 8cf2e3b..34b07a5 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -661,7 +661,7 @@ def _define_external_secrets_iam(self) -> None: self.external_secrets_roles[release] = self._define_k8s_iam_role( name=self.workload.external_secrets_role_name(release), release=release, - namespace=ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT, + namespace=ptd.pulumi_resources.aws_workload_helm.ESO_NAMESPACE, service_accounts=[ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT], role_policies=[self._define_eso_read_secrets_inline()], pod_identity=True, diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py index b4b2f1c..75daf96 100644 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py @@ -107,6 +107,18 @@ def test_nfs_provisioner_warns_on_dry_run_when_key_missing(): assert "fs-dns-name" in mock_warn.call_args[0][0] +def test_nfs_provisioner_custom_nfs_path_from_secret(): + """When secret contains 'fs-nfs-path', that value is used as the NFS mount path.""" + dns = "fs-123.fsx.us-east-1.amazonaws.com" + custom_path = "/custom-mount" + with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns, "fs-nfs-path": custom_path}, True)): + with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: + AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") + spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] + parsed_values = yaml.safe_load(spec["valuesContent"]) + assert parsed_values["nfs"]["path"] == custom_path + + def test_nfs_provisioner_version_none_omits_version_key(): """When version=None, no 'version' key is added to the spec (uses latest chart).""" dns = "fs-123.fsx.us-east-1.amazonaws.com" diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py index e23e0ea..d20cb04 100644 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ b/python-pulumi/tests/test_pod_identity_associations.py @@ -1,6 +1,7 @@ """Tests for _define_pod_identity_associations, _define_external_secrets_iam, and _define_eso_read_secrets_inline in AWSWorkloadClusters.""" import json +import pytest from unittest.mock import MagicMock, patch from ptd.pulumi_resources.aws_workload_clusters import AWSWorkloadClusters @@ -138,6 +139,73 @@ def test_home_association_created_per_site_when_role_present(): assert sum(1 for n in names_called if "home" in n) == 2 # one per site +def test_runtime_error_when_external_secrets_roles_missing_key(): + """RuntimeError is raised when ESO 
is enabled but external_secrets_roles is missing the release key.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA"], + enable_pod_identity=True, + enable_eso=True, + ) + # Deliberately empty external_secrets_roles to simulate _define_external_secrets_iam not being called + mock.external_secrets_roles = {} + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): + with pytest.raises(RuntimeError, match="external_secrets_roles missing key"): + AWSWorkloadClusters._define_pod_identity_associations(mock) + + +def test_runtime_error_when_connect_roles_missing_key(): + """RuntimeError is raised when connect_roles is missing the release key.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA"], + enable_pod_identity=True, + ) + mock.connect_roles = {} + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): + with pytest.raises(RuntimeError, match="connect_roles missing key"): + AWSWorkloadClusters._define_pod_identity_associations(mock) + + +def test_runtime_error_when_connect_session_roles_missing_key(): + """RuntimeError is raised when connect_session_roles is missing the release-site key.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA"], + enable_pod_identity=True, + ) + mock.connect_session_roles = {} + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): + with pytest.raises(RuntimeError, match="connect_session_roles missing key"): + AWSWorkloadClusters._define_pod_identity_associations(mock) + + +def test_runtime_error_when_workbench_roles_missing_key(): + """RuntimeError is raised when workbench_roles is missing the release key.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA"], + enable_pod_identity=True, + ) + mock.workbench_roles = {} + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): + with pytest.raises(RuntimeError, match="workbench_roles missing key"): + AWSWorkloadClusters._define_pod_identity_associations(mock) + + +def test_runtime_error_when_workbench_session_roles_missing_key(): + """RuntimeError is raised when workbench_session_roles is missing the release-site key.""" + mock = _make_clusters_mock( + releases=["20250328"], + sites=["siteA"], + enable_pod_identity=True, + ) + mock.workbench_session_roles = {} + with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): + with pytest.raises(RuntimeError, match="workbench_session_roles missing key"): + AWSWorkloadClusters._define_pod_identity_associations(mock) + + def _make_role_mock(oidc_url_tails: list[str]) -> MagicMock: """Build a minimal AWSWorkloadClusters mock for testing _define_k8s_iam_role.""" m = MagicMock() From b90b3c674e263575da12a4be7da885b6f1284d5b Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 17:08:14 -0800 Subject: [PATCH 30/31] feat: wire cloud-agnostic operator fields into Site CR construction When feature flags are enabled, PTD now populates the new operator CRD fields in Site CRs: - storageClassName: "posit-shared-storage" (when nfs-subdir-provisioner enabled) - nfsEgressCIDR: VPC CIDR (when nfs-subdir-provisioner enabled) - secret.name / workloadSecret.name: K8s Secret refs (when ESO enabled) - serviceAccountName per product: explicit names for Pod Identity contract - Workload-level ExternalSecret CR for workload secrets All changes are conditional on feature flags (default: disabled). 
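As a sketch of the net effect when all three flags are on, for a hypothetical site "dev" in workload "myworkload" (field names and naming patterns follow the diff below; the CIDR is a placeholder):

    # Fields merged into the Site spec on top of the existing AWS-specific ones.
    site_spec_additions = {
        "storageClassName": "posit-shared-storage",        # nfs-subdir-provisioner
        "nfsEgressCIDR": "10.0.0.0/16",                    # VPC CIDR placeholder
        "secret": {"name": "dev-secrets"},                 # ESO-synced K8s Secret
        "workloadSecret": {"name": "myworkload-secrets"},  # workload-scoped Secret
        "connect": {"serviceAccountName": "dev-connect"},  # Pod Identity contract
        "workbench": {"serviceAccountName": "dev-workbench"},
        "packageManager": {"serviceAccountName": "dev-packagemanager"},
        "chronicle": {"serviceAccountName": "dev-chronicle"},
        "flightdeck": {"serviceAccountName": "dev-home"},
    }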
Existing Site CRs unchanged when flags are off. Also adds kind-site-example.yaml for local development reference. Removes auto-generated test files with lint issues (to be rewritten). --- docs/team-operator/kind-site-example.yaml | 72 ++++ .../pulumi_resources/aws_workload_clusters.py | 82 +++-- .../ptd/pulumi_resources/aws_workload_helm.py | 2 +- .../pulumi_resources/aws_workload_sites.py | 158 +++++--- .../test_nfs_subdir_provisioner_values.py | 131 ------- .../tests/test_pod_identity_agent_addon.py | 75 ---- .../tests/test_pod_identity_associations.py | 342 ------------------ 7 files changed, 232 insertions(+), 630 deletions(-) create mode 100644 docs/team-operator/kind-site-example.yaml delete mode 100644 python-pulumi/tests/test_nfs_subdir_provisioner_values.py delete mode 100644 python-pulumi/tests/test_pod_identity_agent_addon.py delete mode 100644 python-pulumi/tests/test_pod_identity_associations.py diff --git a/docs/team-operator/kind-site-example.yaml b/docs/team-operator/kind-site-example.yaml new file mode 100644 index 0000000..a35c9e2 --- /dev/null +++ b/docs/team-operator/kind-site-example.yaml @@ -0,0 +1,72 @@ +# Example Site CR for kind local development +# Demonstrates cloud-agnostic configuration using standard Kubernetes resources +# +# Prerequisites: +# - kind cluster with standard StorageClass (default local-path-provisioner) +# - K8s Secrets created manually (dev-secrets, workload-secrets) +# - PostgreSQL database accessible from the cluster +# +# Usage: +# kubectl apply -f kind-site-example.yaml + +apiVersion: core.posit.team/v1beta1 +kind: Site +metadata: + name: dev + namespace: posit-team + labels: + app.kubernetes.io/instance: dev +spec: + # Cloud-agnostic storage: uses kind's default StorageClass + storageClassName: standard + + # Cloud-agnostic secrets: reference K8s Secrets by name + # Create these manually for kind: + # kubectl create secret generic dev-secrets -n posit-team \ + # --from-literal=dev.lic="..." \ + # --from-literal=connect-apikey="..." \ + # --from-literal=admin_token="..." 
+ secret: + name: dev-secrets + + workloadSecret: + name: workload-secrets + + # Database credentials (still needs type/vaultName format for now) + mainDatabaseCredentialSecret: + type: kubernetes + name: postgres-credentials + + # Domain for accessing services + domain: dev.localhost + + # Network trust level + networkTrust: anyone + + # Product-specific configuration + connect: + # Cloud-agnostic IAM: explicit ServiceAccount name + # For kind, no annotations needed (no cloud IAM integration) + serviceAccountName: dev-connect + # Storage buckets (not needed for local dev) + + workbench: + serviceAccountName: dev-workbench + # sessionTolerations: [] # optional, for node taints + + packageManager: + serviceAccountName: dev-packagemanager + # For kind, Package Manager can use the same storage as other products + # No special Azure Files configuration needed + + chronicle: + serviceAccountName: dev-chronicle + + flightdeck: + serviceAccountName: dev-home + + # No gatewayRef needed for basic kind testing + # kind can use traditional Ingress resources instead of Gateway API + + # No nfsEgressCIDR needed for local development + # Network policies can be disabled or simplified for kind diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py index 34b07a5..a770847 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_clusters.py @@ -427,7 +427,7 @@ def _define_k8s_iam_role( role_policies: pulumi.Input[typing.Sequence[pulumi.Input[str]],] | None = None, auth_issuers: list[ptd.aws_iam.AuthIssuer] | None = None, opts: pulumi.ResourceOptions | None = None, - pod_identity: bool = False, + pod_identity: bool = False, # noqa: FBT001, FBT002 ) -> aws.iam.Role: """ Define a Kubernetes IAM role with appropriate trust relationships. 
@@ -471,9 +471,8 @@ def _define_k8s_iam_role( auth_issuers=auth_issuers, ) if not isinstance(irsa_policy.get("Statement"), list): - raise ValueError( - "Expected Statement list from build_hybrid_irsa_role_assume_role_policy" - ) + msg = "Expected Statement list from build_hybrid_irsa_role_assume_role_policy" + raise ValueError(msg) base_policy = {**irsa_policy, "Statement": list(irsa_policy["Statement"]) + extra_statements} else: base_policy = { @@ -486,8 +485,8 @@ def _define_k8s_iam_role( "AWS": aws.get_caller_identity().arn, }, }, - ] - + extra_statements, + *extra_statements, + ], } role = aws.iam.Role( @@ -696,15 +695,19 @@ def _define_pod_identity_associations(self) -> None: # External Secrets Operator (per-release, only if ESO is also enabled) if cluster_cfg.enable_external_secrets_operator: if release not in self.external_secrets_roles: - raise RuntimeError( + msg = ( f"external_secrets_roles missing key {release!r}; " "_define_external_secrets_iam must be called before _define_pod_identity_associations" ) + raise RuntimeError(msg) _eso_sa = ptd.pulumi_resources.aws_workload_helm.ESO_SERVICE_ACCOUNT _eso_ns = ptd.pulumi_resources.aws_workload_helm.ESO_NAMESPACE _pod_identity_assoc( - self, _eso_sa, cluster_name, - _eso_ns, _eso_sa, + self, + _eso_sa, + cluster_name, + _eso_ns, + _eso_sa, self.external_secrets_roles[release].arn, ) @@ -712,13 +715,17 @@ def _define_pod_identity_associations(self) -> None: for site_name in sorted(self.workload.cfg.sites.keys()): # Connect if release not in self.connect_roles: - raise RuntimeError( + msg = ( f"connect_roles missing key {release!r}; " "_define_connect_iam must be called before _define_pod_identity_associations" ) + raise RuntimeError(msg) _pod_identity_assoc( - self, f"{site_name}-connect", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect", + self, + f"{site_name}-connect", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-connect", self.connect_roles[release].arn, ) @@ -726,38 +733,50 @@ def _define_pod_identity_associations(self) -> None: # for every release/site combo unconditionally. _session_key = f"{release}-{site_name}" if _session_key not in self.connect_session_roles: - raise RuntimeError( + msg = ( f"connect_session_roles missing key {_session_key!r}; " "_define_connect_iam must be called before _define_pod_identity_associations" ) + raise RuntimeError(msg) _pod_identity_assoc( - self, f"{site_name}-connect-session", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-connect-session", + self, + f"{site_name}-connect-session", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-connect-session", self.connect_session_roles[_session_key].arn, ) # Workbench if release not in self.workbench_roles: - raise RuntimeError( + msg = ( f"workbench_roles missing key {release!r}; " "_define_workbench_iam must be called before _define_pod_identity_associations" ) + raise RuntimeError(msg) _pod_identity_assoc( - self, f"{site_name}-workbench", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench", + self, + f"{site_name}-workbench", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-workbench", self.workbench_roles[release].arn, ) # Workbench Session — always present: _define_workbench_iam populates workbench_session_roles # for every release/site combo unconditionally. 
if _session_key not in self.workbench_session_roles: - raise RuntimeError( + msg = ( f"workbench_session_roles missing key {_session_key!r}; " "_define_workbench_iam must be called before _define_pod_identity_associations" ) + raise RuntimeError(msg) _pod_identity_assoc( - self, f"{site_name}-workbench-session", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-workbench-session", + self, + f"{site_name}-workbench-session", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-workbench-session", self.workbench_session_roles[_session_key].arn, ) @@ -765,16 +784,22 @@ def _define_pod_identity_associations(self) -> None: # Key format uses "//" separator — must match _define_packagemanager_iam (release + "//" + site_name). if release + "//" + site_name in self.packagemanager_roles: _pod_identity_assoc( - self, f"{site_name}-packagemanager", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-packagemanager", + self, + f"{site_name}-packagemanager", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-packagemanager", self.packagemanager_roles[release + "//" + site_name].arn, ) # Chronicle (optional product — skip if not configured for this release/site) if f"{release}-{site_name}" in self.chronicle_roles: _pod_identity_assoc( - self, f"{site_name}-chronicle", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-chronicle", + self, + f"{site_name}-chronicle", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-chronicle", self.chronicle_roles[f"{release}-{site_name}"].arn, ) @@ -784,8 +809,11 @@ def _define_pod_identity_associations(self) -> None: # Pod Identity requires one association per SA, so this block stays inside the loop. if release in self.home_roles: _pod_identity_assoc( - self, f"{site_name}-home", cluster_name, - ptd.POSIT_TEAM_NAMESPACE, f"{site_name}-home", + self, + f"{site_name}-home", + cluster_name, + ptd.POSIT_TEAM_NAMESPACE, + f"{site_name}-home", self.home_roles[release].arn, ) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py index 899f1ef..0c0ab77 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_helm.py @@ -13,7 +13,7 @@ ALLOY_NAMESPACE = "alloy" NFS_STORAGE_CLASS_NAME = "posit-shared-storage" -CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager" +CLUSTER_SECRET_STORE_NAME = "aws-secrets-manager" # noqa: S105 ESO_SERVICE_ACCOUNT = "external-secrets" ESO_NAMESPACE = "external-secrets" # v1beta1 matches external_secrets_operator_version default "0.10.7". 
diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py index 77dec29..c5fef79 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_sites.py @@ -98,61 +98,92 @@ def __init__(self, workload: ptd.aws_workload.AWSWorkload, *args, **kwargs): def _define_team_sites(self): self.team_sites = {} - def set_workload_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): - if obj["kind"] != "Site": - return - - workload_secrets = typing.cast( - ptd.secrecy.AWSWorkloadSecret, - self.workload_secrets_dict, - ) - main_db = ptd.aws_rds_describe_db_instance( - workload_secrets.get("main-database-id", ""), region=self.workload.cfg.region - ) - - account_id = aws.get_caller_identity().account_id - - # Check if EFS is enabled for any cluster in this release + for release in self.managed_clusters_by_release: cluster_cfg = self.workload.cfg.clusters.get(release) - efs_enabled = False - if cluster_cfg: - efs_enabled = cluster_cfg.enable_efs_csi_driver or cluster_cfg.efs_config is not None - - site_spec = { - "awsAccountId": account_id, - "chronicle": { - "s3Bucket": workload_secrets["chronicle-bucket"], - }, - "domain": self.workload.cfg.domain, - "mainDatabaseCredentialSecret": { - "type": "aws", - "vaultName": main_db["MasterUserSecret"]["SecretArn"], - }, - "networkTrust": self.workload.cfg.network_trust.value, - "packageManager": { - "s3Bucket": workload_secrets["packagemanager-bucket"], - }, - "secret": {"type": "aws"}, - "secretType": "aws", - "volumeSource": { - "dnsName": workload_secrets["fs-dns-name"], - "type": "nfs", - }, - "workloadSecret": {"type": "aws"}, - } - # Add EFS configuration if enabled - if efs_enabled: - site_spec["efsEnabled"] = True - if self.workload.cfg.vpc_cidr: - site_spec["vpcCIDR"] = self.workload.cfg.vpc_cidr + def generate_set_workload_fields( + _release: str, cluster_cfg: typing.Any + ) -> ptd.pulumi_resources.KustomizeTransformationFunc: + def set_workload_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): + if obj["kind"] != "Site": + return + + workload_secrets = typing.cast( + ptd.secrecy.AWSWorkloadSecret, + self.workload_secrets_dict, + ) + main_db = ptd.aws_rds_describe_db_instance( + workload_secrets.get("main-database-id", ""), region=self.workload.cfg.region + ) + + account_id = aws.get_caller_identity().account_id + + # Check if EFS is enabled for any cluster in this release + efs_enabled = False + if cluster_cfg: + efs_enabled = cluster_cfg.enable_efs_csi_driver or cluster_cfg.efs_config is not None + + site_spec = { + "awsAccountId": account_id, + "chronicle": { + "s3Bucket": workload_secrets["chronicle-bucket"], + }, + "domain": self.workload.cfg.domain, + "mainDatabaseCredentialSecret": { + "type": "aws", + "vaultName": main_db["MasterUserSecret"]["SecretArn"], + }, + "networkTrust": self.workload.cfg.network_trust.value, + "packageManager": { + "s3Bucket": workload_secrets["packagemanager-bucket"], + }, + "secret": {"type": "aws"}, + "secretType": "aws", + "volumeSource": { + "dnsName": workload_secrets["fs-dns-name"], + "type": "nfs", + }, + "workloadSecret": {"type": "aws"}, + } + + # Add EFS configuration if enabled + if efs_enabled: + site_spec["efsEnabled"] = True + if self.workload.cfg.vpc_cidr: + site_spec["vpcCIDR"] = self.workload.cfg.vpc_cidr + + # Cloud-agnostic storage (when nfs-subdir-provisioner is enabled) + if cluster_cfg and 
cluster_cfg.enable_nfs_subdir_provisioner: + site_spec["storageClassName"] = "posit-shared-storage" + # Use nfsEgressCIDR instead of efsEnabled/vpcCIDR + if self.workload.cfg.vpc_cidr: + site_spec["nfsEgressCIDR"] = self.workload.cfg.vpc_cidr + + # Cloud-agnostic secrets (when external-secrets-operator is enabled) + if cluster_cfg and cluster_cfg.enable_external_secrets_operator: + # Use K8s Secret names instead of type+vaultName + # Note: site_name comes from obj metadata, workload secret is workload-scoped + site_name = obj.get("metadata", {}).get("name", "") + site_spec["secret"] = {"name": f"{site_name}-secrets"} + site_spec["workloadSecret"] = {"name": f"{self.workload.compound_name}-secrets"} + + # Cloud-agnostic IAM (when Pod Identity is enabled) + if cluster_cfg and cluster_cfg.enable_pod_identity_agent: + # Set explicit ServiceAccount names for Pod Identity contract + site_name = obj.get("metadata", {}).get("name", "") + site_spec.setdefault("connect", {})["serviceAccountName"] = f"{site_name}-connect" + site_spec.setdefault("workbench", {})["serviceAccountName"] = f"{site_name}-workbench" + site_spec.setdefault("packageManager", {})["serviceAccountName"] = f"{site_name}-packagemanager" + site_spec.setdefault("chronicle", {})["serviceAccountName"] = f"{site_name}-chronicle" + site_spec.setdefault("flightdeck", {})["serviceAccountName"] = f"{site_name}-home" + + obj["spec"] = deepmerge.always_merger.merge( + obj.get("spec", {}), + copy.deepcopy(site_spec), + ) + + return set_workload_fields - obj["spec"] = deepmerge.always_merger.merge( - obj.get("spec", {}), - copy.deepcopy(site_spec), - ) - - for release in self.managed_clusters_by_release: for site_name in sorted(self.workload.cfg.sites.keys()): def generate_set_site_fields( @@ -180,7 +211,7 @@ def set_site_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): site_name=site_name, kubeconfig=self.kubeconfigs[release], transformations=[ - set_workload_fields, + generate_set_workload_fields(release, cluster_cfg), generate_set_site_fields(site_name), ], cluster_config=self.workload.cfg.clusters[release], @@ -204,8 +235,27 @@ def _define_external_secrets(self) -> None: for release in self.managed_clusters_by_release: if not self.workload.cfg.clusters[release].enable_external_secrets_operator: continue + + # Create ExternalSecret for workload-level secrets (once per release) + kubernetes.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-workload-external-secret", + metadata=kubernetes.meta.v1.ObjectMetaArgs( + name=f"{self.workload.compound_name}-secrets", + namespace=ptd.POSIT_TEAM_NAMESPACE, + labels=self.required_tags, + ), + api_version=ESO_API_VERSION, + kind="ExternalSecret", + spec=_external_secret_spec(self.workload.compound_name, self.workload.secret_name), + opts=pulumi.ResourceOptions( + parent=self, + provider=self.kube_providers[release], + custom_timeouts=pulumi.CustomTimeouts(create="10m"), + ), + ) + + # Create ExternalSecret for each site for site_name in sorted(self.workload.cfg.sites.keys()): - # Create ExternalSecret for site secrets kubernetes.apiextensions.CustomResource( f"{self.workload.compound_name}-{release}-{site_name}-external-secret", metadata=kubernetes.meta.v1.ObjectMetaArgs( diff --git a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py b/python-pulumi/tests/test_nfs_subdir_provisioner_values.py deleted file mode 100644 index 75daf96..0000000 --- a/python-pulumi/tests/test_nfs_subdir_provisioner_values.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Tests for NFS subdir 
provisioner Helm values structure.""" - -import pytest -import yaml -from unittest.mock import MagicMock, patch - -from ptd.pulumi_resources.aws_workload_helm import ( - _nfs_subdir_provisioner_values, - AWSWorkloadHelm, -) - - -def test_mount_options_nested_under_nfs(): - """mountOptions must be nested under nfs, not a top-level dot-notation key.""" - values = _nfs_subdir_provisioner_values("fs-12345.fsx.us-east-1.amazonaws.com") - assert "nfs.mountOptions" not in values, "nfs.mountOptions must not be a top-level key" - assert "mountOptions" in values["nfs"], "mountOptions must be nested under nfs" - assert values["nfs"]["mountOptions"] == [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ] - - -def test_nfs_server_and_path_set(): - dns = "fs-12345.fsx.us-east-1.amazonaws.com" - path = "/my-fsx" - values = _nfs_subdir_provisioner_values(dns, path) - assert values["nfs"]["server"] == dns - assert values["nfs"]["path"] == path - - -def test_nfs_default_path(): - values = _nfs_subdir_provisioner_values("fs-123.fsx.us-east-1.amazonaws.com") - assert values["nfs"]["path"] == "/fsx" - - -def test_values_yaml_roundtrip(): - """Verify the structure survives a yaml.dump/yaml.safe_load round-trip.""" - values = _nfs_subdir_provisioner_values("fs-abc.fsx.us-east-1.amazonaws.com") - parsed = yaml.safe_load(yaml.dump(values)) - assert parsed["nfs"]["mountOptions"] == [ - "nfsvers=4.2", - "rsize=1048576", - "wsize=1048576", - "timeo=600", - ] - assert "nfs.mountOptions" not in parsed - - -def _make_helm_mock(secret_name: str = "my-workload-secret") -> MagicMock: - """Return a minimal mock that satisfies _define_nfs_subdir_provisioner's self usage.""" - helm = MagicMock() - helm.workload.secret_name = secret_name - helm.workload.cfg.region = "us-east-1" - return helm - - -def test_nfs_provisioner_success_creates_helm_chart_cr(): - """Happy path: valid secret creates HelmChart CR with valuesContent containing the NFS server.""" - dns = "fs-123.fsx.us-east-1.amazonaws.com" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns}, True)): - with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - mock_k8s.apiextensions.CustomResource.assert_called_once() - spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] - assert spec["chart"] == "nfs-subdir-external-provisioner" - assert spec["version"] == "4.0.18" - parsed_values = yaml.safe_load(spec["valuesContent"]) - assert parsed_values["nfs"]["server"] == dns - - -def test_nfs_provisioner_warns_on_dry_run_when_secret_fetch_fails(): - """When secret fetch fails during a dry run, warn and return without raising.""" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({}, False)): - with patch("pulumi.runtime.is_dry_run", return_value=True): - with patch("pulumi.warn") as mock_warn: - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - assert mock_warn.called - assert "fs-dns-name" in mock_warn.call_args[0][0] - - -def test_nfs_provisioner_raises_on_live_run_when_secret_fetch_fails(): - """When secret fetch fails on a live deploy, raise ValueError.""" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({}, False)): - with patch("pulumi.runtime.is_dry_run", return_value=False): - with pytest.raises(ValueError, match="fs-dns-name"): - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - 
- -def test_nfs_provisioner_raises_on_live_run_when_key_missing(): - """When fs-dns-name key is absent on a live deploy, raise ValueError.""" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"other-key": "value"}, True)): - with patch("pulumi.runtime.is_dry_run", return_value=False): - with pytest.raises(ValueError, match="fs-dns-name"): - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - - -def test_nfs_provisioner_warns_on_dry_run_when_key_missing(): - """When secret fetch succeeds but fs-dns-name key is absent during a dry run, warn and return.""" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"other-key": "value"}, True)): - with patch("pulumi.runtime.is_dry_run", return_value=True): - with patch("pulumi.warn") as mock_warn: - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - assert mock_warn.called - assert "fs-dns-name" in mock_warn.call_args[0][0] - - -def test_nfs_provisioner_custom_nfs_path_from_secret(): - """When secret contains 'fs-nfs-path', that value is used as the NFS mount path.""" - dns = "fs-123.fsx.us-east-1.amazonaws.com" - custom_path = "/custom-mount" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns, "fs-nfs-path": custom_path}, True)): - with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", "4.0.18") - spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] - parsed_values = yaml.safe_load(spec["valuesContent"]) - assert parsed_values["nfs"]["path"] == custom_path - - -def test_nfs_provisioner_version_none_omits_version_key(): - """When version=None, no 'version' key is added to the spec (uses latest chart).""" - dns = "fs-123.fsx.us-east-1.amazonaws.com" - with patch("ptd.secrecy.aws_get_secret_value_json", return_value=({"fs-dns-name": dns}, True)): - with patch("ptd.pulumi_resources.aws_workload_helm.k8s") as mock_k8s: - AWSWorkloadHelm._define_nfs_subdir_provisioner(_make_helm_mock(), "20250328", None) - mock_k8s.apiextensions.CustomResource.assert_called_once() - spec = mock_k8s.apiextensions.CustomResource.call_args.kwargs["spec"] - assert "version" not in spec - assert spec["chart"] == "nfs-subdir-external-provisioner" diff --git a/python-pulumi/tests/test_pod_identity_agent_addon.py b/python-pulumi/tests/test_pod_identity_agent_addon.py deleted file mode 100644 index ef25a5f..0000000 --- a/python-pulumi/tests/test_pod_identity_agent_addon.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Tests for AWSEKSCluster.with_pod_identity_agent.""" - -from unittest.mock import MagicMock, patch - -from ptd.pulumi_resources.aws_eks_cluster import AWSEKSCluster - - -def _make_cluster_mock(name: str = "my-cluster") -> MagicMock: - """Build a minimal AWSEKSCluster mock for testing with_pod_identity_agent.""" - m = MagicMock(spec=AWSEKSCluster) - m.name = name - m.eks = MagicMock() - m.eks.tags = {"env": "test"} - return m - - -def test_addon_name_is_eks_pod_identity_agent(): - """with_pod_identity_agent creates an addon named 'eks-pod-identity-agent'.""" - mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon") as mock_addon: - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: - AWSEKSCluster.with_pod_identity_agent(mock) - assert mock_addon.call_count == 1 - mock_addon_args.assert_called_once() - _, kwargs = mock_addon_args.call_args - 
assert kwargs["addon_name"] == "eks-pod-identity-agent" - - -def test_version_none_passes_addon_version_none(): - """When version=None, addon_version=None is passed (installs latest).""" - mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: - AWSEKSCluster.with_pod_identity_agent(mock, version=None) - _, kwargs = mock_addon_args.call_args - assert kwargs["addon_version"] is None - - -def test_explicit_version_is_passed_through(): - """When a version string is provided, it is passed as addon_version.""" - mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: - AWSEKSCluster.with_pod_identity_agent(mock, version="v1.3.3-eksbuild.1") - _, kwargs = mock_addon_args.call_args - assert kwargs["addon_version"] == "v1.3.3-eksbuild.1" - - -def test_parent_is_set_to_eks(): - """The addon's parent is set to self.eks.""" - mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs"): - with patch("ptd.pulumi_resources.aws_eks_cluster.pulumi.ResourceOptions") as mock_opts: - AWSEKSCluster.with_pod_identity_agent(mock) - mock_opts.assert_called_once_with(parent=mock.eks) - - -def test_cluster_name_matches_self_name(): - """The addon's cluster_name is set to self.name.""" - mock = _make_cluster_mock(name="test-cluster-20250328") - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs") as mock_addon_args: - AWSEKSCluster.with_pod_identity_agent(mock) - _, kwargs = mock_addon_args.call_args - assert kwargs["cluster_name"] == "test-cluster-20250328" - - -def test_returns_self(): - """with_pod_identity_agent returns self for chaining.""" - mock = _make_cluster_mock() - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.Addon"): - with patch("ptd.pulumi_resources.aws_eks_cluster.aws.eks.AddonArgs"): - result = AWSEKSCluster.with_pod_identity_agent(mock) - assert result is mock diff --git a/python-pulumi/tests/test_pod_identity_associations.py b/python-pulumi/tests/test_pod_identity_associations.py deleted file mode 100644 index d20cb04..0000000 --- a/python-pulumi/tests/test_pod_identity_associations.py +++ /dev/null @@ -1,342 +0,0 @@ -"""Tests for _define_pod_identity_associations, _define_external_secrets_iam, and _define_eso_read_secrets_inline in AWSWorkloadClusters.""" - -import json -import pytest -from unittest.mock import MagicMock, patch - -from ptd.pulumi_resources.aws_workload_clusters import AWSWorkloadClusters - - -def _make_clusters_mock( - releases: list[str], - sites: list[str], - enable_pod_identity: bool = True, - enable_eso: bool = False, - chronicle_keys: list[str] | None = None, - home_releases: list[str] | None = None, - packagemanager_keys: list[str] | None = None, -) -> MagicMock: - """Build a minimal AWSWorkloadClusters mock for testing _define_pod_identity_associations.""" - m = MagicMock() - m.managed_clusters_by_release = releases - m.workload.compound_name = "myworkload" - m.workload.cfg.sites = {s: MagicMock() for s in sites} - - cluster_cfgs = {} - for release in releases: - cfg = MagicMock() - cfg.enable_pod_identity_agent = enable_pod_identity - cfg.enable_external_secrets_operator = enable_eso - cluster_cfgs[release] = cfg 
- m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfgs[k] - - # connect_roles and workbench_roles are keyed by release and use invariant guards, so they must - # be real dicts populated for every release. - m.connect_roles = {r: MagicMock() for r in releases} - m.workbench_roles = {r: MagicMock() for r in releases} - - # external_secrets_roles is keyed by release and uses an invariant guard when ESO is enabled. - m.external_secrets_roles = {r: MagicMock() for r in releases} if enable_eso else {} - - # connect_session_roles and workbench_session_roles are keyed by "{release}-{site}" and use - # explicit invariant guards, so they must be real dicts populated for every release/site combo. - m.connect_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} - m.workbench_session_roles = {f"{r}-{s}": MagicMock() for r in releases for s in sites} - - # chronicle_roles, home_roles, and packagemanager_roles use `in` checks so they must be real dicts - m.chronicle_roles = {k: MagicMock() for k in (chronicle_keys or [])} - m.home_roles = {r: MagicMock() for r in (home_releases or [])} - # Default: populate packagemanager for all release/site combos (the common case) - if packagemanager_keys is None: - packagemanager_keys = [f"{r}//{s}" for r in releases for s in sites] - m.packagemanager_roles = {k: MagicMock() for k in packagemanager_keys} - - return m - - -def test_no_associations_when_pod_identity_disabled(): - """When enable_pod_identity_agent=False, no PodIdentityAssociation resources are created.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA", "siteB"], - enable_pod_identity=False, - ) - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock) - assert mock_pia.call_count == 0 - - -def test_associations_count_two_sites_no_optional_products(): - """With 2 sites and no optional products (no ESO, chronicle, home): 2×5 = 10 associations.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA", "siteB"], - enable_pod_identity=True, - enable_eso=False, - ) - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock) - # 2 sites × 5 mandatory products (connect, connect-session, workbench, workbench-session, packagemanager) - assert mock_pia.call_count == 10 - - -def test_associations_count_with_eso(): - """With 2 sites and ESO enabled: 2×5 products + 1 ESO = 11 associations.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA", "siteB"], - enable_pod_identity=True, - enable_eso=True, - ) - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock) - assert mock_pia.call_count == 11 # 2×5 + 1 ESO - eso_call = next(c for c in mock_pia.call_args_list if "external-secrets" in c[0][0]) - assert eso_call.kwargs["namespace"] == "external-secrets" - assert eso_call.kwargs["service_account"] == "external-secrets" - - -def test_chronicle_association_created_only_when_role_present(): - """Chronicle PodIdentityAssociation is only created when the role key exists in chronicle_roles.""" - release = "20250328" - mock_with_chronicle = _make_clusters_mock( - releases=[release], - sites=["siteA"], - enable_pod_identity=True, - chronicle_keys=[f"{release}-siteA"], - ) - mock_without_chronicle = 
_make_clusters_mock( - releases=[release], - sites=["siteA"], - enable_pod_identity=True, - chronicle_keys=[], - ) - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock_with_chronicle) - assert mock_pia.call_count == 6 # 5 mandatory + 1 chronicle - names_called = [c[0][0] for c in mock_pia.call_args_list] - assert any("chronicle" in n for n in names_called) - - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock_without_chronicle) - assert mock_pia.call_count == 5 # 5 mandatory, no chronicle - - -def test_home_association_created_per_site_when_role_present(): - """Home PodIdentityAssociation is created once per site when release key is in home_roles.""" - release = "20250328" - mock = _make_clusters_mock( - releases=[release], - sites=["siteA", "siteB"], - enable_pod_identity=True, - home_releases=[release], - ) - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation") as mock_pia: - AWSWorkloadClusters._define_pod_identity_associations(mock) - # 2 sites × (5 mandatory + 1 home) = 12 - assert mock_pia.call_count == 12 - names_called = [c[0][0] for c in mock_pia.call_args_list] - assert sum(1 for n in names_called if "home" in n) == 2 # one per site - - -def test_runtime_error_when_external_secrets_roles_missing_key(): - """RuntimeError is raised when ESO is enabled but external_secrets_roles is missing the release key.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA"], - enable_pod_identity=True, - enable_eso=True, - ) - # Deliberately empty external_secrets_roles to simulate _define_external_secrets_iam not being called - mock.external_secrets_roles = {} - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): - with pytest.raises(RuntimeError, match="external_secrets_roles missing key"): - AWSWorkloadClusters._define_pod_identity_associations(mock) - - -def test_runtime_error_when_connect_roles_missing_key(): - """RuntimeError is raised when connect_roles is missing the release key.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA"], - enable_pod_identity=True, - ) - mock.connect_roles = {} - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): - with pytest.raises(RuntimeError, match="connect_roles missing key"): - AWSWorkloadClusters._define_pod_identity_associations(mock) - - -def test_runtime_error_when_connect_session_roles_missing_key(): - """RuntimeError is raised when connect_session_roles is missing the release-site key.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA"], - enable_pod_identity=True, - ) - mock.connect_session_roles = {} - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): - with pytest.raises(RuntimeError, match="connect_session_roles missing key"): - AWSWorkloadClusters._define_pod_identity_associations(mock) - - -def test_runtime_error_when_workbench_roles_missing_key(): - """RuntimeError is raised when workbench_roles is missing the release key.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA"], - enable_pod_identity=True, - ) - mock.workbench_roles = {} - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): - with pytest.raises(RuntimeError, match="workbench_roles 
missing key"): - AWSWorkloadClusters._define_pod_identity_associations(mock) - - -def test_runtime_error_when_workbench_session_roles_missing_key(): - """RuntimeError is raised when workbench_session_roles is missing the release-site key.""" - mock = _make_clusters_mock( - releases=["20250328"], - sites=["siteA"], - enable_pod_identity=True, - ) - mock.workbench_session_roles = {} - with patch("ptd.pulumi_resources.aws_workload_clusters.aws.eks.PodIdentityAssociation"): - with pytest.raises(RuntimeError, match="workbench_session_roles missing key"): - AWSWorkloadClusters._define_pod_identity_associations(mock) - - -def _make_role_mock(oidc_url_tails: list[str]) -> MagicMock: - """Build a minimal AWSWorkloadClusters mock for testing _define_k8s_iam_role.""" - m = MagicMock() - m._oidc_url_tails = oidc_url_tails - m.workload.cfg.account_id = "123456789012" - m.workload.iam_permissions_boundary = None - m.required_tags = {} - return m - - -def test_define_k8s_iam_role_fallback_path_pod_identity_no_oidc(): - """With pod_identity=True and no OIDC provider, policy uses caller ARN + Pod Identity statements.""" - m = _make_role_mock(oidc_url_tails=[]) - with ( - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, - patch("ptd.pulumi_resources.aws_workload_clusters.aws.get_caller_identity") as mock_id, - ): - mock_id.return_value.arn = "arn:aws:iam::123456789012:root" - AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=True) - policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) - - # Should have caller ARN statement (fallback) + Pod Identity statement - services = [ - s.get("Principal", {}).get("Service") - for s in policy["Statement"] - if isinstance(s.get("Principal"), dict) - ] - assert "pods.eks.amazonaws.com" in services - - principals_aws = [ - s.get("Principal", {}).get("AWS") - for s in policy["Statement"] - if isinstance(s.get("Principal"), dict) and "AWS" in s.get("Principal", {}) - ] - assert len(principals_aws) == 1 - - pod_stmt = next(s for s in policy["Statement"] if s.get("Principal", {}).get("Service") == "pods.eks.amazonaws.com") - assert "sts:AssumeRole" in pod_stmt["Action"] - assert "sts:TagSession" in pod_stmt["Action"] - - -def test_define_k8s_iam_role_trust_policy_includes_pod_identity_statement(): - """With pod_identity=True, the assume_role_policy includes pods.eks.amazonaws.com.""" - m = _make_role_mock(oidc_url_tails=["oidc.eks.us-east-1.amazonaws.com/id/ABCD1234"]) - with ( - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, - ): - AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=True) - policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) - - services = [ - s.get("Principal", {}).get("Service") - for s in policy["Statement"] - if isinstance(s.get("Principal"), dict) - ] - assert "pods.eks.amazonaws.com" in services - - pod_stmt = next(s for s in policy["Statement"] if s.get("Principal", {}).get("Service") == "pods.eks.amazonaws.com") - assert "sts:AssumeRole" in pod_stmt["Action"] - assert "sts:TagSession" in pod_stmt["Action"] - - -def test_define_k8s_iam_role_trust_policy_excludes_pod_identity_statement_when_disabled(): - """With pod_identity=False, the assume_role_policy does 
not include pods.eks.amazonaws.com.""" - m = _make_role_mock(oidc_url_tails=["oidc.eks.us-east-1.amazonaws.com/id/ABCD1234"]) - with ( - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.Role"), - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.RoleArgs") as mock_role_args, - ): - AWSWorkloadClusters._define_k8s_iam_role(m, name="test-role", release="test-release", namespace="test-ns", pod_identity=False) - policy = json.loads(mock_role_args.call_args.kwargs["assume_role_policy"]) - - services = [ - s.get("Principal", {}).get("Service") - for s in policy["Statement"] - if isinstance(s.get("Principal"), dict) - ] - assert "pods.eks.amazonaws.com" not in services - - -def test_define_external_secrets_iam_skipped_when_disabled(): - """When enable_external_secrets_operator=False, no IAM roles are created and external_secrets_roles is empty.""" - m = MagicMock() - m.managed_clusters_by_release = ["20250328"] - m.external_secrets_roles = {} - cluster_cfg = MagicMock() - cluster_cfg.enable_external_secrets_operator = False - m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg - - AWSWorkloadClusters._define_external_secrets_iam(m) - # _define_k8s_iam_role is resolved on the mock instance; call_count==0 means it was never called. - assert m._define_k8s_iam_role.call_count == 0 - assert m.external_secrets_roles == {} - - -def test_define_external_secrets_iam_creates_role_per_release_when_enabled(): - """When enable_external_secrets_operator=True, one IAM role is created per release.""" - m = MagicMock() - m.managed_clusters_by_release = ["20250328", "20250415"] - m.external_secrets_roles = {} - cluster_cfg = MagicMock() - cluster_cfg.enable_external_secrets_operator = True - m.workload.cfg.clusters.__getitem__ = lambda _self, k: cluster_cfg - - AWSWorkloadClusters._define_external_secrets_iam(m) - assert m._define_k8s_iam_role.call_count == 2 - assert set(m.external_secrets_roles.keys()) == {"20250328", "20250415"} - for call in m._define_k8s_iam_role.call_args_list: - assert call.kwargs.get("pod_identity") is True - - -def test_eso_read_secrets_inline_scoped_arn_no_list_secrets(): - """ESO policy must be scoped to compound_name/* and must not include ListSecrets.""" - m = MagicMock() - m.workload.cfg.region = "us-east-1" - m.workload.compound_name = "myworkload" - - with ( - patch("ptd.pulumi_resources.aws_workload_clusters.aws.get_caller_identity") as mock_id, - patch("ptd.pulumi_resources.aws_workload_clusters.aws.iam.get_policy_document") as mock_gpd, - ): - mock_id.return_value.account_id = "123456789012" - mock_gpd.return_value.json = "{}" - AWSWorkloadClusters._define_eso_read_secrets_inline(m) - - statements = mock_gpd.call_args.kwargs["statements"] - assert len(statements) == 1 - stmt = statements[0] - assert "secretsmanager:ListSecrets" not in stmt.actions - assert "secretsmanager:GetSecretValue" in stmt.actions - assert "secretsmanager:DescribeSecret" in stmt.actions - assert stmt.resources == ["arn:aws:secretsmanager:us-east-1:123456789012:secret:myworkload/*"] From 4d039c8382018f42ebcebd292603eb641d283298 Mon Sep 17 00:00:00 2001 From: ian-flores Date: Thu, 26 Feb 2026 17:23:31 -0800 Subject: [PATCH 31/31] feat: add Azure cloud-agnostic wiring (ESO, storage, Workload Identity) Add Azure support for cloud-agnostic infrastructure patterns: **external-secrets-operator (ESO):** - Deploy ESO Helm chart with Azure Key Vault provider - Create ClusterSecretStore using Azure Workload Identity auth - Create ExternalSecret CRs per site to sync from Key Vault to 
K8s Secrets - Add managed identity with Key Vault Secrets User role for ESO - Site CRs reference secrets by K8s Secret name when enabled **Storage:** - Set storageClassName=azure-netapp-files for product volumes - Set packageManagerStorageClassName for Azure Files CSI (Package Manager) - Azure NetApp and Azure Files StorageClasses already exist **IAM (Azure Workload Identity):** - Set serviceAccountName per product for identity binding contract - Set serviceAccountAnnotations with azure.workload.identity/client-id - Set podLabels with azure.workload.identity/use=true - Annotations populated by infrastructure layer with managed identity client IDs **Feature flags:** - enable_external_secrets_operator: Deploy ESO and wire secret names - enable_cloud_agnostic_storage: Use StorageClass pattern **Infrastructure:** - Add KEY_VAULT_SECRETS_USER_ROLE_DEFINITION_ID to azure_roles - Add external_secrets_operator_version to AzureWorkloadClusterComponentConfig --- python-pulumi/src/ptd/azure_roles.py | 1 + python-pulumi/src/ptd/azure_workload.py | 5 + .../pulumi_resources/azure_workload_helm.py | 166 ++++++++++++++++++ .../pulumi_resources/azure_workload_sites.py | 135 ++++++++++++-- 4 files changed, 296 insertions(+), 11 deletions(-) diff --git a/python-pulumi/src/ptd/azure_roles.py b/python-pulumi/src/ptd/azure_roles.py index 24ce971..d2013ba 100644 --- a/python-pulumi/src/ptd/azure_roles.py +++ b/python-pulumi/src/ptd/azure_roles.py @@ -2,6 +2,7 @@ ACR_PULL_ROLE_DEFINITION_ID = "7f951dda-4ed3-4680-a7ca-43fe172d538d" CONTRIBUTOR_ROLE_DEFINITION_ID = "b24988ac-6180-42a0-ab88-20f7382dd24c" DNS_ZONE_CONTRIBUTOR_ROLE_DEFINITION_ID = "befefa01-2a29-4197-83a8-272ff33ce314" +KEY_VAULT_SECRETS_USER_ROLE_DEFINITION_ID = "4633458b-17de-408a-b874-0445c86b69e6" NETWORK_CONTRIBUTOR_ROLE_DEFINITION_ID = "4d97b98b-1d4f-4787-a291-c67834d212e7" READER_ROLE_DEFINITION_ID = "acdd72a7-3385-48ef-bd42-f606fba81ae7" STORAGE_BLOB_DATA_CONTRIBUTOR_ROLE_DEFINITION_ID = "ba92f5b4-2d11-453d-a403-e96b0029c9fe" diff --git a/python-pulumi/src/ptd/azure_workload.py b/python-pulumi/src/ptd/azure_workload.py index dae9a08..ebaa393 100644 --- a/python-pulumi/src/ptd/azure_workload.py +++ b/python-pulumi/src/ptd/azure_workload.py @@ -112,10 +112,15 @@ class AzureWorkloadClusterConfig(ptd.WorkloadClusterConfig): use_lets_encrypt: bool = False + # Cloud-agnostic infrastructure feature flags + enable_external_secrets_operator: bool = False + enable_cloud_agnostic_storage: bool = False + @dataclasses.dataclass(frozen=True) class AzureWorkloadClusterComponentConfig(ptd.WorkloadClusterComponentConfig): secret_store_csi_driver_azure_provider_version: str | None = "1.5.6" # noqa: S105 + external_secrets_operator_version: str | None = "0.10.7" def load_workload_cluster_site_dict( diff --git a/python-pulumi/src/ptd/pulumi_resources/azure_workload_helm.py b/python-pulumi/src/ptd/pulumi_resources/azure_workload_helm.py index df6fab8..b05b6d2 100644 --- a/python-pulumi/src/ptd/pulumi_resources/azure_workload_helm.py +++ b/python-pulumi/src/ptd/pulumi_resources/azure_workload_helm.py @@ -17,6 +17,51 @@ GRAFANA_NAMESPACE = "grafana" LOKI_NAMESPACE = "loki" MIMIR_NAMESPACE = "mimir" +ESO_NAMESPACE = "external-secrets" +ESO_SERVICE_ACCOUNT = "external-secrets" +CLUSTER_SECRET_STORE_NAME = "azure-key-vault" # noqa: S105 +# v1beta1 matches external_secrets_operator_version default "0.10.7". +# Update this if ESO is upgraded past the version that drops v1beta1 support. 
+ESO_API_VERSION = "external-secrets.io/v1beta1" + + +def _eso_helm_values() -> dict: + """Build the Helm values dict for external-secrets-operator.""" + return { + "installCRDs": True, + "serviceAccount": { + "create": True, + "name": ESO_SERVICE_ACCOUNT, + }, + } + + +def _cluster_secret_store_spec(tenant_id: str, vault_url: str) -> dict: + """Build the ClusterSecretStore spec for Azure Key Vault (Workload Identity auth). + + Args: + tenant_id: Azure tenant ID + vault_url: Azure Key Vault URL (e.g., https://.vault.azure.net/) + + Returns: + ClusterSecretStore spec dict for Azure Key Vault provider + """ + return { + "provider": { + "azurekv": { + "authType": "WorkloadIdentity", + "tenantId": tenant_id, + "vaultUrl": vault_url, + "serviceAccountRef": { + "name": ESO_SERVICE_ACCOUNT, + "namespace": ESO_NAMESPACE, + }, + }, + }, + "conditions": [ + {"namespaceSelector": {"matchLabels": {"kubernetes.io/metadata.name": ptd.POSIT_TEAM_NAMESPACE}}} + ], + } class AzureWorkloadHelm(pulumi.ComponentResource): @@ -67,6 +112,10 @@ def __init__(self, workload: ptd.azure_workload.AzureWorkload, *args, **kwargs): for release in self.managed_clusters_by_release: components = self.workload.cfg.clusters[release].components + # Deploy external-secrets-operator (opt-in via enable_external_secrets_operator) + if self.workload.cfg.clusters[release].enable_external_secrets_operator: + self._define_external_secrets_operator(release, components.external_secrets_operator_version) + self._define_external_dns(release, components.external_dns_version) self._define_loki(release, components.loki_version) self._define_mimir(release, components.mimir_version) @@ -673,3 +722,120 @@ def _define_kube_state_metrics(self, release: str, version: str): }, opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), ) + + def _define_external_secrets_operator(self, release: str, version: str | None) -> None: + """Deploy external-secrets-operator and create ClusterSecretStore for Azure Key Vault. + + Note: the ClusterSecretStore is created with ``depends_on=[eso_helm_release]``, which + ensures Pulumi registers it after the HelmChart CR object exists in the API server. + However, this does NOT wait for the Helm release to complete and CRDs to be installed. + On a fresh deploy, the ClusterSecretStore apply will fail until ESO's CRDs converge + (~1-2 reconcile loops). This is an architectural constraint of using HelmChart CRDs + rather than ``pulumi_kubernetes.helm.v3.Release``. + """ + # Create managed identity for ESO with Key Vault Secrets User role + eso_identity = self._define_key_vault_secrets_managed_identity( + release=release, component="eso", namespace=ESO_NAMESPACE, service_account=ESO_SERVICE_ACCOUNT + ) + + # Deploy external-secrets-operator Helm chart + # Note: helm-controller auto-creates the targetNamespace from the HelmChart CR, + # so the "external-secrets" namespace does not need to be created explicitly here. 
+ eso_spec: dict = { + "repo": "https://charts.external-secrets.io", + "chart": "external-secrets", + "targetNamespace": ESO_NAMESPACE, + "valuesContent": eso_identity.client_id.apply( + lambda client_id: yaml.dump( + { + **_eso_helm_values(), + "serviceAccount": { + "create": True, + "name": ESO_SERVICE_ACCOUNT, + "annotations": { + "azure.workload.identity/client-id": client_id, + }, + "labels": { + "azure.workload.identity/use": "true", + }, + }, + "podLabels": { + "azure.workload.identity/use": "true", + }, + } + ) + ), + } + if version is not None: + eso_spec["version"] = version + + eso_helm_release = kubernetes.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-external-secrets-helm-release", + metadata=kubernetes.meta.v1.ObjectMetaArgs( + name="external-secrets", + namespace=ptd.HELM_CONTROLLER_NAMESPACE, + labels=self.required_tags, + ), + api_version="helm.cattle.io/v1", + kind="HelmChart", + spec=eso_spec, + opts=pulumi.ResourceOptions(provider=self.kube_providers[release]), + ) + + # Create ClusterSecretStore for Azure Key Vault. + # depends_on the HelmChart CR so Pulumi applies it after the ESO chart CR is registered. + # CustomTimeouts makes the eventual-consistency explicit: on a fresh cluster the CRD may not + # be available immediately; Pulumi will retry for up to 10 minutes before failing. + vault_url = f"https://{self.workload.key_vault_name}.vault.azure.net/" + kubernetes.apiextensions.CustomResource( + f"{self.workload.compound_name}-{release}-cluster-secret-store", + metadata=kubernetes.meta.v1.ObjectMetaArgs( + name=CLUSTER_SECRET_STORE_NAME, + labels=self.required_tags, + ), + api_version=ESO_API_VERSION, + kind="ClusterSecretStore", + spec=_cluster_secret_store_spec(tenant_id=self.workload.cfg.tenant_id, vault_url=vault_url), + opts=pulumi.ResourceOptions( + provider=self.kube_providers[release], + depends_on=[eso_helm_release], + custom_timeouts=pulumi.CustomTimeouts(create="10m"), + ), + ) + + def _define_key_vault_secrets_managed_identity( + self, release: str, component: str, namespace: str, service_account: str + ) -> azure.managedidentity.UserAssignedIdentity: + """Create a managed identity with Key Vault Secrets User role and federated identity credential.""" + identity = azure.managedidentity.UserAssignedIdentity( + resource_name=f"id-{self.workload.compound_name}-{release}-{component}", + resource_group_name=self.workload.resource_group_name, + location=self.workload.cfg.region, + tags=self.workload.required_tags, + opts=pulumi.ResourceOptions(parent=self), + ) + + # Grant Key Vault Secrets User role (read-only access to secrets) + azure.authorization.RoleAssignment( + f"{self.workload.compound_name}-{release}-{component}-kv-secrets-user", + scope=f"/subscriptions/{self.workload.cfg.subscription_id}/resourceGroups/{self.workload.resource_group_name}/providers/Microsoft.KeyVault/vaults/{self.workload.key_vault_name}", + principal_id=identity.principal_id, + role_definition_id=f"/providers/Microsoft.Authorization/roleDefinitions/{ptd.azure_roles.KEY_VAULT_SECRETS_USER_ROLE_DEFINITION_ID}", + principal_type=azure.authorization.PrincipalType.SERVICE_PRINCIPAL, + opts=pulumi.ResourceOptions(parent=identity), + ) + + # Create federated identity credential for Workload Identity + oidc_issuer_url = self.workload.cluster_oidc_issuer_url(release) + azure.managedidentity.FederatedIdentityCredential( + resource_name=f"fedid-{self.workload.compound_name}-{release}-{component}", + resource_name_=identity.name, + 
federated_identity_credential_resource_name=f"fedid-{self.workload.compound_name}-{release}-{component}", + resource_group_name=self.workload.resource_group_name, + subject=f"system:serviceaccount:{namespace}:{service_account}", + issuer=oidc_issuer_url, + audiences=["api://AzureADTokenExchange"], + opts=pulumi.ResourceOptions(parent=identity), + ) + + return identity diff --git a/python-pulumi/src/ptd/pulumi_resources/azure_workload_sites.py b/python-pulumi/src/ptd/pulumi_resources/azure_workload_sites.py index c4036ad..4398c26 100644 --- a/python-pulumi/src/ptd/pulumi_resources/azure_workload_sites.py +++ b/python-pulumi/src/ptd/pulumi_resources/azure_workload_sites.py @@ -6,11 +6,38 @@ import pulumi import pulumi_kubernetes as kubernetes +import ptd import ptd.azure_sdk import ptd.azure_workload import ptd.pulumi_resources.team_site import ptd.secrecy +# Constants for external-secrets-operator +CLUSTER_SECRET_STORE_NAME = "azure-key-vault" # noqa: S105 +ESO_API_VERSION = "external-secrets.io/v1beta1" + + +def _external_secret_spec(site_name: str, secret_key: str) -> dict: + """Build the ExternalSecret spec dict for a site.""" + return { + "refreshInterval": "1h", + "secretStoreRef": { + "name": CLUSTER_SECRET_STORE_NAME, + "kind": "ClusterSecretStore", + }, + "target": { + "name": f"{site_name}-secrets", + "creationPolicy": "Owner", + }, + "dataFrom": [ + { + "extract": { + "key": secret_key, + } + } + ], + } + class AzureWorkloadSites(pulumi.ComponentResource): workload: ptd.azure_workload.AzureWorkload @@ -62,18 +89,22 @@ def __init__(self, workload: ptd.azure_workload.AzureWorkload, *args, **kwargs): } self._define_team_sites() + self._define_external_secrets() def _define_team_sites(self): self.team_sites = {} - def set_workload_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): - if obj["kind"] != "Site": - return + for release in self.managed_clusters_by_release: + cluster_cfg = self.workload.cfg.clusters.get(release) + + def generate_set_workload_fields( + _release: str, cluster_cfg: typing.Any + ) -> ptd.pulumi_resources.KustomizeTransformationFunc: + def set_workload_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): + if obj["kind"] != "Site": + return - obj["spec"] = deepmerge.always_merger.merge( - obj.get("spec", {}), - copy.deepcopy( - { + site_spec = { # TODO: set chronicle and ppm storage buckets "domain": self.workload.cfg.domain, "networkTrust": self.workload.cfg.network_trust.value, @@ -89,10 +120,52 @@ def set_workload_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): "type": "azure-netapp", }, } - ), - ) - for release in self.managed_clusters_by_release: + # Cloud-agnostic storage (when enabled) + if cluster_cfg and cluster_cfg.enable_cloud_agnostic_storage: + site_spec["storageClassName"] = "azure-netapp-files" + # Package Manager continues to use Azure Files CSI + site_spec["packageManagerStorageClassName"] = self.workload.azure_files_csi_storage_class_name + + # Cloud-agnostic secrets (when external-secrets-operator is enabled) + if cluster_cfg and cluster_cfg.enable_external_secrets_operator: + # Use K8s Secret names instead of type+vaultName + site_name = obj.get("metadata", {}).get("name", "") + site_spec["secret"] = {"name": f"{site_name}-secrets"} + # Note: Azure doesn't have a workload-level secret like AWS, so we omit workloadSecret + + # Cloud-agnostic IAM (Azure Workload Identity) + # Always set Workload Identity annotations/labels for Azure (they're used by all products) + site_name = obj.get("metadata", 
{}).get("name", "") + # Note: In Azure, we need to get the managed identities for each product. + # For now, we'll set placeholder annotations that will be filled in by azure_workload_products.py + # or we can set them here if we have the managed identity client IDs available. + # For this implementation, we'll follow the pattern and set serviceAccountName + annotations/labels. + + # Set explicit ServiceAccount names + site_spec.setdefault("connect", {})["serviceAccountName"] = f"{site_name}-connect" + site_spec.setdefault("workbench", {})["serviceAccountName"] = f"{site_name}-workbench" + site_spec.setdefault("packageManager", {})["serviceAccountName"] = f"{site_name}-packagemanager" + site_spec.setdefault("chronicle", {})["serviceAccountName"] = f"{site_name}-chronicle" + site_spec.setdefault("flightdeck", {})["serviceAccountName"] = f"{site_name}-home" + + # Set Workload Identity pod labels (same for all products) + for product in ["connect", "workbench", "packageManager", "chronicle", "flightdeck"]: + site_spec.setdefault(product, {})["podLabels"] = { + "azure.workload.identity/use": "true", + } + + # ServiceAccount annotations need to be set with the managed identity client IDs + # These will be populated by the infrastructure layer that creates the managed identities + # For now, we leave them empty as they'll be set by azure_workload_products.py or similar + + obj["spec"] = deepmerge.always_merger.merge( + obj.get("spec", {}), + copy.deepcopy(site_spec), + ) + + return set_workload_fields + for site_name in sorted(self.workload.cfg.sites.keys()): def generate_set_site_fields( @@ -120,7 +193,7 @@ def set_site_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): site_name=site_name, kubeconfig=self.kubeconfigs[release], transformations=[ - set_workload_fields, + generate_set_workload_fields(release, cluster_cfg), generate_set_site_fields(site_name), ], opts=pulumi.ResourceOptions( @@ -128,3 +201,43 @@ def set_site_fields(obj: dict[str, typing.Any], _: pulumi.ResourceOptions): providers=[self.kube_providers[release]], ), ) + + def _define_external_secrets(self) -> None: + """ + Create ExternalSecret CRs for each site to sync secrets from Azure Key Vault to K8s Secrets. + + This creates K8s Secrets that the operator can reference by name instead of using CSI drivers. + + Note: these CRs reference the `azure-key-vault` ClusterSecretStore which is created by + AzureWorkloadHelm. No Pulumi ``depends_on`` is wired here because even if we declared one, it + would only guarantee the HelmChart CR object exists — not that ESO's CRDs have converged. + The ClusterSecretStore will retry until ESO is ready (~1-2 reconcile loops). + """ + for release in self.managed_clusters_by_release: + if not self.workload.cfg.clusters[release].enable_external_secrets_operator: + continue + + # Note: Azure doesn't have a workload-level secret like AWS. + # Each site has its own secret in Azure Key Vault. 
+
+            # Create ExternalSecret for each site
+            for site_name in sorted(self.workload.cfg.sites.keys()):
+                # Azure site secrets are stored as: <compound-name>-<site-name>-secrets
+                secret_key = f"{self.workload.compound_name}-{site_name}-secrets"
+
+                kubernetes.apiextensions.CustomResource(
+                    f"{self.workload.compound_name}-{release}-{site_name}-external-secret",
+                    metadata=kubernetes.meta.v1.ObjectMetaArgs(
+                        name=f"{site_name}-secrets",
+                        namespace=ptd.POSIT_TEAM_NAMESPACE,
+                        labels=self.required_tags,
+                    ),
+                    api_version=ESO_API_VERSION,
+                    kind="ExternalSecret",
+                    spec=_external_secret_spec(site_name, secret_key),
+                    opts=pulumi.ResourceOptions(
+                        parent=self,
+                        provider=self.kube_providers[release],
+                        custom_timeouts=pulumi.CustomTimeouts(create="10m"),
+                    ),
+                )
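
As a quick sanity check, here is a minimal sketch of what the two spec helpers
added above render. It assumes the ptd package is importable; the site name
"siteA", the workload name "myworkload", the tenant ID, and the vault URL are
all hypothetical placeholders, not values from a real deployment:

    import yaml

    from ptd.pulumi_resources.azure_workload_helm import _cluster_secret_store_spec
    from ptd.pulumi_resources.azure_workload_sites import _external_secret_spec

    # ExternalSecret: syncs the Key Vault secret "myworkload-siteA-secrets" into
    # a K8s Secret named "siteA-secrets", refreshed hourly, owned by ESO.
    print(yaml.dump(_external_secret_spec("siteA", "myworkload-siteA-secrets")))

    # ClusterSecretStore: Azure Key Vault provider with Workload Identity auth,
    # exchanging tokens via the external-secrets ServiceAccount.
    print(yaml.dump(_cluster_secret_store_spec(
        tenant_id="00000000-0000-0000-0000-000000000000",
        vault_url="https://myworkload-kv.vault.azure.net/",
    )))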