Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/KNOWN_ISSUES.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ With this setting disabled, the infrastructure will deploy successfully without



### Traefik v3 CRD Migration

**Background:**
PTD upgraded the Traefik ingress controller from v2 (Helm chart `24.x`) to v3 (Helm chart `33.x`). Traefik v3 ships updated CRD schemas for `IngressRoute`, `Middleware`, `TLSOption`, and related resources.

**Risk:**
If any workloads use Traefik v2-style CRDs or CRD fields that were removed or renamed in v3 (e.g., resources still declared under the `traefik.containo.us` API group instead of `traefik.io`, `ServersTransport` → `ServersTransports`, changed TLS store/options schemas, modified middleware CRD fields), those resources may fail to reconcile after the Helm upgrade without producing obvious errors.

**Before rolling out to production clusters:**
1. Audit existing `IngressRoute`, `Middleware`, `TLSOption`, and `ServersTransport` resources against the [Traefik v3 migration guide](https://doc.traefik.io/traefik/migration/v2-to-v3/).
2. Pay particular attention to the `ServersTransport` → `ServersTransports` rename and any middleware fields your workloads rely on.
3. Test the upgrade in a staging cluster before applying to production.


## Workarounds and Best Practices

### Direct Pulumi Stack Access with `ptd workon`
Expand Down
2 changes: 1 addition & 1 deletion python-pulumi/src/ptd/aws_control_room.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class AWSControlRoomConfig:
tailscale_enabled: bool = True
tigera_operator_version: str = "3.27.2"
traefik_forward_auth_version: str = "0.0.14"
traefik_version: str = "24.0.0"
traefik_version: str = "33.2.1"
ebs_csi_addon_version: str = "v1.41.0-eksbuild.1"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def extract_dvo_records(dvos) -> list[aws.route53.Record]:
"",
cert,
deployment_replicas=self.control_room.cfg.traefik_deployment_replicas,
version=self.control_room.cfg.traefik_version,
opts=pulumi.ResourceOptions(
parent=self.eks,
provider=self.eks.provider,
Expand Down
169 changes: 95 additions & 74 deletions python-pulumi/src/ptd/pulumi_resources/traefik.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,91 @@ def _build_nlb_tag_string(tags: dict[str, str] | None, cluster_name: str) -> str
)


def _build_traefik_helm_values(
node_selector: str,
deployment_replicas: int,
cert_arn,
nlb_tags: str,
) -> dict:
"""Build the Helm values dict for the Traefik chart (v3 syntax)."""
return {
"service": {
"type": "LoadBalancer",
"annotations": {
"service.beta.kubernetes.io/aws-load-balancer-type": "external",
"service.beta.kubernetes.io/aws-load-balancer-scheme": "internet-facing",
"service.beta.kubernetes.io/aws-load-balancer-ip-address-type": "ipv4",
"service.beta.kubernetes.io/aws-load-balancer-nlb-target-type": "ip",
"service.beta.kubernetes.io/aws-load-balancer-ssl-cert": cert_arn,
"service.beta.kubernetes.io/aws-load-balancer-ssl-ports": "443",
"service.beta.kubernetes.io/aws-load-balancer-access-log-enabled": "false",
"service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy": "ELBSecurityPolicy-FS-1-2-2019-08",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-healthy-threshold": "3",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-unhealthy-threshold": "3",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout": "10",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval": "10",
"service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags": nlb_tags,
},
},
"ports": {
"web": {
"redirections": {
"entryPoint": {
"to": "websecure",
"scheme": "https",
"permanent": True,
}
},
},
"websecure": {
"tls": {
"enabled": False,
}
},
},
"nodeSelector": (
{
"node.kubernetes.io/instance-type": node_selector,
}
if node_selector
else None
),
"providers": {
"kubernetesCRD": {
"enabled": True,
},
"kubernetesIngress": {
"enabled": True,
},
"publishedService": {
"enabled": True,
},
},
"additionalArguments": [
"--metrics.prometheus=true",
],
"deployment": {
"replicas": deployment_replicas,
},
"logs": {
"general": {"level": "DEBUG"},
"access": {"enabled": True},
},
"image": {
"registry": "ghcr.io/traefik",
},
"ingressClass": {
"enabled": True,
"isDefaultClass": True,
},
"ingressRoute": {
"dashboard": {
"enabled": True,
}
},
}


class Traefik(pulumi.ComponentResource):
def __init__(
self,
Expand All @@ -44,6 +129,7 @@ def __init__(
cert: aws.acm.Certificate | None = None,
deployment_replicas: int = 3,
*args,
version: str,
**kwargs,
):
"""
Expand All @@ -63,6 +149,7 @@ def __init__(
self.cluster = cluster
self.node_selector: str = node_selector
self.deployment_replicas = deployment_replicas
self.version = version
self.traefik: k8s.helm.v3.Release | None = None
self._deploy(cert)

Expand Down Expand Up @@ -156,84 +243,18 @@ def _deploy(self, cert: aws.acm.Certificate | None):
f"{self.cluster.name}-traefik",
k8s.helm.v3.ReleaseArgs(
chart="traefik",
version="24.0.0",
version=self.version,
namespace=self.namespace,
name="traefik",
repository_opts=k8s.helm.v3.RepositoryOptsArgs(
repo="https://helm.traefik.io/traefik/",
repo="https://traefik.github.io/charts",
),
values=_build_traefik_helm_values(
node_selector=self.node_selector,
deployment_replicas=self.deployment_replicas,
cert_arn=(cert.arn if cert else None),
nlb_tags=nlb_tags,
),
values={
"service": {
# TODO: we could make this into an ingress if we want...?
"type": "LoadBalancer",
"annotations": {
"service.beta.kubernetes.io/aws-load-balancer-type": "external",
"service.beta.kubernetes.io/aws-load-balancer-scheme": "internet-facing",
"service.beta.kubernetes.io/aws-load-balancer-ip-address-type": "ipv4",
"service.beta.kubernetes.io/aws-load-balancer-nlb-target-type": "ip",
"service.beta.kubernetes.io/aws-load-balancer-ssl-cert": (cert.arn if cert else None),
"service.beta.kubernetes.io/aws-load-balancer-ssl-ports": "443",
"service.beta.kubernetes.io/aws-load-balancer-access-log-enabled": "false",
"service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy": "ELBSecurityPolicy-FS-1-2-2019-08",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-healthy-threshold": "3",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-unhealthy-threshold": "3",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout": "10",
"service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval": "10",
"service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags": nlb_tags,
},
},
"ports": {
"web": {
"redirectTo": "websecure",
},
"websecure": {
"tls": {
"enabled": False,
}
},
},
"nodeSelector": (
{
# "meta" nodes
"node.kubernetes.io/instance-type": self.node_selector,
}
if self.node_selector
else None
),
"providers": {
"kubernetesCRD": {
"enabled": True,
},
"kubernetesIngress": {
"enabled": True,
},
"publishedService": {
"enabled": True,
},
},
"additionalArguments": [
"--metrics.prometheus=true",
],
"deployment": {
"replicas": self.deployment_replicas,
},
"logs": {
"general": {"level": "DEBUG"},
"access": {"enabled": True},
},
"image": {
"registry": "ghcr.io/traefik",
},
"ingressClass": {
"enabled": True,
"default": True,
},
"ingressRoute": {
"dashboard": {
"enabled": True,
}
},
},
),
opts=pulumi.ResourceOptions(
provider=self.cluster.provider,
Expand Down
60 changes: 59 additions & 1 deletion python-pulumi/tests/test_traefik_nlb_tags.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from ptd.pulumi_resources.traefik import _build_nlb_tag_string
from ptd.pulumi_resources.traefik import _build_nlb_tag_string, _build_traefik_helm_values


def test_build_nlb_tag_string_happy_path() -> None:
Expand Down Expand Up @@ -70,6 +70,64 @@ def test_build_nlb_tag_string_invalid_environment_value() -> None:
)


def _make_traefik_values(**overrides):
    """Build Traefik Helm values from baseline arguments, with keyword overrides applied on top."""
    baseline = {
        "node_selector": "",
        "deployment_replicas": 3,
        "cert_arn": None,
        "nlb_tags": "posit.team/true-name=myapp,posit.team/environment=prod,Name=cluster",
    }
    # Later keys win, so anything the caller passes replaces the baseline entry.
    return _build_traefik_helm_values(**{**baseline, **overrides})


def test_traefik_v3_redirect_uses_redirections_syntax() -> None:
    """The web entrypoint redirects via a ``redirections`` block, not the ``redirectTo`` key."""
    web_port = _make_traefik_values()["ports"]["web"]
    assert "redirectTo" not in web_port, "v2 redirect syntax must not be present"
    assert "redirections" in web_port
    entry_point = web_port["redirections"]["entryPoint"]
    assert entry_point["to"] == "websecure"
    assert entry_point["scheme"] == "https"
    assert entry_point["permanent"] is True


def test_traefik_v3_ingress_class_uses_is_default_class() -> None:
    """The ingress class is enabled and marked default via ``isDefaultClass``, not ``default``."""
    ingress_cfg = _make_traefik_values()["ingressClass"]
    assert "default" not in ingress_cfg, "v2 'default' key must not be present"
    assert ingress_cfg["isDefaultClass"] is True
    assert ingress_cfg["enabled"] is True


def test_traefik_node_selector_empty_string_yields_none() -> None:
    """An empty node selector string results in ``nodeSelector`` being ``None``."""
    selector = _make_traefik_values(node_selector="")["nodeSelector"]
    assert selector is None


def test_traefik_node_selector_set_yields_dict() -> None:
    """A non-empty node selector is emitted as an instance-type selector mapping."""
    expected = {"node.kubernetes.io/instance-type": "m5.xlarge"}
    selector = _make_traefik_values(node_selector="m5.xlarge")["nodeSelector"]
    assert selector == expected


def test_traefik_deployment_replicas_propagated() -> None:
    """The requested replica count lands at ``deployment.replicas``."""
    deployment = _make_traefik_values(deployment_replicas=5)["deployment"]
    assert deployment["replicas"] == 5


def test_traefik_cert_arn_none_sets_annotation_to_none() -> None:
    """With no certificate ARN, the ssl-cert annotation is present with value ``None``."""
    annotations = _make_traefik_values(cert_arn=None)["service"]["annotations"]
    assert annotations["service.beta.kubernetes.io/aws-load-balancer-ssl-cert"] is None


def test_traefik_cert_arn_set_propagates_to_annotation() -> None:
    """A supplied certificate ARN is copied verbatim into the ssl-cert annotation."""
    annotations = _make_traefik_values(cert_arn="arn:aws:acm:us-east-1:123:certificate/abc")["service"]["annotations"]
    ssl_cert = annotations["service.beta.kubernetes.io/aws-load-balancer-ssl-cert"]
    assert ssl_cert == "arn:aws:acm:us-east-1:123:certificate/abc"


def test_build_nlb_tag_string_extra_tags_are_dropped() -> None:
"""Extra tags in the input dict (e.g. aws:created-by, Cost-Center) are intentionally
discarded; only true-name, environment, and Name should appear in the output."""
Expand Down
Loading