From b6089a15f24747b7e7a4ed066d005a4943293565 Mon Sep 17 00:00:00 2001 From: Gareth Price Date: Mon, 6 Apr 2026 22:58:14 +0100 Subject: [PATCH 1/4] fix: harden proxy auth and project path handling --- docs/setup/security.md | 18 +++ src/phlo/capabilities/authentication.py | 85 +++++++++- src/phlo/infrastructure/config.py | 8 +- tests/test_authentication_provider.py | 200 ++++++++++++++++++++++++ tests/test_infrastructure_config.py | 26 +++ 5 files changed, 327 insertions(+), 10 deletions(-) diff --git a/docs/setup/security.md b/docs/setup/security.md index d8220b4fe..c5a5b3cde 100644 --- a/docs/setup/security.md +++ b/docs/setup/security.md @@ -40,6 +40,24 @@ openssl rand -base64 32 ## Authentication +### Phlo Reverse Proxy Authentication + +If you put Phlo behind a trusted reverse proxy, configure the app to accept asserted identity +headers only from that proxy and bind those headers into a shared-secret signature: + +```bash +# .phlo/.env.local +PHLO_AUTH_PROXY_ENABLED=true +PHLO_AUTH_PROXY_TRUSTED_PROXIES=127.0.0.1/32,10.0.0.0/8 +PHLO_AUTH_PROXY_SHARED_SECRET= +PHLO_AUTH_PROXY_HEADER_SUBJECT=X-Remote-User +PHLO_AUTH_PROXY_HEADER_EMAIL=X-Remote-Email +PHLO_AUTH_PROXY_HEADER_GROUPS=X-Remote-Groups +``` + +When `PHLO_AUTH_PROXY_SHARED_SECRET` is set, the proxy must sign the timestamp, remote address, +request path, and asserted identity headers so downstream header changes are rejected. + ### Option 1: LDAP Authentication LDAP works with Trino and MinIO. Configure your LDAP server details: diff --git a/src/phlo/capabilities/authentication.py b/src/phlo/capabilities/authentication.py index d29638ad3..fe531d4dd 100644 --- a/src/phlo/capabilities/authentication.py +++ b/src/phlo/capabilities/authentication.py @@ -8,7 +8,7 @@ import os import secrets from contextlib import suppress -from datetime import datetime, timezone +from datetime import UTC, datetime from typing import Any from phlo.capabilities.interfaces import ( @@ -208,7 +208,7 @@ def _is_session_valid(self, session: AuthenticatedSession) -> bool: """Check if session is still valid.""" if session.expires_at is None: return True - return datetime.now(timezone.utc) < session.expires_at # noqa: UP017 + return datetime.now(UTC) < session.expires_at class ProxyAuthenticationProvider: @@ -225,6 +225,7 @@ def __init__( header_subject: str = "X-Remote-User", header_email: str = "X-Remote-Email", header_groups: str = "X-Remote-Groups", + shared_secret: str | None = None, ): self._trusted_networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = [] self._trusted_hosts: set[str] = set() @@ -240,6 +241,17 @@ def __init__( self._header_subject = header_subject.lower() self._header_email = header_email.lower() self._header_groups = header_groups.lower() + self._signature_header = "x-phlo-proxy-signature" + self._timestamp_header = "x-phlo-proxy-timestamp" + self._shared_secret = shared_secret + + def _identity_payload_parts(self, request_context: RequestContext) -> tuple[str, str, str]: + """Return the asserted identity fields bound into the proxy signature.""" + subject = request_context.headers.get(self._header_subject, "") + email = request_context.headers.get(self._header_email, "") + groups_raw = request_context.headers.get(self._header_groups, "") + groups = ",".join(g.strip() for g in groups_raw.split(",") if g.strip()) + return subject, email, groups def _is_from_trusted_proxy(self, request_context: RequestContext) -> bool: """Check if request came from a trusted proxy using CIDR matching.""" @@ -257,6 +269,41 @@ def _is_from_trusted_proxy(self, request_context: RequestContext) -> bool: pass return False + def _verify_proxy_signature(self, request_context: RequestContext) -> bool: + """Verify that the request was signed by a trusted proxy with the shared secret.""" + if self._shared_secret is None: + return True + signature = request_context.headers.get(self._signature_header) + timestamp_str = request_context.headers.get(self._timestamp_header) + if not signature or not timestamp_str: + logger.debug("missing_proxy_signature", remote_addr=request_context.remote_addr) + return False + try: + timestamp = int(timestamp_str) + except ValueError: + logger.debug("invalid_proxy_timestamp", timestamp=timestamp_str) + return False + now = datetime.now(UTC).timestamp() + if abs(now - timestamp) > 300: + logger.debug("expired_proxy_timestamp", timestamp=timestamp_str) + return False + subject, email, groups = self._identity_payload_parts(request_context) + payload = ":".join( + [ + str(timestamp), + request_context.remote_addr or "", + request_context.path, + subject, + email, + groups, + ] + ) + expected = hmac.new(self._shared_secret.encode(), payload.encode(), "sha256").hexdigest() + if not hmac.compare_digest(signature, expected): + logger.debug("invalid_proxy_signature", remote_addr=request_context.remote_addr) + return False + return True + def authenticate(self, request_context: RequestContext) -> AuthResult: """Authenticate using proxy-asserted identity.""" if not self._is_from_trusted_proxy(request_context): @@ -275,7 +322,23 @@ def authenticate(self, request_context: RequestContext) -> AuthResult: reason_code="invalid_identity_payload", ) - subject = request_context.headers.get(self._header_subject) + if not self._verify_proxy_signature(request_context): + _log_auth_event( + "failure", + None, + "invalid_identity_payload", + "proxy", + auth_method="proxy", + path=request_context.path, + remote_addr=request_context.remote_addr, + reason="invalid_signature", + ) + return AuthResult( + authenticated=False, + reason_code="invalid_identity_payload", + ) + + subject, email, groups_raw = self._identity_payload_parts(request_context) if not subject: _log_auth_event( "failure", @@ -291,9 +354,7 @@ def authenticate(self, request_context: RequestContext) -> AuthResult: reason_code="missing_credentials", ) - email = request_context.headers.get(self._header_email) - groups_raw = request_context.headers.get(self._header_groups, "") - groups = tuple(g.strip() for g in groups_raw.split(",") if g.strip()) + groups = tuple(g for g in groups_raw.split(",") if g) principal = AuthPrincipal( subject=subject, @@ -445,6 +506,18 @@ def _load_proxy_config() -> dict[str, Any]: if header_subject: config["header_subject"] = header_subject + header_email = os.environ.get("PHLO_AUTH_PROXY_HEADER_EMAIL") + if header_email: + config["header_email"] = header_email + + header_groups = os.environ.get("PHLO_AUTH_PROXY_HEADER_GROUPS") + if header_groups: + config["header_groups"] = header_groups + + shared_secret = os.environ.get("PHLO_AUTH_PROXY_SHARED_SECRET") + if shared_secret: + config["shared_secret"] = shared_secret + return config diff --git a/src/phlo/infrastructure/config.py b/src/phlo/infrastructure/config.py index 237213707..ce97ccc8b 100644 --- a/src/phlo/infrastructure/config.py +++ b/src/phlo/infrastructure/config.py @@ -25,15 +25,15 @@ def _default_project_root() -> Path: """Resolve the default project root from environment or current working directory.""" project_root = os.environ.get("PHLO_PROJECT_PATH") if project_root: - resolved = Path(project_root).resolve() - if ".." in Path(project_root).parts: + raw_path = Path(project_root) + if ".." in raw_path.parts: logger.warning( "project_path_traversal_rejected", raw=project_root, - resolved=str(resolved), + reason="path_traversal_sequence", ) raise ValueError(f"PHLO_PROJECT_PATH contains path traversal: {project_root}") - return resolved + return raw_path.resolve() return Path.cwd() diff --git a/tests/test_authentication_provider.py b/tests/test_authentication_provider.py index b9947b606..55f4ed1a8 100644 --- a/tests/test_authentication_provider.py +++ b/tests/test_authentication_provider.py @@ -2,7 +2,9 @@ from __future__ import annotations +import hmac import os +import time from unittest.mock import patch import pytest @@ -145,6 +147,19 @@ def test_authenticate_without_token_fails(self): class TestProxyAuthenticationProvider: """Tests for ProxyAuthenticationProvider.""" + @staticmethod + def _proxy_signature( + secret: str, + timestamp: str, + remote_addr: str, + path: str, + subject: str = "", + email: str = "", + groups: str = "", + ) -> str: + payload = ":".join([timestamp, remote_addr, path, subject, email, groups]) + return hmac.new(secret.encode(), payload.encode(), "sha256").hexdigest() + def test_authenticate_from_trusted_proxy(self): """Test authentication succeeds from trusted proxy IP.""" provider = ProxyAuthenticationProvider(trusted_proxies=["127.0.0.1/32", "192.168.1.0/24"]) @@ -223,6 +238,165 @@ def test_cidr_network_matching(self): RequestContext(headers={}, cookies={}, query_params={}, remote_addr="172.16.0.1") ) + def test_authenticate_with_shared_secret_succeeds(self): + """Test authentication succeeds with valid signature (issue #338).""" + secret = "test-secret-123" + provider = ProxyAuthenticationProvider( + trusted_proxies=["127.0.0.1/32"], shared_secret=secret + ) + + timestamp = str(int(time.time())) + signature = self._proxy_signature( + secret, + timestamp, + "127.0.0.1", + "/test/path", + subject="proxy-user", + email="proxy@example.com", + ) + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + "x-remote-email": "proxy@example.com", + "x-phlo-proxy-signature": signature, + "x-phlo-proxy-timestamp": timestamp, + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is True + assert result.principal is not None + assert result.principal.subject == "proxy-user" + + def test_authenticate_without_signature_fails_when_secret_configured(self): + """Test authentication fails without signature when shared_secret is set (issue #338).""" + provider = ProxyAuthenticationProvider( + trusted_proxies=["127.0.0.1/32"], shared_secret="test-secret" + ) + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is False + assert result.reason_code == "invalid_identity_payload" + + def test_authenticate_with_invalid_signature_fails(self): + """Test authentication fails with invalid signature (issue #338).""" + provider = ProxyAuthenticationProvider( + trusted_proxies=["127.0.0.1/32"], shared_secret="test-secret" + ) + + timestamp = str(int(time.time())) + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + "x-phlo-proxy-signature": "invalid-signature", + "x-phlo-proxy-timestamp": timestamp, + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is False + assert result.reason_code == "invalid_identity_payload" + + def test_authenticate_with_expired_timestamp_fails(self): + """Test authentication fails with expired timestamp (issue #338).""" + secret = "test-secret-123" + provider = ProxyAuthenticationProvider( + trusted_proxies=["127.0.0.1/32"], shared_secret=secret + ) + + timestamp = str(int(time.time()) - 600) + signature = self._proxy_signature(secret, timestamp, "127.0.0.1", "/test/path") + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + "x-phlo-proxy-signature": signature, + "x-phlo-proxy-timestamp": timestamp, + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is False + assert result.reason_code == "invalid_identity_payload" + + def test_authenticate_without_secret_allows_unsigned_requests(self): + """Test authentication allows unsigned requests when no shared_secret configured.""" + provider = ProxyAuthenticationProvider(trusted_proxies=["127.0.0.1/32"]) + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + "x-remote-email": "proxy@example.com", + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is True + + def test_authenticate_rejects_signed_request_when_identity_headers_change(self): + """Test signature binds asserted identity fields to the authenticated principal.""" + secret = "test-secret-123" + provider = ProxyAuthenticationProvider( + trusted_proxies=["127.0.0.1/32"], shared_secret=secret + ) + + timestamp = str(int(time.time())) + signature = self._proxy_signature( + secret, + timestamp, + "127.0.0.1", + "/test/path", + subject="proxy-user", + email="proxy@example.com", + groups="admins,operators", + ) + + request_context = RequestContext( + headers={ + "x-remote-user": "other-user", + "x-remote-email": "proxy@example.com", + "x-remote-groups": "admins,operators", + "x-phlo-proxy-signature": signature, + "x-phlo-proxy-timestamp": timestamp, + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is False + assert result.reason_code == "invalid_identity_payload" + class TestAuthenticationProviderRegistration: """Tests for authentication provider registration.""" @@ -252,6 +426,32 @@ def test_no_auto_registration_by_default(self): providers = registry.list_authentication_providers() assert len(providers) == 0 + def test_register_proxy_provider_loads_shared_secret_from_environment(self): + """Test proxy shared secret is wired through environment config.""" + with patch.dict( + os.environ, + { + "PHLO_AUTH_PROXY_ENABLED": "true", + "PHLO_AUTH_PROXY_SHARED_SECRET": "env-secret", + "PHLO_AUTH_PROXY_TRUSTED_PROXIES": "127.0.0.1/32", + "PHLO_AUTH_PROXY_HEADER_EMAIL": "X-Test-Email", + "PHLO_AUTH_PROXY_HEADER_GROUPS": "X-Test-Groups", + }, + clear=True, + ): + clear_capabilities() + register_default_capability_providers() + registry = get_capability_registry() + providers = registry.list_authentication_providers() + + assert len(providers) == 1 + assert providers[0].name == "proxy" + provider = providers[0].provider + assert isinstance(provider, ProxyAuthenticationProvider) + assert provider._shared_secret == "env-secret" + assert provider._header_email == "x-test-email" + assert provider._header_groups == "x-test-groups" + class TestProviderSelection: """Tests for provider selection logic.""" diff --git a/tests/test_infrastructure_config.py b/tests/test_infrastructure_config.py index f6d450d55..37a670424 100644 --- a/tests/test_infrastructure_config.py +++ b/tests/test_infrastructure_config.py @@ -358,3 +358,29 @@ def test_clear_config_cache_refreshes_updated_file_contents(tmp_path: Path): assert refreshed_config is not first_config assert refreshed_config.container_naming_pattern == "{project}_{service}" assert get_container_name("dagster", "test-project", tmp_path) == "updated-container" + + +def test_phlo_project_path_rejects_path_traversal( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """PHLO_PROJECT_PATH with '..' sequences should be rejected (issue #339).""" + bad_path = tmp_path / ".." / "etc" + monkeypatch.setenv("PHLO_PROJECT_PATH", str(bad_path)) + + with pytest.raises(ValueError, match="path traversal"): + from phlo.infrastructure.config import _default_project_root + + _default_project_root() + + +def test_phlo_project_path_allows_explicit_absolute_path_outside_cwd( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """PHLO_PROJECT_PATH should honor explicit absolute paths for child processes and CI.""" + project_root = tmp_path / "external-project" + project_root.mkdir() + monkeypatch.setenv("PHLO_PROJECT_PATH", str(project_root)) + + from phlo.infrastructure.config import _default_project_root + + assert _default_project_root() == project_root.resolve() From f4b0e8478703c5055e632a6aecb9d1191badfb30 Mon Sep 17 00:00:00 2001 From: Gareth Price Date: Mon, 6 Apr 2026 23:02:50 +0100 Subject: [PATCH 2/4] fix: resolve ci typecheck failures --- src/phlo/capabilities/authentication.py | 19 ++++++++++--------- src/phlo/cli/commands/schema_migrate.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/phlo/capabilities/authentication.py b/src/phlo/capabilities/authentication.py index fe531d4dd..2b74680d4 100644 --- a/src/phlo/capabilities/authentication.py +++ b/src/phlo/capabilities/authentication.py @@ -288,16 +288,17 @@ def _verify_proxy_signature(self, request_context: RequestContext) -> bool: logger.debug("expired_proxy_timestamp", timestamp=timestamp_str) return False subject, email, groups = self._identity_payload_parts(request_context) - payload = ":".join( - [ - str(timestamp), - request_context.remote_addr or "", - request_context.path, - subject, - email, - groups, - ] + remote_addr = request_context.remote_addr or "" + path = request_context.path or "" + payload_parts: tuple[str, str, str, str, str, str] = ( + str(timestamp), + remote_addr, + path, + subject, + email, + groups, ) + payload = ":".join(payload_parts) expected = hmac.new(self._shared_secret.encode(), payload.encode(), "sha256").hexdigest() if not hmac.compare_digest(signature, expected): logger.debug("invalid_proxy_signature", remote_addr=request_context.remote_addr) diff --git a/src/phlo/cli/commands/schema_migrate.py b/src/phlo/cli/commands/schema_migrate.py index ecef7a097..e081f2c3e 100644 --- a/src/phlo/cli/commands/schema_migrate.py +++ b/src/phlo/cli/commands/schema_migrate.py @@ -61,7 +61,7 @@ def _resolve_migrator() -> Any: f"[red]Configured schema migrator '{configured_migrator}' is not registered.[/red]" ) console.print(f"Available schema migrators: {available}") - sys.exit(1) + raise SystemExit(1) return selected.provider default_table_store = configured_capability_name("table_store") From 35bc46abcff92323401bb1beb05efb1aee69ee34 Mon Sep 17 00:00:00 2001 From: Gareth Price Date: Mon, 6 Apr 2026 23:05:18 +0100 Subject: [PATCH 3/4] fix: preserve hook filter matching without tags --- src/phlo/hooks/bus.py | 5 +---- tests/test_hook_bus.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/phlo/hooks/bus.py b/src/phlo/hooks/bus.py index 0807ca595..1137b216d 100644 --- a/src/phlo/hooks/bus.py +++ b/src/phlo/hooks/bus.py @@ -193,10 +193,7 @@ def _matches_filters(filters: HookFilter, event: HookEvent) -> bool: event_asset_keys = _event_asset_keys(event) if not event_asset_keys or not filters.asset_keys.intersection(event_asset_keys): return False - return not ( - filters.tags is not None - and not all(event.tags.get(k) == v for k, v in filters.tags.items()) - ) + return filters.tags is None or all(event.tags.get(k) == v for k, v in filters.tags.items()) def _event_asset_keys(event: HookEvent) -> set[str]: diff --git a/tests/test_hook_bus.py b/tests/test_hook_bus.py index 7de658059..919b2d577 100644 --- a/tests/test_hook_bus.py +++ b/tests/test_hook_bus.py @@ -188,3 +188,32 @@ async def async_handler(_event) -> None: with pytest.raises(TypeError, match="emit_async"): bus.emit(event) + + +def test_hook_bus_filter_with_no_tags_passes_all_events() -> None: + """Verify events are not rejected when filters.tags is None (issue #344).""" + bus = MockHookBus() + calls: list[str] = [] + + def handler(_event) -> None: + calls.append("called") + + bus.register( + HookRegistration( + hook_name="handler", + handler=handler, + priority=10, + filters=HookFilter(event_types={"quality.result"}), + ), + plugin_name="plugin", + ) + + event = QualityResultEvent( + event_type="quality.result", + asset_key="asset", + check_name="null_check", + passed=True, + tags=None, + ) + bus.emit(event) + assert calls == ["called"] From 1df5f7c904f2fcab8322453b6d0f43d4b3267b77 Mon Sep 17 00:00:00 2001 From: Gareth Price Date: Tue, 7 Apr 2026 16:26:55 +0100 Subject: [PATCH 4/4] fix: preserve proxy email semantics --- src/phlo/capabilities/authentication.py | 17 +++++++++++------ tests/test_authentication_provider.py | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/phlo/capabilities/authentication.py b/src/phlo/capabilities/authentication.py index 2b74680d4..4ec0d0bd2 100644 --- a/src/phlo/capabilities/authentication.py +++ b/src/phlo/capabilities/authentication.py @@ -245,13 +245,18 @@ def __init__( self._timestamp_header = "x-phlo-proxy-timestamp" self._shared_secret = shared_secret - def _identity_payload_parts(self, request_context: RequestContext) -> tuple[str, str, str]: - """Return the asserted identity fields bound into the proxy signature.""" + def _identity_headers(self, request_context: RequestContext) -> tuple[str, str | None, str]: + """Return asserted identity fields using runtime-facing types.""" subject = request_context.headers.get(self._header_subject, "") - email = request_context.headers.get(self._header_email, "") + email = request_context.headers.get(self._header_email) groups_raw = request_context.headers.get(self._header_groups, "") + return subject, email, groups_raw + + def _identity_payload_parts(self, request_context: RequestContext) -> tuple[str, str, str]: + """Return the asserted identity fields bound into the proxy signature.""" + subject, email, groups_raw = self._identity_headers(request_context) groups = ",".join(g.strip() for g in groups_raw.split(",") if g.strip()) - return subject, email, groups + return subject, email or "", groups def _is_from_trusted_proxy(self, request_context: RequestContext) -> bool: """Check if request came from a trusted proxy using CIDR matching.""" @@ -339,7 +344,7 @@ def authenticate(self, request_context: RequestContext) -> AuthResult: reason_code="invalid_identity_payload", ) - subject, email, groups_raw = self._identity_payload_parts(request_context) + subject, email, groups_raw = self._identity_headers(request_context) if not subject: _log_auth_event( "failure", @@ -355,7 +360,7 @@ def authenticate(self, request_context: RequestContext) -> AuthResult: reason_code="missing_credentials", ) - groups = tuple(g for g in groups_raw.split(",") if g) + groups = tuple(g.strip() for g in groups_raw.split(",") if g.strip()) principal = AuthPrincipal( subject=subject, diff --git a/tests/test_authentication_provider.py b/tests/test_authentication_provider.py index 55f4ed1a8..ac04d08cb 100644 --- a/tests/test_authentication_provider.py +++ b/tests/test_authentication_provider.py @@ -360,6 +360,27 @@ def test_authenticate_without_secret_allows_unsigned_requests(self): result = provider.authenticate(request_context) assert result.authenticated is True + assert result.principal is not None + assert result.principal.email == "proxy@example.com" + + def test_authenticate_without_email_header_keeps_email_none(self): + """Test absent proxy email header stays None on the principal.""" + provider = ProxyAuthenticationProvider(trusted_proxies=["127.0.0.1/32"]) + + request_context = RequestContext( + headers={ + "x-remote-user": "proxy-user", + }, + cookies={}, + query_params={}, + remote_addr="127.0.0.1", + path="/test/path", + ) + result = provider.authenticate(request_context) + + assert result.authenticated is True + assert result.principal is not None + assert result.principal.email is None def test_authenticate_rejects_signed_request_when_identity_headers_change(self): """Test signature binds asserted identity fields to the authenticated principal."""