Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion src/phlo/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,34 @@
Structured error classes with error codes, contextual messages, and suggestions.
"""

import re
from enum import Enum

_KEY_VALUE_SENSITIVE_PATTERN = re.compile(
r"\b(password|passwd|token|secret|api_key|apikey|credential)\b\s*[:=]\s*[^\s,;]+",
re.IGNORECASE,
)
_AUTHORIZATION_SENSITIVE_PATTERN = re.compile(r"\b(authorization|bearer)\b\s+\S+", re.IGNORECASE)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 _AUTHORIZATION_SENSITIVE_PATTERN over-redacts common error context words after "authorization"

The _AUTHORIZATION_SENSITIVE_PATTERN regex \b(authorization|bearer)\b\s+\S+ unconditionally redacts whatever single word follows "authorization" or "bearer". This corrupts common error messages where these words are followed by non-sensitive context like "failed", "denied", "required", or "header".

Examples of false-positive redaction
  • "authorization failed" → "authorization <redacted>" ("failed" is not a secret)
  • "authorization denied" → "authorization <redacted>"
  • "authorization required" → "authorization <redacted>"
  • "missing authorization header" → "missing authorization <redacted>"
  • "authorization error: invalid credentials" → "authorization <redacted> invalid credentials"

These are common error message patterns from HTTP clients and auth libraries. When such exceptions are wrapped as a cause in a PhloError, the diagnostic context is destroyed, making it significantly harder to debug authorization-related failures.

Prompt for agents
The _AUTHORIZATION_SENSITIVE_PATTERN at line 14 of src/phlo/exceptions.py matches any word following 'authorization' or 'bearer', causing false-positive redaction of common non-sensitive context words like 'failed', 'denied', 'required', 'header', etc.

The pattern is: r"\b(authorization|bearer)\b\s+\S+"

The intent is to catch HTTP Authorization header values like 'Authorization: Bearer <token>' or inline 'bearer <token>' patterns. But the 'authorization' alternative is too broad.

Possible fixes:
1. Remove 'authorization' from this pattern entirely since the 'bearer' alternative already catches the actual token in 'Authorization: Bearer <token>' format.
2. Make the 'authorization' alternative more specific, e.g. require a colon delimiter like 'authorization:\s+\S+' to only match header-like formats.
3. Add a negative lookahead for common non-sensitive words: 'authorization\s+(?!failed|denied|required|error|header|expired|invalid)\S+'.

Option 1 is simplest and most robust since Bearer already handles the main use case.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

# Matches a "connection string" label (the two words separated by whitespace),
# a `:` or `=` delimiter, and then the value. Group 1 is the label. The value is
# consumed lazily and stops before the first of: a `,`/`;` that is followed by
# whitespace and another `name:`/`name=` pair, a newline, or the end of input.
# A `;` with no whitespace after it (as in `Server=db;Password=x`) therefore
# stays part of the redacted value.
_CONNECTION_STRING_SENSITIVE_PATTERN = re.compile(
r"\b(connection\s+string)\b\s*[:=]\s*.+?(?=(?:[,;]\s+\w+\s*[:=])|\n|$)",
re.IGNORECASE,
)
# Matches a key-material label (private_key, signing_key, encryption_key)
# followed by either a `:`/`=` delimiter or plain whitespace, then the value.
# Group 1 is the label. The value ends at the same boundaries as the
# connection-string pattern above.
# NOTE(review): `.` does not match newlines here and the lookahead stops at
# `\n`, so only the first line of a multi-line value is consumed — confirm this
# is acceptable for multi-line PEM bodies.
_KEY_MATERIAL_SENSITIVE_PATTERN = re.compile(
r"\b(private_key|signing_key|encryption_key)\b(?:\s*[:=]\s*|\s+).+?(?=(?:[,;]\s+\w+\s*[:=])|\n|$)",
re.IGNORECASE,
)


def _redact_sensitive(s: str) -> str:
    """Return *s* with recognised secrets replaced by ``<redacted>``.

    The module-level patterns are applied one after another, each on the output
    of the previous one: key material first, then connection strings, then
    generic ``key=value`` secrets, and finally authorization/bearer tokens.
    """
    # Order matters: the broader "whole value" patterns run before the
    # single-token ones so the later passes see already-redacted text.
    substitutions = (
        (_KEY_MATERIAL_SENSITIVE_PATTERN, r"\1=<redacted>"),
        (_CONNECTION_STRING_SENSITIVE_PATTERN, r"\1=<redacted>"),
        (_KEY_VALUE_SENSITIVE_PATTERN, lambda m: f"{m.group(1)}=<redacted>"),
        (_AUTHORIZATION_SENSITIVE_PATTERN, r"\1 <redacted>"),
    )
    cleaned = s
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned


class PhloErrorCode(Enum):
"""Error codes for Phlo exceptions."""
Expand Down Expand Up @@ -98,7 +124,9 @@ def _format_message(self, message: str) -> str:

if self.cause:
lines.append("")
lines.append(f"Caused by: {type(self.cause).__name__}: {str(self.cause)}")
lines.append(
f"Caused by: {type(self.cause).__name__}: {_redact_sensitive(str(self.cause))}"
)

lines.append("")
lines.append(f"Documentation: {self.doc_url}")
Expand Down
30 changes: 30 additions & 0 deletions tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from phlo.exceptions import (
PhloError,
PhloErrorCode,
_redact_sensitive,
format_field_list,
suggest_similar_field_names,
)
Expand Down Expand Up @@ -85,3 +86,32 @@ def test_format_field_list_handles_empty_field_list() -> None:
formatted = format_field_list([])

assert formatted == ""


def test_phlo_error_redacts_sensitive_data_in_cause() -> None:
    """The rendered error must not leak secrets carried by the wrapped cause."""
    underlying = ValueError("connection string: password=secret123")
    rendered = str(
        PhloError(
            message="Connection failed",
            code=PhloErrorCode.INFRASTRUCTURE_ERROR,
            cause=underlying,
        )
    )

    # The raw secret is gone and the cause line shows the redacted form.
    assert "secret123" not in rendered
    assert "Caused by: ValueError: connection string=<redacted>" in rendered


def test_redact_sensitive_handles_colon_delimited_secret_values() -> None:
    """Secrets written as ``key: value`` are redacted like ``key=value``."""
    # Maps each raw input to the exact text expected after redaction.
    expectations = {
        "password: secret123": "password=<redacted>",
        "token: abc123": "token=<redacted>",
        "connection string: Server=db;Password=hunter2": "connection string=<redacted>",
    }

    for raw, expected in expectations.items():
        assert _redact_sensitive(raw) == expected


def test_redact_sensitive_removes_private_key_material() -> None:
    """A key label followed by its body collapses to a single redacted pair."""
    output = _redact_sensitive("private_key PEM_BLOCK_BODY_XYZ")

    # The body must not survive, and the label is normalised to `label=<redacted>`.
    assert "PEM_BLOCK_BODY_XYZ" not in output
    assert output == "private_key=<redacted>"
Loading