Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ Storage Service (Lambda) → DynamoDB (BSOs)

**HAWK Authorizer** (`lambda/src/entrypoint/hawk_authorizer.py`): Lambda authorizer for API Gateway

**Environment Configuration:**
- `CLOCK_SKEW_TOLERANCE`: OIDC JWT validation skew (Token API: 300s)
- `OIDC_CACHE_TTL_SECONDS`: OIDC provider config and JWKS cache TTL (Token API: 3600s / 1 hour)
- `HAWK_TIMESTAMP_SKEW_TOLERANCE`: HAWK timestamp validation skew (60s default)
- `TOKEN_DURATION`: HAWK credential duration (300s)

## Project Structure

```
Expand Down Expand Up @@ -127,6 +133,7 @@ tools/ # CLI tools for testing
- **HAWK credentials**: Expire after 300 seconds - clients must refresh via Token Service
- **Test structure**: Must mirror `src/` structure (e.g., `tests/routes/bso/test_read.py` ↔ `src/routes/bso/read.py`)
- **Fixtures**: Use `mock_service_provider`, `mock_storage_manager` from `conftest.py`
- **DynamoDB GSI**: `list_collections` uses `UserCollectionsIndex` GSI for efficient queries - collection metadata items include `user_id` attribute

### TypeScript/CDK
- **Strict mode**: All code paths must return values
Expand Down
10 changes: 0 additions & 10 deletions lambda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,6 @@ class NewRoute(BaseRoute):
}
```

## TODO Items

All route implementations currently contain TODO comments for:
- Authentication validation integration
- Proper timestamp generation
- Request payload parsing and validation
- Response header management (X-Last-Modified, X-If-Unmodified-Since)
- Batch operation logic
- Quota enforcement

## Testing

Each route class can be unit tested independently by instantiating the class and calling the `handle` method with mock API Gateway events.
32 changes: 30 additions & 2 deletions lambda/src/environment/service_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,28 @@ def oidc_config(self) -> dict:

@cached_property
def clock_skew_tolerance(self) -> int:
"""Get clock skew tolerance in seconds"""
"""
Get clock skew tolerance in seconds for OIDC JWT validation
"""
return int(os.environ["CLOCK_SKEW_TOLERANCE"])

@cached_property
def oidc_cache_ttl_seconds(self) -> int:
"""
Get OIDC configuration cache TTL in seconds (default 3600 / 1 hour)
"""
return int(os.environ.get("OIDC_CACHE_TTL_SECONDS", "3600"))

@cached_property
def hawk_timestamp_skew_tolerance(self) -> int:
"""
Get HAWK timestamp skew tolerance in seconds

Separate from OIDC clock_skew_tolerance to allow independent configuration
of HAWK timestamp validation vs OIDC JWT validation
"""
return int(os.environ["HAWK_TIMESTAMP_SKEW_TOLERANCE"])

@cached_property
def retry_after_seconds(self) -> int:
"""Get Retry-After value for 503 responses in seconds (default 30)"""
Expand All @@ -135,6 +154,7 @@ def oidc_validator(self) -> OIDCValidator:
provider_url=self.oidc_config["provider_url"],
client_id=self.oidc_config["client_id"],
clock_skew_tolerance=self.clock_skew_tolerance,
cache_ttl_seconds=self.oidc_cache_ttl_seconds,
)

@cached_property
Expand Down Expand Up @@ -163,6 +183,10 @@ def token_api_router(self):
def token_cache_table_name(self):
return os.environ.get("TOKEN_CACHE_TABLE_NAME")

@cached_property
def token_duration(self) -> int:
return int(os.environ["TOKEN_DURATION"])

@cached_property
def token_cache_table(self):
"""Create DynamoDB Table resource for token cache"""
Expand All @@ -172,4 +196,8 @@ def token_cache_table(self):
@cached_property
def hawk_service(self) -> HawkService:
"""Create HAWK service for authentication"""
return HawkService(token_cache_table=self.token_cache_table)
return HawkService(
token_cache_table=self.token_cache_table,
timestamp_skew_tolerance=self.hawk_timestamp_skew_tolerance,
token_duration=self.token_duration,
)
15 changes: 9 additions & 6 deletions lambda/src/services/hawk_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,20 @@ class HawkService:
- All validation helper methods
"""

TIMESTAMP_SKEW_SECONDS = 60 # Allow 60 second clock skew
TOKEN_DURATION_SECONDS = 300 # 5 minutes

def __init__(self, token_cache_table):
def __init__(
self, token_cache_table, timestamp_skew_tolerance: int = 60, token_duration: int = 300
):
"""
Initialize HAWK service with DynamoDB table.

Args:
token_cache_table: boto3 DynamoDB Table resource for token cache
timestamp_skew_tolerance: Allowed clock skew in seconds
token_duration: Duration a token is valid for
"""
self.token_cache_table = token_cache_table
self.timestamp_skew_tolerance = timestamp_skew_tolerance
self.token_duration = token_duration

def validate(
self, authorization_header: str, method: str, path: str, host: str, port: int
Expand Down Expand Up @@ -229,7 +232,7 @@ def validate_timestamp(self, timestamp: int) -> bool:
True if timestamp is within acceptable skew
"""
now = int(time.time())
return abs(now - timestamp) <= self.TIMESTAMP_SKEW_SECONDS
return abs(now - timestamp) <= self.timestamp_skew_tolerance

def build_normalized_string(
self,
Expand Down Expand Up @@ -384,7 +387,7 @@ def generate_hawk_credentials(self, user_id: str, generation: int) -> HawkCreden

Used by Token Server when issuing tokens.
"""
expiry = int(time.time()) + self.TOKEN_DURATION_SECONDS
expiry = int(time.time()) + self.token_duration
hawk_id = self.generate_hawk_id(user_id, generation, expiry)
hawk_key = self.generate_hawk_key()

Expand Down
16 changes: 12 additions & 4 deletions lambda/src/services/oidc_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
from src.shared.oidc import OIDCProviderConfig, OIDCTokenClaims

# Cache TTL in seconds (1 hour)
# Default cache TTL in seconds (1 hour) - maintained for backward compatibility
CACHE_TTL_SECONDS = 3600


Expand All @@ -31,18 +31,26 @@ class OIDCValidator:
- User identifier extraction from sub claim
"""

def __init__(self, provider_url: str, client_id: str, clock_skew_tolerance: int = 300):
def __init__(
self,
provider_url: str,
client_id: str,
clock_skew_tolerance: int = 300,
cache_ttl_seconds: int = CACHE_TTL_SECONDS,
):
"""
Initialize OIDC validator with provider configuration.

Args:
provider_url: Base URL of OIDC provider (e.g., https://auth.example.com)
client_id: Expected audience claim value (OAuth client ID)
clock_skew_tolerance: Maximum allowed clock skew in seconds (default 300 / 5 minutes)
cache_ttl_seconds: Cache TTL for provider config and JWKS in seconds (default 3600 / 1 hour)
"""
self.provider_url = provider_url.rstrip("/")
self.client_id = client_id
self.clock_skew_tolerance = clock_skew_tolerance
self.cache_ttl_seconds = cache_ttl_seconds
self._provider_config: Optional[OIDCProviderConfig] = None
self._provider_config_timestamp: float = 0
self._jwk_client: Optional[PyJWKClient] = None
Expand All @@ -52,7 +60,7 @@ def _is_cache_valid(self) -> bool:
if self._provider_config is None:
return False
current_time = datetime.now(timezone.utc).timestamp()
return (current_time - self._provider_config_timestamp) < CACHE_TTL_SECONDS
return (current_time - self._provider_config_timestamp) < self.cache_ttl_seconds

def discover_provider_config(self) -> OIDCProviderConfig:
"""
Expand Down Expand Up @@ -117,7 +125,7 @@ def _get_jwk_client(self) -> PyJWKClient:
self._jwk_client = PyJWKClient(
config.jwks_uri,
cache_keys=True,
lifespan=CACHE_TTL_SECONDS,
lifespan=self.cache_ttl_seconds,
)
return self._jwk_client

Expand Down
26 changes: 18 additions & 8 deletions lambda/src/services/storage_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def _encode_collection_data(self, user_id: str, collection_data: CollectionData)
col_data = collection_data.to_dict()
col_data[_PK] = self._collection_pk(user_id, collection_data.name)
col_data[_SK] = self._metadata_sk()
col_data["user_id"] = user_id # GSI partition key for efficient user queries
return col_data

def get_collection(self, user_id: str, collection_name: str) -> CollectionData:
Expand Down Expand Up @@ -376,20 +377,29 @@ def list_collections(self, user_id: str) -> List[CollectionData]:
"""
collections = []

# Query for all collections for this user
# Use begins_with to match USER#{user_id}#COLLECTION#
response = self.table.scan(
FilterExpression="begins_with(PK, :user_prefix) AND SK = :metadata",
ExpressionAttributeValues={
":user_prefix": f"USER#{user_id}#COLLECTION#",
":metadata": self._metadata_sk(),
},
# Query GSI for all collections for this user
response = self.table.query(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Critical Bug: This query will return ALL items for the user, not just collection metadata!

The GSI only filters by user_id, so it will return:

  • Collection metadata (SK=METADATA) ✅
  • All BSO objects (SK=OBJECT#...) ❌
  • Any other user data ❌

Solution: Add a filter expression:

response = self.table.query(
    IndexName="UserCollectionsIndex",
    KeyConditionExpression="user_id = :user_id",
    FilterExpression="SK = :metadata",
    ExpressionAttributeValues={
        ":user_id": user_id,
        ":metadata": self._metadata_sk()
    },
)

Note: FilterExpression is applied after the query, so you still get the GSI performance benefit but filter out non-metadata items.

IndexName="UserCollectionsIndex",
KeyConditionExpression="user_id = :user_id",
ExpressionAttributeValues={":user_id": user_id},
)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Missing pagination handling

This query doesn't handle pagination. DynamoDB query operations can return a maximum of 1 MB of data per call. If a user has many collections, the response may be paginated and include a LastEvaluatedKey.

Suggested change
)
# Query GSI for all collections for this user
response = self.table.query(
IndexName="UserCollectionsIndex",
KeyConditionExpression="user_id = :user_id",
ExpressionAttributeValues={":user_id": user_id},
)
for item in response.get("Items", []):
collection = CollectionData.from_dict(item)
collections.append(collection)
# Handle pagination
while "LastEvaluatedKey" in response:
response = self.table.query(
IndexName="UserCollectionsIndex",
KeyConditionExpression="user_id = :user_id",
ExpressionAttributeValues={":user_id": user_id},
ExclusiveStartKey=response["LastEvaluatedKey"],
)
for item in response.get("Items", []):
collection = CollectionData.from_dict(item)
collections.append(collection)


for item in response.get("Items", []):
collection = CollectionData.from_dict(item)
collections.append(collection)

# Handle pagination if there are more results
while "LastEvaluatedKey" in response:
response = self.table.query(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same FilterExpression needed here for pagination queries.

IndexName="UserCollectionsIndex",
KeyConditionExpression="user_id = :user_id",
ExpressionAttributeValues={":user_id": user_id},
ExclusiveStartKey=response["LastEvaluatedKey"],
)
for item in response.get("Items", []):
collection = CollectionData.from_dict(item)
collections.append(collection)

return collections

def get_collection_objects(
Expand Down
2 changes: 1 addition & 1 deletion lambda/src/services/token_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ def generate_token(self, user_id: str, uid: int, generation: int) -> TokenRespon
key=credentials.hawk_key,
api_endpoint=api_endpoint,
uid=uid,
duration=self._hawk_service.TOKEN_DURATION_SECONDS,
duration=self._hawk_service.token_duration,
hashalg=self.HASH_ALGORITHM,
)
2 changes: 2 additions & 0 deletions lambda/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ def setup_environment(
monkeypatch.setenv("OIDC_SECRET_ARN", oidc_secret_arn)
monkeypatch.setenv("BASE_DOMAIN", base_domain)
monkeypatch.setenv("CLOCK_SKEW_TOLERANCE", "300")
monkeypatch.setenv("HAWK_TIMESTAMP_SKEW_TOLERANCE", "60")
monkeypatch.setenv("RETRY_AFTER_SECONDS", "30")
monkeypatch.setenv("TOKEN_DURATION", "300")


class MockServiceProvider(ServiceProvider):
Expand Down
6 changes: 4 additions & 2 deletions lambda/tests/integration/test_e2e_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,15 @@ def test_info_collections_scoped_to_user(
info_event = build_storage_event(method="GET", path="/info/collections", user_id=user1_id)

modified_time = time.time()
# Mock scan returns only User1's collections
# Mock query with GSI returns only User1's collections
dynamodb_stubber.add_response(
"scan",
"query",
{
"Items": [
{
"PK": {"S": f"USER#{user1_id}#COLLECTION#bookmarks"},
"SK": {"S": "METADATA"},
"user_id": {"S": user1_id},
"name": {"S": "bookmarks"},
"modified": {"N": str(modified_time)},
"count": {"N": "5"},
Expand All @@ -417,6 +418,7 @@ def test_info_collections_scoped_to_user(
{
"PK": {"S": f"USER#{user1_id}#COLLECTION#tabs"},
"SK": {"S": "METADATA"},
"user_id": {"S": user1_id},
"name": {"S": "tabs"},
"modified": {"N": str(modified_time)},
"count": {"N": "3"},
Expand Down
19 changes: 0 additions & 19 deletions lambda/tests/integration/test_token_server_mozilla_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import time
from unittest.mock import patch

import pytest
from botocore.stub import ANY

from src.entrypoint.token_api import lambda_handler as token_handler
Expand Down Expand Up @@ -934,24 +933,6 @@ def test_x_timestamp_on_401_response(
timestamp_int = int(timestamp)
assert timestamp_int > 0

@pytest.mark.skip(reason="503 error handling happens at infrastructure level before handler")
def test_retry_after_on_503_response(
self,
mock_service_provider,
secretsmanager_stubber,
sample_lambda_context,
):
"""
Test Retry-After header on 503 response.

Note: This test is skipped because 503 errors from Secrets Manager
occur during ServiceProvider initialization, before the request handler
can catch them. In production, API Gateway would handle these errors.

**Validates: Requirement 15.1**
"""
pass

def test_www_authenticate_on_401_response(
self,
mock_service_provider,
Expand Down
Loading
Loading