From 519f249f15213267ab14094cfb53e164e5f206fb Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:21:46 -0500 Subject: [PATCH 1/9] return values from lambda instead of placeholder --- api/manifest/services.py | 156 ++++++++++++++++++++++++++------------- 1 file changed, 104 insertions(+), 52 deletions(-) diff --git a/api/manifest/services.py b/api/manifest/services.py index 19d5517..7b589d9 100644 --- a/api/manifest/services.py +++ b/api/manifest/services.py @@ -2,11 +2,13 @@ Services for the Manifest API """ +import json from fastapi import HTTPException, status, UploadFile import boto3 from botocore.exceptions import NoCredentialsError, ClientError from api.manifest.models import ManifestUploadResponse, ManifestValidationResponse from api.settings.services import get_setting_value +from core.config import get_settings from core.deps import SessionDep from core.logger import logger @@ -220,71 +222,121 @@ def upload_manifest_file( def validate_manifest_file( session: SessionDep, - s3_path: str, valid: bool = True + s3_path: str, ) -> ManifestValidationResponse: """ - Validate a manifest CSV file from S3. + Validate a manifest CSV file from S3 by invoking a Lambda function. Args: session: Database session s3_path: S3 path to the manifest CSV file to validate - valid: Mock parameter to simulate valid or invalid responses for testing Returns: ManifestValidationResponse with validation status and any errors found """ - # Placeholder for validation logic - # The actual validation logic would go here + # Get Lambda function name from settings, fall back to default + lambda_function_name = get_setting_value( + session, + "MANIFEST_VALIDATION_LAMBDA", + default="ngs360-manifest-validator" + ) + logger.info( + f"Invoking Lambda function: {lambda_function_name} " + f"for manifest validation of {s3_path}" + ) + + try: + # Get AWS region from settings + settings = get_settings() + region = settings.AWS_REGION + + # Create Lambda client + lambda_client = boto3.client("lambda", region_name=region) + + # Prepare payload for Lambda function + payload = { + "s3_path": s3_path + } + + # Invoke Lambda function synchronously + response = lambda_client.invoke( + FunctionName=lambda_function_name, + InvocationType="RequestResponse", + Payload=json.dumps(payload) + ) - # Call to lambda function for validation (placeholder) - lambda_function_name = get_setting_value(session, "MANIFEST_VALIDATION_LAMBDA") - logger.info(f"Invoking Lambda function: {lambda_function_name} for manifest validation") + # Read and parse Lambda response + response_payload = json.loads(response["Payload"].read().decode("utf-8")) + logger.debug(f"Lambda response: {response_payload}") + + # Check for Lambda execution errors + if "FunctionError" in response: + error_message = response_payload.get( + "errorMessage", + "Unknown Lambda execution error" + ) + logger.error(f"Lambda function error: {error_message}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Lambda validation error: {error_message}" + ) - # Placeholder for invoking the lambda function - # response = invoke_lambda(lambda_function_name, payload={"s3_path": s3_path}) + # Parse the Lambda response body if it contains a nested body field + # (common pattern for API Gateway-style Lambda responses) + if "body" in response_payload: + if isinstance(response_payload["body"], str): + validation_result = json.loads(response_payload["body"]) + else: + validation_result = response_payload["body"] + else: + validation_result = response_payload - if not valid: - # Return mock validation errors for testing + # Build response from Lambda result return ManifestValidationResponse( - valid=False, - message={ - "ManifestVersion": "Validated against manifest version: DTS12.1", - "ExtraFields": ( - "See extra fields (info only): " - "['VHYB', 'VLANE', 'VBARCODE']" - ) - }, - error={ - "InvalidFilePath": [ - ( - "Unable to find file s3://example/example_1.clipped.fastq.gz " - "described in row 182, check that file exists and is accessible" - ), - ( - "Unable to find file s3://example/example_2.clipped.fastq.gz " - "described in row 183, check that file exists and is accessible" - ) - ], - "MissingRequiredField": [ - "Row 45 is missing required field 'SAMPLE_ID'", - "Row 67 is missing required field 'FILE_PATH'" - ], - "InvalidDataFormat": [ - "Row 92: Invalid date format in field 'RUN_DATE', expected YYYY-MM-DD" - ] - }, - warning={ - "DuplicateSample": [ - "Sample 'ABC-123' appears multiple times in rows 10, 25, 42" - ] - } + valid=validation_result.get("valid", False), + message=validation_result.get("message", {}), + error=validation_result.get("error", {}), + warning=validation_result.get("warning", {}) ) - return ManifestValidationResponse( - valid=True, - message={ - "ManifestVersion": "Validated against manifest version: DTS12.1" - }, - error={}, - warning={} - ) + except NoCredentialsError as exc: + logger.error("AWS credentials not found for Lambda invocation") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="AWS credentials not found. Please configure AWS credentials.", + ) from exc + except ClientError as exc: + error_code = exc.response["Error"]["Code"] + error_message = exc.response["Error"]["Message"] + logger.error(f"Lambda ClientError: {error_code} - {error_message}") + + if error_code == "ResourceNotFoundException": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Lambda function not found: {lambda_function_name}", + ) from exc + elif error_code == "AccessDeniedException": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Access denied to Lambda function: {lambda_function_name}", + ) from exc + else: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Lambda error: {error_message}", + ) from exc + except json.JSONDecodeError as exc: + logger.error(f"Failed to parse Lambda response: {exc}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to parse validation response from Lambda", + ) from exc + except HTTPException: + # Re-raise HTTPException without modification + raise + except Exception as exc: + logger.error(f"Unexpected error invoking Lambda: {exc}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Unexpected error during manifest validation: {str(exc)}", + ) from exc From 6f66ba1a029e57a3f7a1d72e62c756c067666b5d Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:23:30 -0500 Subject: [PATCH 2/9] parse validator lambda response --- api/manifest/services.py | 53 ++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/api/manifest/services.py b/api/manifest/services.py index 7b589d9..424f910 100644 --- a/api/manifest/services.py +++ b/api/manifest/services.py @@ -253,9 +253,15 @@ def validate_manifest_file( # Create Lambda client lambda_client = boto3.client("lambda", region_name=region) + # Parse the S3 path to extract the bucket for files_bucket parameter + bucket, _ = _parse_s3_path(s3_path) + # Prepare payload for Lambda function + # Lambda expects: manifest_path, files_bucket, manifest_version (optional), + # files_prefix (optional), available_pipelines (optional) payload = { - "s3_path": s3_path + "manifest_path": s3_path, + "files_bucket": bucket, } # Invoke Lambda function synchronously @@ -269,7 +275,7 @@ def validate_manifest_file( response_payload = json.loads(response["Payload"].read().decode("utf-8")) logger.debug(f"Lambda response: {response_payload}") - # Check for Lambda execution errors + # Check for Lambda execution errors (unhandled exceptions) if "FunctionError" in response: error_message = response_payload.get( "errorMessage", @@ -282,21 +288,54 @@ def validate_manifest_file( ) # Parse the Lambda response body if it contains a nested body field - # (common pattern for API Gateway-style Lambda responses) + # (API Gateway-style Lambda responses have body as JSON string) if "body" in response_payload: if isinstance(response_payload["body"], str): validation_result = json.loads(response_payload["body"]) else: validation_result = response_payload["body"] else: + # Direct invocation returns raw body with statusCode embedded validation_result = response_payload + # Check for Lambda-level errors (validation errors, missing params, etc.) + if not validation_result.get("success", True): + error_msg = validation_result.get("error", "Unknown validation error") + error_type = validation_result.get("error_type", "ValidationError") + lambda_status = validation_result.get("statusCode", 400) + + logger.error(f"Lambda returned error: {error_type} - {error_msg}") + + # Map Lambda status codes to HTTP exceptions + if lambda_status == 400: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Validation request error: {error_msg}" + ) + elif lambda_status == 404: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Manifest file not found: {error_msg}" + ) + elif lambda_status == 503: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=f"Service unavailable: {error_msg}" + ) + else: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Validation error: {error_msg}" + ) + # Build response from Lambda result + # Lambda returns: validation_passed, messages, errors, warnings + # API returns: valid, message, error, warning return ManifestValidationResponse( - valid=validation_result.get("valid", False), - message=validation_result.get("message", {}), - error=validation_result.get("error", {}), - warning=validation_result.get("warning", {}) + valid=validation_result.get("validation_passed", False), + message=validation_result.get("messages", {}), + error=validation_result.get("errors", {}), + warning=validation_result.get("warnings", {}) ) except NoCredentialsError as exc: From d1c600df14021a60d64b036d3e4f705eaf3459d1 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:25:11 -0500 Subject: [PATCH 3/9] update endpoint code to remove placeholder --- api/manifest/routes.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/api/manifest/routes.py b/api/manifest/routes.py index 55e35be..5297d6c 100644 --- a/api/manifest/routes.py +++ b/api/manifest/routes.py @@ -80,25 +80,21 @@ def validate_manifest( s3_path: str = Query( ..., description="S3 path to the manifest CSV file to validate" ), - valid: bool = Query( - True, - description="Mock validation result for testing (True=valid, False=invalid)" - ), ) -> ManifestValidationResponse: """ - Validate a manifest CSV file from S3. + Validate a manifest CSV file from S3 using the ngs360-manifest-validator Lambda. - Checks the manifest file for: + The Lambda function checks the manifest file for: - Required fields - Data format compliance - Value constraints + - File existence verification Args: s3_path: S3 path to the manifest CSV file to validate - valid: Mock parameter to simulate valid or invalid responses for testing Returns: ManifestValidationResponse with validation status and any errors found """ - result = services.validate_manifest_file(session, s3_path, valid) + result = services.validate_manifest_file(session, s3_path) return result From 14e09da73fa23d4db18f68f7e274b41a8d0e0a07 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:25:35 -0500 Subject: [PATCH 4/9] fix fn call to get default setting --- api/manifest/services.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/api/manifest/services.py b/api/manifest/services.py index 424f910..7b4d915 100644 --- a/api/manifest/services.py +++ b/api/manifest/services.py @@ -235,10 +235,9 @@ def validate_manifest_file( ManifestValidationResponse with validation status and any errors found """ # Get Lambda function name from settings, fall back to default - lambda_function_name = get_setting_value( - session, - "MANIFEST_VALIDATION_LAMBDA", - default="ngs360-manifest-validator" + lambda_function_name = ( + get_setting_value(session, "MANIFEST_VALIDATION_LAMBDA") + or "ngs360-manifest-validator" ) logger.info( f"Invoking Lambda function: {lambda_function_name} " From e538a36d9c0d0fb559651f08efceaf6d5d2cdd25 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:28:59 -0500 Subject: [PATCH 5/9] add mock lambda client fixture for testing --- tests/conftest.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 1c7928b..ce3da62 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -350,6 +350,84 @@ def put_object(self, Bucket: str, Key: str, Body: bytes, **kwargs): return {"ETag": '"mock-etag"', "VersionId": "mock-version-id"} +class MockLambdaPayload: + """Mock Lambda response payload""" + + def __init__(self, content: bytes): + self.content = content + + def read(self) -> bytes: + return self.content + + +class MockLambdaClient: + """Mock Lambda client for testing""" + + def __init__(self): + self.response_data = {} # The response to return + self.error_mode = None # For simulating errors + self.invocations = [] # Track invocations + + def set_response(self, response: dict): + """Set the response that will be returned by invoke()""" + self.response_data = response + + def simulate_error(self, error_type: str): + """ + Configure client to raise specific errors + + Args: + error_type: One of "ResourceNotFoundException", "AccessDeniedException", + "NoCredentialsError" + """ + self.error_mode = error_type + + def invoke(self, FunctionName: str, InvocationType: str, Payload: str): + """Mock Lambda invoke operation""" + import json + from botocore.exceptions import NoCredentialsError, ClientError + + # Track the invocation + self.invocations.append({ + "FunctionName": FunctionName, + "InvocationType": InvocationType, + "Payload": json.loads(Payload) + }) + + # Check for simulated errors + if self.error_mode == "NoCredentialsError": + raise NoCredentialsError() + elif self.error_mode == "ResourceNotFoundException": + error_response = { + "Error": { + "Code": "ResourceNotFoundException", + "Message": f"Function not found: {FunctionName}", + } + } + raise ClientError(error_response, "Invoke") + elif self.error_mode == "AccessDeniedException": + error_response = { + "Error": { + "Code": "AccessDeniedException", + "Message": "Access Denied" + } + } + raise ClientError(error_response, "Invoke") + + # Return the configured response + response_json = json.dumps(self.response_data).encode("utf-8") + return { + "StatusCode": 200, + "Payload": MockLambdaPayload(response_json) + } + + +@pytest.fixture(name="mock_lambda_client") +def mock_lambda_client_fixture(): + """Provide a mock Lambda client for testing""" + return MockLambdaClient() + + @pytest.fixture(name="test_project") def test_project_fixture(session): """Provide a test project instance""" @@ -459,8 +537,11 @@ def client_fixture( session: Session, mock_opensearch_client: MockOpenSearchClient, mock_s3_client: MockS3Client, + mock_lambda_client: MockLambdaClient, + monkeypatch, ): """Provide a TestClient with dependencies overridden for testing""" + import boto3 def get_db_override(): return session @@ -471,6 +552,16 @@ def get_opensearch_client_override(): def get_s3_client_override(): return mock_s3_client + # Patch boto3.client to return mock Lambda client for "lambda" service + original_boto3_client = boto3.client + + def mock_boto3_client(service_name, **kwargs): + if service_name == "lambda": + return mock_lambda_client + return original_boto3_client(service_name, **kwargs) + + monkeypatch.setattr(boto3, "client", mock_boto3_client) + app.dependency_overrides[get_db] = get_db_override app.dependency_overrides[get_opensearch_client] = get_opensearch_client_override app.dependency_overrides[get_s3_client] = get_s3_client_override From 39748dcf7d2de9073020ed67a4eaf71cbe36ee9c Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 14:29:27 -0500 Subject: [PATCH 6/9] update tests to use lambda fixture instead of mock valid param --- tests/api/test_manifest.py | 157 +++++++++++++++++++++++++++++++------ 1 file changed, 134 insertions(+), 23 deletions(-) diff --git a/tests/api/test_manifest.py b/tests/api/test_manifest.py index 69c4fba..6c3303c 100644 --- a/tests/api/test_manifest.py +++ b/tests/api/test_manifest.py @@ -637,10 +637,23 @@ def test_upload_manifest_missing_file( class TestManifestValidation: """Test manifest validation endpoint""" - def test_validate_manifest_valid(self, client: TestClient): - """Test validation endpoint with valid manifest (mock)""" + def test_validate_manifest_valid( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint with valid manifest via Lambda""" + # Configure mock Lambda response for valid manifest + mock_lambda_client.set_response({ + "success": True, + "validation_passed": True, + "messages": {"ManifestVersion": "Validated against manifest version: DTS12.1"}, + "errors": {}, + "warnings": {}, + "manifest_path": "s3://test-bucket/manifest.csv", + "statusCode": 200 + }) + response = client.post( - "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv&valid=true" + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" ) # Verify successful response @@ -666,10 +679,44 @@ def test_validate_manifest_valid(self, client: TestClient): # Should have manifest version message assert "ManifestVersion" in data["message"] - def test_validate_manifest_invalid(self, client: TestClient): - """Test validation endpoint with invalid manifest (mock)""" + def test_validate_manifest_invalid( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint with invalid manifest via Lambda""" + # Configure mock Lambda response for invalid manifest + mock_lambda_client.set_response({ + "success": True, + "validation_passed": False, + "messages": { + "ManifestVersion": "Validated against manifest version: DTS12.1", + "ExtraFields": "See extra fields (info only): ['VHYB', 'VLANE', 'VBARCODE']" + }, + "errors": { + "InvalidFilePath": [ + "Unable to find file s3://example/example_1.clipped.fastq.gz " + "described in row 182, check that file exists and is accessible", + "Unable to find file s3://example/example_2.clipped.fastq.gz " + "described in row 183, check that file exists and is accessible" + ], + "MissingRequiredField": [ + "Row 45 is missing required field 'SAMPLE_ID'", + "Row 67 is missing required field 'FILE_PATH'" + ], + "InvalidDataFormat": [ + "Row 92: Invalid date format in field 'RUN_DATE', expected YYYY-MM-DD" + ] + }, + "warnings": { + "DuplicateSample": [ + "Sample 'ABC-123' appears multiple times in rows 10, 25, 42" + ] + }, + "manifest_path": "s3://test-bucket/manifest.csv", + "statusCode": 422 + }) + response = client.post( - "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv&valid=false" + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" ) # Verify successful response @@ -710,20 +757,6 @@ def test_validate_manifest_invalid(self, client: TestClient): assert "ManifestVersion" in data["message"] assert "ExtraFields" in data["message"] - def test_validate_manifest_default_valid(self, client: TestClient): - """Test validation endpoint defaults to valid=true""" - response = client.post( - "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" - ) - - # Verify successful response - assert response.status_code == 200 - - data = response.json() - - # Default should be valid - assert data["valid"] is True - def test_validate_manifest_missing_s3_path(self, client: TestClient): """Test validation endpoint fails without s3_path""" response = client.post("/api/v1/manifest/validate") @@ -731,17 +764,35 @@ def test_validate_manifest_missing_s3_path(self, client: TestClient): # Verify 422 error (missing required parameter) assert response.status_code == 422 - def test_validate_manifest_response_structure(self, client: TestClient): + def test_validate_manifest_response_structure( + self, client: TestClient, mock_lambda_client + ): """Test that both valid and invalid responses match expected structure""" # Test valid response + mock_lambda_client.set_response({ + "success": True, + "validation_passed": True, + "messages": {"ManifestVersion": "DTS12.1"}, + "errors": {}, + "warnings": {}, + "statusCode": 200 + }) valid_response = client.post( - "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv&valid=true" + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" ) valid_data = valid_response.json() # Test invalid response + mock_lambda_client.set_response({ + "success": True, + "validation_passed": False, + "messages": {"ManifestVersion": "DTS12.1"}, + "errors": {"SomeError": ["Error message"]}, + "warnings": {"SomeWarning": ["Warning message"]}, + "statusCode": 422 + }) invalid_response = client.post( - "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv&valid=false" + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" ) invalid_data = invalid_response.json() @@ -755,3 +806,63 @@ def test_validate_manifest_response_structure(self, client: TestClient): assert isinstance(data["error"], dict) assert isinstance(data["warning"], dict) assert isinstance(data["valid"], bool) + + def test_validate_manifest_lambda_error( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint handles Lambda errors""" + # Configure mock Lambda response for validation request error + mock_lambda_client.set_response({ + "success": False, + "error": "manifest_path is required", + "error_type": "ValidationError", + "statusCode": 400 + }) + + response = client.post( + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" + ) + + # Verify error response + assert response.status_code == 400 + assert "manifest_path" in response.json()["detail"] + + def test_validate_manifest_lambda_file_not_found( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint handles file not found errors""" + # Configure mock Lambda response for file not found + mock_lambda_client.set_response({ + "success": False, + "error": "Manifest file not found at s3://test-bucket/manifest.csv", + "error_type": "FileNotFoundError", + "statusCode": 404 + }) + + response = client.post( + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" + ) + + # Verify error response + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + def test_validate_manifest_lambda_service_unavailable( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint handles service unavailable errors""" + # Configure mock Lambda response for service unavailable + mock_lambda_client.set_response({ + "success": False, + "error": "Failed to connect to NGS360", + "error_type": "ServiceUnavailable", + "statusCode": 503 + }) + + response = client.post( + "/api/v1/manifest/validate?s3_path=s3://test-bucket/manifest.csv" + ) + + # Verify error response + assert response.status_code == 503 + assert "unavailable" in response.json()["detail"].lower() From 13dc332bd0cf89ae432794dee8bc4af7405e39d2 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 16:36:30 -0500 Subject: [PATCH 7/9] pylint fixes for manifest services, tests --- api/manifest/services.py | 17 ++++++------ tests/api/test_manifest.py | 56 +++++++------------------------------- 2 files changed, 19 insertions(+), 54 deletions(-) diff --git a/api/manifest/services.py b/api/manifest/services.py index 7b4d915..7f7a2e6 100644 --- a/api/manifest/services.py +++ b/api/manifest/services.py @@ -240,8 +240,9 @@ def validate_manifest_file( or "ngs360-manifest-validator" ) logger.info( - f"Invoking Lambda function: {lambda_function_name} " - f"for manifest validation of {s3_path}" + "Invoking Lambda function: %s for manifest validation of %s", + lambda_function_name, + s3_path ) try: @@ -272,7 +273,7 @@ def validate_manifest_file( # Read and parse Lambda response response_payload = json.loads(response["Payload"].read().decode("utf-8")) - logger.debug(f"Lambda response: {response_payload}") + logger.debug("Lambda response: %s", response_payload) # Check for Lambda execution errors (unhandled exceptions) if "FunctionError" in response: @@ -280,7 +281,7 @@ def validate_manifest_file( "errorMessage", "Unknown Lambda execution error" ) - logger.error(f"Lambda function error: {error_message}") + logger.error("Lambda function error: %s", error_message) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Lambda validation error: {error_message}" @@ -303,7 +304,7 @@ def validate_manifest_file( error_type = validation_result.get("error_type", "ValidationError") lambda_status = validation_result.get("statusCode", 400) - logger.error(f"Lambda returned error: {error_type} - {error_msg}") + logger.error("Lambda returned error: %s - %s", error_type, error_msg) # Map Lambda status codes to HTTP exceptions if lambda_status == 400: @@ -346,7 +347,7 @@ def validate_manifest_file( except ClientError as exc: error_code = exc.response["Error"]["Code"] error_message = exc.response["Error"]["Message"] - logger.error(f"Lambda ClientError: {error_code} - {error_message}") + logger.error("Lambda ClientError: %s - %s", error_code, error_message) if error_code == "ResourceNotFoundException": raise HTTPException( @@ -364,7 +365,7 @@ def validate_manifest_file( detail=f"Lambda error: {error_message}", ) from exc except json.JSONDecodeError as exc: - logger.error(f"Failed to parse Lambda response: {exc}") + logger.error("Failed to parse Lambda response: %s", exc) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to parse validation response from Lambda", @@ -373,7 +374,7 @@ def validate_manifest_file( # Re-raise HTTPException without modification raise except Exception as exc: - logger.error(f"Unexpected error invoking Lambda: {exc}") + logger.error("Unexpected error invoking Lambda: %s", exc) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Unexpected error during manifest validation: {str(exc)}", diff --git a/tests/api/test_manifest.py b/tests/api/test_manifest.py index 6c3303c..f17d37c 100644 --- a/tests/api/test_manifest.py +++ b/tests/api/test_manifest.py @@ -3,9 +3,15 @@ """ from datetime import datetime +import io +import pytest +from fastapi import HTTPException from fastapi.testclient import TestClient +from api.manifest.services import _parse_s3_path +from api.manifest.services import get_latest_manifest_file + from tests.conftest import MockS3Client @@ -14,7 +20,6 @@ class TestManifestServices: def test_parse_s3_path(self): """Test S3 path parsing in manifest service""" - from api.manifest.services import _parse_s3_path # Valid paths assert _parse_s3_path("s3://my-bucket") == ("my-bucket", "") @@ -42,8 +47,6 @@ def test_parse_s3_path(self): def test_get_latest_manifest_file_single(self, mock_s3_client: MockS3Client): """Test finding latest manifest when only one exists""" - from api.manifest.services import get_latest_manifest_file - # Setup mock S3 with one manifest file files = [ { @@ -60,8 +63,6 @@ def test_get_latest_manifest_file_single(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_file_multiple(self, mock_s3_client: MockS3Client): """Test finding latest manifest when multiple exist""" - from api.manifest.services import get_latest_manifest_file - # Setup mock S3 with multiple manifest files - most recent should be selected files = [ { @@ -88,8 +89,6 @@ def test_get_latest_manifest_file_multiple(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_case_insensitive(self, mock_s3_client: MockS3Client): """Test case-insensitive matching for 'Manifest'""" - from api.manifest.services import get_latest_manifest_file - # Test various cases: MANIFEST, manifest, Manifest, etc. files = [ { @@ -117,8 +116,6 @@ def test_get_latest_manifest_case_insensitive(self, mock_s3_client: MockS3Client def test_get_latest_manifest_csv_only(self, mock_s3_client: MockS3Client): """Test that only .csv files are matched""" - from api.manifest.services import get_latest_manifest_file - # Include non-CSV files with manifest in name files = [ { @@ -151,7 +148,6 @@ def test_get_latest_manifest_csv_only(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_substring_match(self, mock_s3_client: MockS3Client): """Test substring matching for 'manifest' in filename""" - from api.manifest.services import get_latest_manifest_file # Files with manifest as substring files = [ @@ -180,8 +176,6 @@ def test_get_latest_manifest_substring_match(self, mock_s3_client: MockS3Client) def test_get_latest_manifest_recursive(self, mock_s3_client: MockS3Client): """Test recursive search through subdirectories""" - from api.manifest.services import get_latest_manifest_file - # Files in different subdirectories files = [ { @@ -214,8 +208,6 @@ def test_get_latest_manifest_recursive(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_no_match(self, mock_s3_client: MockS3Client): """Test returning None when no manifest files found""" - from api.manifest.services import get_latest_manifest_file - # Files without "manifest" in name files = [ { @@ -237,8 +229,6 @@ def test_get_latest_manifest_no_match(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_empty_bucket(self, mock_s3_client: MockS3Client): """Test returning None when bucket/prefix is empty""" - from api.manifest.services import get_latest_manifest_file - # Empty bucket mock_s3_client.setup_bucket("test-bucket", "vendor/", [], []) @@ -248,11 +238,6 @@ def test_get_latest_manifest_empty_bucket(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_invalid_path(self, mock_s3_client: MockS3Client): """Test error handling for invalid S3 path""" - from api.manifest.services import get_latest_manifest_file - from fastapi import HTTPException - import pytest - - # Invalid S3 paths invalid_paths = ["http://bucket/path", "s3://", "s3:///bucket"] for path in invalid_paths: @@ -262,10 +247,6 @@ def test_get_latest_manifest_invalid_path(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_no_credentials(self, mock_s3_client: MockS3Client): """Test error handling when AWS credentials are missing""" - from api.manifest.services import get_latest_manifest_file - from fastapi import HTTPException - import pytest - mock_s3_client.simulate_error("NoCredentialsError") with pytest.raises(HTTPException) as exc_info: @@ -274,10 +255,6 @@ def test_get_latest_manifest_no_credentials(self, mock_s3_client: MockS3Client): def test_get_latest_manifest_bucket_not_found(self, mock_s3_client: MockS3Client): """Test error handling when bucket doesn't exist""" - from api.manifest.services import get_latest_manifest_file - from fastapi import HTTPException - import pytest - mock_s3_client.simulate_error("NoSuchBucket") with pytest.raises(HTTPException) as exc_info: @@ -286,10 +263,6 @@ def test_get_latest_manifest_bucket_not_found(self, mock_s3_client: MockS3Client def test_get_latest_manifest_access_denied(self, mock_s3_client: MockS3Client): """Test error handling when access is denied""" - from api.manifest.services import get_latest_manifest_file - from fastapi import HTTPException - import pytest - mock_s3_client.simulate_error("AccessDenied") with pytest.raises(HTTPException) as exc_info: @@ -352,7 +325,7 @@ def test_get_latest_manifest_no_content( assert response.text == "" def test_get_latest_manifest_invalid_path( - self, client: TestClient, mock_s3_client: MockS3Client + self, client: TestClient ): """Test 400 error for invalid S3 path""" # Make API call with invalid path @@ -419,7 +392,6 @@ def test_upload_manifest_to_directory( self, client: TestClient, mock_s3_client: MockS3Client ): """Test uploading a manifest to a directory path (ending with /)""" - import io # Create a test CSV file csv_content = b"Sample_ID,Sample_Name,Project\nS001,Sample1,ProjectA\nS002,Sample2,ProjectB" @@ -450,7 +422,6 @@ def test_upload_manifest_to_file_path( self, client: TestClient, mock_s3_client: MockS3Client ): """Test uploading a manifest with a specific file path""" - import io # Create a test CSV file csv_content = b"Sample_ID,Sample_Name\nS001,Sample1" @@ -476,7 +447,6 @@ def test_upload_manifest_to_directory_without_trailing_slash( self, client: TestClient, mock_s3_client: MockS3Client ): """Test uploading to a directory path without trailing slash""" - import io # Create a test CSV file csv_content = b"Sample_ID\nS001" @@ -501,7 +471,6 @@ def test_upload_manifest_to_root_bucket( self, client: TestClient, mock_s3_client: MockS3Client ): """Test uploading a manifest to the root of a bucket""" - import io # Create a test CSV file csv_content = b"Sample_ID\nS001" @@ -521,10 +490,9 @@ def test_upload_manifest_to_root_bucket( assert "root_manifest.csv" in mock_s3_client.uploaded_files["test-bucket"] def test_upload_manifest_non_csv_file( - self, client: TestClient, mock_s3_client: MockS3Client + self, client: TestClient ): """Test that non-CSV files are rejected""" - import io # Create a test text file file = io.BytesIO(b"This is not a CSV file") @@ -544,7 +512,6 @@ def test_upload_manifest_bucket_not_found( self, client: TestClient, mock_s3_client: MockS3Client ): """Test upload fails when bucket doesn't exist""" - import io mock_s3_client.simulate_error("NoSuchBucket") @@ -565,7 +532,6 @@ def test_upload_manifest_access_denied( self, client: TestClient, mock_s3_client: MockS3Client ): """Test upload fails when access is denied""" - import io mock_s3_client.simulate_error("AccessDenied") @@ -586,7 +552,6 @@ def test_upload_manifest_no_credentials( self, client: TestClient, mock_s3_client: MockS3Client ): """Test upload fails when AWS credentials are missing""" - import io mock_s3_client.simulate_error("NoCredentialsError") @@ -604,10 +569,9 @@ def test_upload_manifest_no_credentials( assert "credentials" in response.json()["detail"].lower() def test_upload_manifest_invalid_s3_path( - self, client: TestClient, mock_s3_client: MockS3Client + self, client: TestClient ): """Test upload fails with invalid S3 path format""" - import io csv_content = b"Sample_ID\nS001" file = io.BytesIO(csv_content) @@ -623,7 +587,7 @@ def test_upload_manifest_invalid_s3_path( assert "Invalid S3 path format" in response.json()["detail"] def test_upload_manifest_missing_file( - self, client: TestClient, mock_s3_client: MockS3Client + self, client: TestClient ): """Test that request without file fails""" response = client.post( From faac439a5b65f1ee1c3aa0841f2adace41dc6be1 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 22:06:31 -0500 Subject: [PATCH 8/9] expose all lambda params as querystrings, not just s3path --- api/manifest/routes.py | 26 +++++++++++++++++++++++++- api/manifest/services.py | 29 ++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/api/manifest/routes.py b/api/manifest/routes.py index 5297d6c..eea1aeb 100644 --- a/api/manifest/routes.py +++ b/api/manifest/routes.py @@ -2,6 +2,8 @@ Routes/endpoints for the Manifest API """ +from typing import Optional + from fastapi import APIRouter, Depends, Query, Response, status, UploadFile, File from api.manifest import services from api.manifest.models import ManifestUploadResponse, ManifestValidationResponse @@ -80,6 +82,15 @@ def validate_manifest( s3_path: str = Query( ..., description="S3 path to the manifest CSV file to validate" ), + manifest_version: Optional[str] = Query( + None, description="Manifest version to validate against (e.g., 'DTS12.1')" + ), + files_bucket: Optional[str] = Query( + None, description="S3 bucket where manifest files are located" + ), + files_prefix: Optional[str] = Query( + None, description="S3 prefix/path where manifest files are located" + ), ) -> ManifestValidationResponse: """ Validate a manifest CSV file from S3 using the ngs360-manifest-validator Lambda. @@ -92,9 +103,22 @@ def validate_manifest( Args: s3_path: S3 path to the manifest CSV file to validate + manifest_version: Optional manifest version to validate against + files_bucket: Optional S3 bucket where manifest files are located + files_prefix: Optional S3 prefix/path for file existence checks Returns: ManifestValidationResponse with validation status and any errors found """ - result = services.validate_manifest_file(session, s3_path) + # Uppercase manifest_version if provided + if manifest_version: + manifest_version = manifest_version.upper() + + result = services.validate_manifest_file( + session=session, + manifest_path=s3_path, + manifest_version=manifest_version, + files_bucket=files_bucket, + files_prefix=files_prefix, + ) return result diff --git a/api/manifest/services.py b/api/manifest/services.py index 7f7a2e6..fbcfb72 100644 --- a/api/manifest/services.py +++ b/api/manifest/services.py @@ -3,6 +3,7 @@ """ import json +from typing import Optional from fastapi import HTTPException, status, UploadFile import boto3 from botocore.exceptions import NoCredentialsError, ClientError @@ -222,14 +223,20 @@ def upload_manifest_file( def validate_manifest_file( session: SessionDep, - s3_path: str, + manifest_path: str, + manifest_version: Optional[str] = None, + files_bucket: Optional[str] = None, + files_prefix: Optional[str] = None, ) -> ManifestValidationResponse: """ Validate a manifest CSV file from S3 by invoking a Lambda function. Args: session: Database session - s3_path: S3 path to the manifest CSV file to validate + manifest_path: S3 path to the manifest CSV file to validate + manifest_version: Optional manifest version to validate against + files_bucket: Optional S3 bucket where manifest files are located + files_prefix: Optional S3 prefix/path for file existence checks Returns: ManifestValidationResponse with validation status and any errors found @@ -242,7 +249,7 @@ def validate_manifest_file( logger.info( "Invoking Lambda function: %s for manifest validation of %s", lambda_function_name, - s3_path + manifest_path ) try: @@ -253,17 +260,25 @@ def validate_manifest_file( # Create Lambda client lambda_client = boto3.client("lambda", region_name=region) - # Parse the S3 path to extract the bucket for files_bucket parameter - bucket, _ = _parse_s3_path(s3_path) + # Parse the S3 path to extract the bucket for files_bucket parameter if not provided + if files_bucket is None: + bucket, _ = _parse_s3_path(manifest_path) + files_bucket = bucket # Prepare payload for Lambda function # Lambda expects: manifest_path, files_bucket, manifest_version (optional), # files_prefix (optional), available_pipelines (optional) payload = { - "manifest_path": s3_path, - "files_bucket": bucket, + "manifest_path": manifest_path, + "files_bucket": files_bucket, } + # Add optional parameters if provided + if manifest_version: + payload["manifest_version"] = manifest_version + if files_prefix: + payload["files_prefix"] = files_prefix + # Invoke Lambda function synchronously response = lambda_client.invoke( FunctionName=lambda_function_name, From 202e505063daf6ef6f5300357049eeaef893a194 Mon Sep 17 00:00:00 2001 From: vasques1 Date: Mon, 2 Feb 2026 22:07:14 -0500 Subject: [PATCH 9/9] add tests for new params --- tests/api/test_manifest.py | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/tests/api/test_manifest.py b/tests/api/test_manifest.py index f17d37c..10059b3 100644 --- a/tests/api/test_manifest.py +++ b/tests/api/test_manifest.py @@ -830,3 +830,81 @@ def test_validate_manifest_lambda_service_unavailable( # Verify error response assert response.status_code == 503 assert "unavailable" in response.json()["detail"].lower() + + def test_validate_manifest_with_manifest_version( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint with manifest_version parameter""" + mock_lambda_client.set_response({ + "success": True, + "validation_passed": True, + "messages": {"ManifestVersion": "DTS12.1"}, + "errors": {}, + "warnings": {}, + "statusCode": 200 + }) + + response = client.post( + "/api/v1/manifest/validate" + "?s3_path=s3://test-bucket/manifest.csv" + "&manifest_version=dts12.1" + ) + + assert response.status_code == 200 + data = response.json() + assert data["valid"] is True + + # Verify manifest_version was passed (uppercased) to Lambda + last_payload = mock_lambda_client.invocations[-1]["Payload"] + assert last_payload["manifest_version"] == "DTS12.1" + + def test_validate_manifest_with_files_bucket_and_prefix( + self, client: TestClient, mock_lambda_client + ): + """Test validation endpoint with files_bucket and files_prefix parameters""" + mock_lambda_client.set_response({ + "success": True, + "validation_passed": True, + "messages": {}, + "errors": {}, + "warnings": {}, + "statusCode": 200 + }) + + response = client.post( + "/api/v1/manifest/validate" + "?s3_path=s3://test-bucket/manifest.csv" + "&files_bucket=data-bucket" + "&files_prefix=raw/fastq/" + ) + + assert response.status_code == 200 + + # Verify files_bucket and files_prefix were passed to Lambda + last_payload = mock_lambda_client.invocations[-1]["Payload"] + assert last_payload["files_bucket"] == "data-bucket" + assert last_payload["files_prefix"] == "raw/fastq/" + + def test_validate_manifest_files_bucket_defaults_to_s3_path_bucket( + self, client: TestClient, mock_lambda_client + ): + """Test that files_bucket defaults to bucket from s3_path when not provided""" + mock_lambda_client.set_response({ + "success": True, + "validation_passed": True, + "messages": {}, + "errors": {}, + "warnings": {}, + "statusCode": 200 + }) + + response = client.post( + "/api/v1/manifest/validate?s3_path=s3://my-bucket/path/manifest.csv" + ) + + assert response.status_code == 200 + + # Verify files_bucket defaulted to bucket from s3_path + last_payload = mock_lambda_client.invocations[-1]["Payload"] + assert last_payload["manifest_path"] == "s3://my-bucket/path/manifest.csv" + assert last_payload["files_bucket"] == "my-bucket"