From a61deae9583d9554e18fecab54519ea0de2a55c7 Mon Sep 17 00:00:00 2001 From: Harold Sun Date: Wed, 7 Jan 2026 22:48:11 +0000 Subject: [PATCH 1/2] feat: support Lambda Managed Instance --- Cargo.lock | 16 +-- Cargo.toml | 3 +- README.md | 25 +++- .../fastapi-response-streaming-lmi/.gitignore | 1 + .../fastapi-response-streaming-lmi/README.md | 121 ++++++++++++++++++ .../__init__.py | 0 .../app/__init__.py | 0 .../app/main.py | 28 ++++ .../app/requirements.txt | 2 + .../fastapi-response-streaming-lmi/app/run.sh | 5 + .../template.yaml | 61 +++++++++ src/lib.rs | 22 ++-- 12 files changed, 263 insertions(+), 21 deletions(-) create mode 100644 examples/fastapi-response-streaming-lmi/.gitignore create mode 100644 examples/fastapi-response-streaming-lmi/README.md create mode 100644 examples/fastapi-response-streaming-lmi/__init__.py create mode 100644 examples/fastapi-response-streaming-lmi/app/__init__.py create mode 100644 examples/fastapi-response-streaming-lmi/app/main.py create mode 100644 examples/fastapi-response-streaming-lmi/app/requirements.txt create mode 100755 examples/fastapi-response-streaming-lmi/app/run.sh create mode 100644 examples/fastapi-response-streaming-lmi/template.yaml diff --git a/Cargo.lock b/Cargo.lock index f3b776a9..f76376e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -248,9 +248,9 @@ dependencies = [ [[package]] name = "aws_lambda_events" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac59c9b189a3bd75146633e1c87f35fb727f53b03c7f331af789ff1293a9f350" +checksum = "ca106ceeb46420f22b33b863f8a667214afbf6b0457bc209f8c97de2282bedae" dependencies = [ "base64", "bytes", @@ -1048,9 +1048,9 @@ dependencies = [ [[package]] name = "lambda_http" -version = "1.0.1" +version = "1.1.0-rc1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a106755a9079a3ed20b4262e32c9a5efa127c97546cb2ecf69bbf3d17dbf970b" +checksum = "12ca353a921b753e2a464cf3286381a0be7cdb6cc6e685daa2fdc5b95a6f4402" dependencies = [ "aws_lambda_events", "bytes", @@ -1073,9 +1073,9 @@ dependencies = [ [[package]] name = "lambda_runtime" -version = "1.0.1" +version = "1.1.0-rc1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46e566d19773ae483b0d32cc6045c5420d7b3eec6192ecb1c26ffa4e0091388a" +checksum = "3ef345d40ef6f1bcfb051a06cee05ed3cf8815ef32552610b8327b9675788615" dependencies = [ "async-stream", "base64", @@ -1098,9 +1098,9 @@ dependencies = [ [[package]] name = "lambda_runtime_api_client" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c039f06329949692a81a993ede4cfaa6abab3ac8e72590cf7c5e6a64a9ec7b1" +checksum = "7b4873061514cb57ffb6a599b77c46c65d6d783efe9bad8fd56b7cba7f0459ef" dependencies = [ "bytes", "futures-channel", diff --git a/Cargo.toml b/Cargo.toml index 1d01203f..c2ab52c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,11 +22,12 @@ http = "1.2.0" http-body = "1.0.1" hyper = { version = "1.5.2", features = ["client"] } hyper-util = "0.1.10" -lambda_http = { version = "1.0.1", default-features = false, features = [ +lambda_http = { version = "1.1.0-rc1", default-features = false, features = [ "apigw_http", "apigw_rest", "alb", "pass_through", + "experimental-concurrency" ] } serde_json = "1.0.135" tokio = { version = "1.48.0", features = [ diff --git a/README.md b/README.md index 4ad91971..5f2703e9 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ The same docker image can run on AWS Lambda, Amazon EC2, 
AWS Fargate, and local - Run web applications on AWS Lambda - Supports Amazon API Gateway Rest API and Http API endpoints, Lambda Function URLs, and Application Load Balancer - Supports Lambda managed runtimes, custom runtimes and docker OCI images +- Supports Lambda Managed Instances for multi-concurrent request handling - Supports any web frameworks and languages, no new code dependency to include - Automatic encode binary response - Enables graceful shutdown @@ -101,7 +102,7 @@ The readiness check port/path and traffic port can be configured using environme | AWS_LWA_READINESS_CHECK_MIN_UNHEALTHY_STATUS | The minimum HTTP status code that is considered unhealthy | "500" | | AWS_LWA_ASYNC_INIT / ASYNC_INIT* | enable asynchronous initialization for long initialization functions | "false" | | AWS_LWA_REMOVE_BASE_PATH / REMOVE_BASE_PATH* | the base path to be removed from request path | None | -| AWS_LWA_ENABLE_COMPRESSION | enable gzip compression for response body | "false" | +| AWS_LWA_ENABLE_COMPRESSION | enable gzip/br compression for response body | "false" | | AWS_LWA_INVOKE_MODE | Lambda function invoke mode: "buffered" or "response_stream", default is "buffered" | "buffered" | | AWS_LWA_PASS_THROUGH_PATH | the path for receiving event payloads that are passed through from non-http triggers | "/events" | | AWS_LWA_AUTHORIZATION_SOURCE | a header name to be replaced to `Authorization` | None | @@ -127,8 +128,8 @@ For example, you could have configured your API Gateway to have a /orders/{proxy Each resource is handled by a separate Lambda functions. For this reason, the application inside Lambda may not be aware of the fact that the /orders path exists. Use REMOVE_BASE_PATH to remove the /orders prefix when routing requests to the application. Defaults to empty string. Checkout [SpringBoot](examples/springboot) example. -**AWS_LWA_ENABLE_COMPRESSION** - Lambda Web Adapter supports gzip compression for response body. This feature is disabled by default. Enable it by setting environment variable `AWS_LWA_ENABLE_COMPRESSION` to `true`. -When enabled, this will compress responses unless it's an image as determined by the content-type starting with `image` or the response is less than 32 bytes. This will also compress HTTP/1.1 chunked streaming response. +**AWS_LWA_ENABLE_COMPRESSION** - Lambda Web Adapter supports gzip/br compression for response body. This feature is disabled by default. Enable it by setting environment variable `AWS_LWA_ENABLE_COMPRESSION` to `true`. +When enabled, this will compress responses unless it's an image as determined by the content-type starting with `image` or the response is less than 32 bytes. This feature does not support streaming response. **AWS_LWA_INVOKE_MODE** - Lambda function invoke mode, this should match Function Url invoke mode. The default is "buffered". When configured as "response_stream", Lambda Web Adapter will stream response to Lambda service [blog](https://aws.amazon.com/blogs/compute/introducing-aws-lambda-response-streaming/). Please check out [FastAPI with Response Streaming](examples/fastapi-response-streaming) example. @@ -153,6 +154,23 @@ Lambda Web Adapter forwards this information to the web application in a Http He Lambda Web Adapter forwards this information to the web application in a Http Header named "x-amzn-lambda-context". In the web application, you can retrieve the value of this http header and deserialize it into a JSON object. Check out [Express.js in Zip](examples/expressjs-zip) on how to use it. 
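+A minimal sketch of reading both context headers in a FastAPI handler. The JSON field names vary by event source and adapter version, so rather than assuming specific keys, this handler simply reports the keys it finds after deserializing:
+
+```python
+import json
+
+from fastapi import FastAPI, Request
+
+app = FastAPI()
+
+
+@app.get("/whoami")
+async def whoami(request: Request):
+    # Both headers carry JSON serialized by Lambda Web Adapter; default to "{}"
+    # so the handler also works when running outside Lambda.
+    request_context = json.loads(request.headers.get("x-amzn-request-context", "{}"))
+    lambda_context = json.loads(request.headers.get("x-amzn-lambda-context", "{}"))
+    return {
+        "request_context_keys": sorted(request_context),
+        "lambda_context_keys": sorted(lambda_context),
+    }
+```
+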
+## Lambda Managed Instances + +Lambda Web Adapter supports [Lambda Managed Instances](https://docs.aws.amazon.com/lambda/latest/dg/lambda-managed-instances.html), which allows a single Lambda execution environment to handle multiple concurrent requests. This can improve throughput and reduce costs for I/O-bound workloads. + +When running on Lambda Managed Instances, Lambda Web Adapter automatically handles concurrent invocations by forwarding multiple requests to your web application simultaneously. Since most web frameworks (Express.js, FastAPI, Spring Boot, etc.) are already designed to handle concurrent requests, your application should work without modification. + +### Considerations for Multi-Concurrency + +When using Lambda Managed Instances, keep these points in mind: + +- **Shared state**: Global variables and in-memory caches are shared across concurrent requests. Ensure your application handles shared state safely. +- **Connection pooling**: Use connection pools for databases and external services rather than single connections. +- **File system**: The `/tmp` directory is shared across concurrent requests. Use unique file names or implement file locking to avoid conflicts. +- **Resource limits**: Memory and CPU are shared across concurrent requests. Monitor resource usage under concurrent load. + +Lambda Managed Instances works with both buffered and response streaming modes. + ## Graceful Shutdown For a function with Lambda Extensions registered, Lambda enables shutdown phase for the function. When Lambda service is about to shut down a Lambda execution environment, @@ -184,6 +202,7 @@ The `AWS_LWA_LAMBDA_RUNTIME_API_PROXY` environment varible makes the Lambda Web - [FastAPI with Background Tasks](examples/fastapi-background-tasks) - [FastAPI with Response Streaming](examples/fastapi-response-streaming) - [FastAPI with Response Streaming in Zip](examples/fastapi-response-streaming-zip) +- [FastAPI with Response Streaming on Lambda Managed Instances](examples/fastapi-response-streaming-lmi) - [FastAPI Response Streaming Backend with IAM Auth](examples/fastapi-backend-only-response-streaming/) - [Flask](examples/flask) - [Flask in Zip](examples/flask-zip) diff --git a/examples/fastapi-response-streaming-lmi/.gitignore b/examples/fastapi-response-streaming-lmi/.gitignore new file mode 100644 index 00000000..9984c2e5 --- /dev/null +++ b/examples/fastapi-response-streaming-lmi/.gitignore @@ -0,0 +1 @@ +.aws-sam/ diff --git a/examples/fastapi-response-streaming-lmi/README.md b/examples/fastapi-response-streaming-lmi/README.md new file mode 100644 index 00000000..761b70e0 --- /dev/null +++ b/examples/fastapi-response-streaming-lmi/README.md @@ -0,0 +1,121 @@ +# FastAPI Response Streaming with Lambda Managed Instances + +This example shows how to use Lambda Web Adapter to run a FastAPI application with response streaming on [Lambda Managed Instances](https://docs.aws.amazon.com/lambda/latest/dg/lambda-managed-instances.html) (LMI). + +Lambda Managed Instances allows a single Lambda execution environment to handle multiple concurrent requests, improving throughput and reducing costs for I/O-bound workloads like streaming responses. + +## Prerequisites + +Lambda Managed Instances requires a VPC with: +- At least one subnet (two subnets recommended) +- A security group that allows outbound traffic + +If you don't have a VPC configured, you can use the default VPC or create one. + +## How does it work? + +This example combines three Lambda features: + +1. 
**Lambda Web Adapter** - Runs your FastAPI app on Lambda without code changes +2. **Response Streaming** - Streams responses back to clients as they're generated +3. **Lambda Managed Instances** - Handles multiple concurrent requests per execution environment + +### Key Configuration + +```yaml +LMICapacityProvider: + Type: AWS::Lambda::CapacityProvider + Properties: + Name: !Sub "${AWS::StackName}-capacity-provider" + VpcConfig: + SubnetIds: !Ref SubnetIds + SecurityGroupIds: !Ref SecurityGroupIds + InstanceRequirements: + Architectures: + - x86_64 + AllowedTypes: + - m7i.large + ScalingConfig: + MaxVCpuCount: 2 + +FastAPIFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: app/ + Handler: run.sh + Runtime: python3.13 + MemorySize: 2048 + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/bootstrap + AWS_LWA_INVOKE_MODE: response_stream + PORT: 8000 + Layers: + - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:25 + CapacityProviderConfig: + Arn: !Ref LMICapacityProvider + PerExecutionEnvironmentMaxConcurrency: 64 + FunctionUrlConfig: + AuthType: NONE + InvokeMode: RESPONSE_STREAM +``` + +- `AWS::Lambda::CapacityProvider` - Creates the LMI capacity provider with VPC configuration +- `CapacityProviderConfig.Arn` - References the capacity provider +- `CapacityProviderConfig.PerExecutionEnvironmentMaxConcurrency: 64` - Up to 64 concurrent requests per instance +- `AWS_LWA_INVOKE_MODE: response_stream` - Configures Lambda Web Adapter for streaming +- `FunctionUrlConfig.InvokeMode: RESPONSE_STREAM` - Enables streaming on the Function URL + +## Build and Deploy + +First, get your VPC subnet and security group IDs: + +```bash +# List subnets in your default VPC +aws ec2 describe-subnets --filters "Name=default-for-az,Values=true" \ + --query 'Subnets[*].[SubnetId,AvailabilityZone]' --output table + +# List security groups +aws ec2 describe-security-groups --filters "Name=group-name,Values=default" \ + --query 'SecurityGroups[*].[GroupId,GroupName]' --output table +``` + +Build and deploy: + +```bash +sam build --use-container +sam deploy --guided +``` + +During guided deployment, you'll be prompted for: +- `SubnetIds` - Comma-separated list of subnet IDs (e.g., `subnet-abc123,subnet-def456`) +- `SecurityGroupIds` - Comma-separated list of security group IDs (e.g., `sg-abc123`) + +## Verify it works + +Open the Function URL in a browser. You should see a message stream back character by character, with a unique request ID prefix like `[a1b2c3d4] This is streaming from Lambda Managed Instances!`. + +### Test concurrent requests + +To verify LMI is working, send multiple concurrent requests: + +```bash +# Get your function URL +URL=$(aws cloudformation describe-stacks --stack-name fastapi-response-streaming-lmi \ + --query 'Stacks[0].Outputs[?OutputKey==`FastAPIFunctionUrl`].OutputValue' --output text) + +# Send 10 concurrent requests +for i in {1..10}; do curl -s "$URL" & done; wait +``` + +Each response will have a different request ID, but they may share the same execution environment (visible in CloudWatch logs). + +## Considerations + +When using LMI with streaming: + +- **VPC**: LMI requires VPC configuration. 
Ensure your subnets have internet access (via NAT Gateway) if your function needs to call external services
+- **Shared state**: FastAPI/Uvicorn handles concurrency natively, but avoid mutable global state
+- **Memory**: With 64 concurrent requests, ensure sufficient memory (2048MB in this example)
+- **Timeouts**: Streaming responses can run up to 15 minutes with Function URLs
+- **Scaling**: `MaxVCpuCount` in `ScalingConfig` caps the total vCPUs, and therefore the number of EC2 instances, the capacity provider can provision
diff --git a/examples/fastapi-response-streaming-lmi/__init__.py b/examples/fastapi-response-streaming-lmi/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/fastapi-response-streaming-lmi/app/__init__.py b/examples/fastapi-response-streaming-lmi/app/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/fastapi-response-streaming-lmi/app/main.py b/examples/fastapi-response-streaming-lmi/app/main.py
new file mode 100644
index 00000000..b8d1d0b0
--- /dev/null
+++ b/examples/fastapi-response-streaming-lmi/app/main.py
@@ -0,0 +1,28 @@
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+import asyncio
+import uuid
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+
+@app.get("/health")
+async def health():
+    return {"status": "healthy"}
+
+
+async def streamer(request_id: str):
+    """Stream a message character by character with request ID for tracing."""
+    message = f"[{request_id}] This is streaming from Lambda Managed Instances!\n"
+    for char in message:
+        yield char
+        await asyncio.sleep(0.05)
+
+
+@app.get("/{path:path}")
+async def index(path: str, request: Request):
+    """Stream response - each concurrent request gets a unique ID."""
+    request_id = str(uuid.uuid4())[:8]
+    return StreamingResponse(streamer(request_id), media_type="text/plain")
diff --git a/examples/fastapi-response-streaming-lmi/app/requirements.txt b/examples/fastapi-response-streaming-lmi/app/requirements.txt
new file mode 100644
index 00000000..87c06ac2
--- /dev/null
+++ b/examples/fastapi-response-streaming-lmi/app/requirements.txt
@@ -0,0 +1,2 @@
+fastapi==0.115.5
+uvicorn==0.32.0
diff --git a/examples/fastapi-response-streaming-lmi/app/run.sh b/examples/fastapi-response-streaming-lmi/app/run.sh
new file mode 100755
index 00000000..a630f442
--- /dev/null
+++ b/examples/fastapi-response-streaming-lmi/app/run.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+PATH=$PATH:$LAMBDA_TASK_ROOT/bin \
+    PYTHONPATH=$PYTHONPATH:/opt/python:$LAMBDA_RUNTIME_DIR \
+    exec python -m uvicorn --port=$PORT main:app
diff --git a/examples/fastapi-response-streaming-lmi/template.yaml b/examples/fastapi-response-streaming-lmi/template.yaml
new file mode 100644
index 00000000..be9745fc
--- /dev/null
+++ b/examples/fastapi-response-streaming-lmi/template.yaml
@@ -0,0 +1,61 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Transform: AWS::Serverless-2016-10-31
+Description: FastAPI response streaming with Lambda Managed Instances
+
+Parameters:
+  SubnetIds:
+    Type: List<AWS::EC2::Subnet::Id>
+    Description: Subnet IDs for the Lambda Managed Instances capacity provider
+  SecurityGroupIds:
+    Type: List<AWS::EC2::SecurityGroup::Id>
+    Description: Security Group IDs for the Lambda Managed Instances capacity provider
+
+Globals:
+  Function:
+    Timeout: 120
+
+Resources:
+  LMICapacityProvider:
+    Type: AWS::Serverless::CapacityProvider
+    Properties:
+      CapacityProviderName: !Sub "${AWS::StackName}-capacity-provider"
+      VpcConfig:
+        SubnetIds: !Ref SubnetIds
+        SecurityGroupIds: !Ref SecurityGroupIds
+      ScalingConfig:
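+        # ScalingConfig drives autoscaling of the managed instances:
+        # MaxVCpuCount caps the total vCPUs the capacity provider may
+        # provision, and AverageCPUUtilization is the utilization target
+        # (percent) the fleet scales around.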
MaxVCpuCount: 20 + AverageCPUUtilization: 70.0 + + FastAPIFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: app/ + Handler: run.sh + Runtime: python3.13 + MemorySize: 2048 + AutoPublishAlias: live + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/bootstrap + AWS_LWA_INVOKE_MODE: response_stream + PORT: 8000 + Layers: + # - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:25 + - arn:aws:lambda:us-west-2:048972532408:layer:LambdaAdapterLayerX86:8 + CapacityProviderConfig: + Arn: !GetAtt LMICapacityProvider.Arn + PerExecutionEnvironmentMaxConcurrency: 64 + FunctionUrlConfig: + AuthType: AWS_IAM + InvokeMode: RESPONSE_STREAM + +Outputs: + FastAPIFunctionUrl: + Description: "Function URL for FastAPI function" + Value: !GetAtt FastAPIFunctionUrl.FunctionUrl + FastAPIFunction: + Description: "FastAPI Lambda Function ARN" + Value: !GetAtt FastAPIFunction.Arn + CapacityProviderArn: + Description: "Capacity Provider ARN" + Value: !GetAtt LMICapacityProvider.Arn diff --git a/src/lib.rs b/src/lib.rs index 4f8e1e14..5c4f1fa5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -317,16 +317,20 @@ impl Adapter { env::set_var("AWS_LAMBDA_RUNTIME_API", runtime_proxy); } - if compression { - let svc = ServiceBuilder::new().layer(CompressionLayer::new()).service(self); - match invoke_mode { - LambdaInvokeMode::Buffered => lambda_http::run(svc).await, - LambdaInvokeMode::ResponseStream => lambda_http::run_with_streaming_response(svc).await, + match invoke_mode { + LambdaInvokeMode::Buffered if compression => { + let svc = ServiceBuilder::new().layer(CompressionLayer::new()).service(self); + lambda_http::run_concurrent(svc).await } - } else { - match invoke_mode { - LambdaInvokeMode::Buffered => lambda_http::run(self).await, - LambdaInvokeMode::ResponseStream => lambda_http::run_with_streaming_response(self).await, + LambdaInvokeMode::Buffered => lambda_http::run_concurrent(self).await, + LambdaInvokeMode::ResponseStream => { + if compression { + tracing::warn!( + "Compression is not supported with response streaming mode. \ + Compression will be disabled." + ); + } + lambda_http::run_with_streaming_response_concurrent(self).await } } } From bee85de062e2c16d78a2464ccd1807846ce84c8d Mon Sep 17 00:00:00 2001 From: Harold Sun Date: Wed, 7 Jan 2026 23:36:26 +0000 Subject: [PATCH 2/2] fix example and tests --- .config/nextest.toml | 2 +- examples/fastapi-response-streaming-lmi/app/main.py | 7 ++----- examples/fastapi-response-streaming-lmi/template.yaml | 5 ++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 1d529e5a..7dcb829b 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -113,7 +113,7 @@ store-failure-output = true test-threads = 1 [profile.ci] -inherits = "test" +# inherits = "test" # Print out output for failing tests as soon as they fail, and also at the end # of the run (for easy scrollability). 
failure-output = "immediate-final" diff --git a/examples/fastapi-response-streaming-lmi/app/main.py b/examples/fastapi-response-streaming-lmi/app/main.py index b8d1d0b0..612b8bc2 100644 --- a/examples/fastapi-response-streaming-lmi/app/main.py +++ b/examples/fastapi-response-streaming-lmi/app/main.py @@ -3,9 +3,6 @@ import asyncio import uuid -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - app = FastAPI() @app.get("/health") @@ -21,8 +18,8 @@ async def streamer(request_id: str): await asyncio.sleep(0.05) -@app.get("/{path:path}") -async def index(path: str, request: Request): +@app.get("/") +async def index(): """Stream response - each concurrent request gets a unique ID.""" request_id = str(uuid.uuid4())[:8] return StreamingResponse(streamer(request_id), media_type="text/plain") diff --git a/examples/fastapi-response-streaming-lmi/template.yaml b/examples/fastapi-response-streaming-lmi/template.yaml index be9745fc..c52196f1 100644 --- a/examples/fastapi-response-streaming-lmi/template.yaml +++ b/examples/fastapi-response-streaming-lmi/template.yaml @@ -40,13 +40,12 @@ Resources: AWS_LWA_INVOKE_MODE: response_stream PORT: 8000 Layers: - # - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:25 - - arn:aws:lambda:us-west-2:048972532408:layer:LambdaAdapterLayerX86:8 + - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:26 CapacityProviderConfig: Arn: !GetAtt LMICapacityProvider.Arn PerExecutionEnvironmentMaxConcurrency: 64 FunctionUrlConfig: - AuthType: AWS_IAM + AuthType: NONE InvokeMode: RESPONSE_STREAM Outputs:
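
To sanity-check the multi-concurrency behavior locally before deploying, a small async driver like the one below can stand in for the README's curl loop. It is only a sketch: it assumes the example app is running locally (`cd app && uvicorn main:app --port 8000`) and that `httpx` is installed (`pip install httpx`).

```python
import asyncio
import time

import httpx


async def fetch(client: httpx.AsyncClient, i: int) -> None:
    # Stream the body so all ten requests stay in flight at the same time,
    # the same interleaving LMI performs for concurrent streaming invocations.
    async with client.stream("GET", "http://127.0.0.1:8000/") as resp:
        body = "".join([chunk async for chunk in resp.aiter_text()])
        print(f"request {i}: {body.strip()}")


async def main() -> None:
    async with httpx.AsyncClient(timeout=30.0) as client:
        # Each response carries its own [request-id] prefix from app/main.py.
        # Because every streamed character sleeps 50 ms, ten requests finishing
        # in roughly the time of one shows they ran concurrently, not queued.
        await asyncio.gather(*(fetch(client, i) for i in range(10)))


start = time.perf_counter()
asyncio.run(main())
print(f"total: {time.perf_counter() - start:.1f}s (serial would take ~10x longer)")
```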