From 8a9b4002a866ca4580f162af9ad7d268eb45a063 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 09:03:34 -0500 Subject: [PATCH 1/8] feat: implement initial configurations for aws bedrock support --- code/.env.template | 7 ++- code/config/config_llm.yaml | 9 +++ code/llm/llm.py | 4 ++ code/requirements.txt | 3 + docs/setup-aws_bedrock.md | 115 ++++++++++++++++++++++++++++++++++++ 5 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 docs/setup-aws_bedrock.md diff --git a/code/.env.template b/code/.env.template index 3a6c89fd4..e41df0a19 100644 --- a/code/.env.template +++ b/code/.env.template @@ -60,4 +60,9 @@ OPENSEARCH_CREDENTIALS="" NLWEB_LOGGING_PROFILE=production # Hugging Face Inference Providers env variables -HF_TOKEN="" \ No newline at end of file +HF_TOKEN="" + +# AWS Bedrock env variables +AWS_BEDROCK_ACCESS_KEY_ID="" +AWS_BEDROCK_SECRET_ACCESS_KEY="" +AWS_BEDROCK_REGION="us-east-1" diff --git a/code/config/config_llm.yaml b/code/config/config_llm.yaml index 023648a47..b1d3f349b 100644 --- a/code/config/config_llm.yaml +++ b/code/config/config_llm.yaml @@ -73,3 +73,12 @@ endpoints: models: high: Qwen/Qwen2.5-72B-Instruct low: Qwen/Qwen2.5-Coder-7B-Instruct + + aws_bedrock: + api_key_env: AWS_BEDROCK_ACCESS_KEY_ID + api_secret_env: AWS_BEDROCK_SECRET_ACCESS_KEY + api_region_env: AWS_BEDROCK_REGION + llm_type: aws_bedrock + models: + high: amazon.nova-pro-v1:0 + low: amazon.nova-micro-v1:0 diff --git a/code/llm/llm.py b/code/llm/llm.py index b5a05bf01..8b9d78cb6 100644 --- a/code/llm/llm.py +++ b/code/llm/llm.py @@ -52,6 +52,7 @@ def init(): "inception": ["aiohttp>=3.9.1"], "snowflake": ["httpx>=0.28.1"], "huggingface": ["huggingface_hub>=0.31.0"], + "aws_bedrock": ["boto3>=1.38.15"], } # Cache for installed packages @@ -147,6 +148,9 @@ def _get_provider(llm_type: str): elif llm_type == "huggingface": from llm.huggingface import provider as huggingface_provider _loaded_providers[llm_type] = huggingface_provider + elif 
llm_type == "aws_bedrock": + from llm.aws_bedrock import provider as aws_bedrock_provider + _loaded_providers[llm_type] = aws_bedrock_provider else: raise ValueError(f"Unknown LLM type: {llm_type}") diff --git a/code/requirements.txt b/code/requirements.txt index 301a807d9..f180cf49f 100644 --- a/code/requirements.txt +++ b/code/requirements.txt @@ -36,6 +36,9 @@ seaborn>=0.13.0 # For Hugging Face: # huggingface_hub>=0.31.0 +# For AWS Bedrock Foundation Models: +# boto3>=1.38.15 + # For Azure AI Inference: # azure-ai-inference>=1.0.0b9 # azure-core>=1.30.0 diff --git a/docs/setup-aws_bedrock.md b/docs/setup-aws_bedrock.md new file mode 100644 index 000000000..e060602c5 --- /dev/null +++ b/docs/setup-aws_bedrock.md @@ -0,0 +1,115 @@ +# Setting Up AWS Bedrock Foundational Models + +This guide walks through the process of setting up AWS Bedrock foundational models, from enabling the models in the AWS console to configuring IAM permissions and obtaining the necessary credentials for boto3 integration. + +## Table of Contents + +1. [Enabling AWS Bedrock](#enabling-aws-bedrock) +2. [Requesting Access to Foundation Models](#requesting-access-to-foundation-models) +3. [Setting Up IAM Permissions](#setting-up-iam-permissions) +4. [Creating API Credentials](#creating-api-credentials) +5. [Configuring boto3 for AWS Bedrock](#configuring-boto3-for-aws-bedrock) + +## Enabling AWS Bedrock + +1. Sign in to the [AWS Management Console](https://console.aws.amazon.com/) +2. In the search bar at the top, type "Bedrock" and select the Amazon Bedrock service +3. If this is your first time using Bedrock, you'll see a welcome page. Click "Get started" +4. Select your preferred AWS region from the dropdown in the top-right corner (note that AWS Bedrock is not available in all regions) +5. You'll be directed to the Amazon Bedrock console dashboard + +## Requesting Access to Foundation Models + +Before you can use any foundation models, you need to request access: + +1. 
In the Bedrock console, navigate to "Model access" in the left sidebar +2. You'll see a list of available foundation models from providers like Amazon, Anthropic, AI21 Labs, Cohere, Meta, and others +3. Select the checkboxes next to the models you want to use (e.g., Claude, Llama 2, Amazon Titan) +4. Click "Request model access" at the bottom of the page +5. Review the terms and conditions, then click "Request model access" again +6. Wait for approval (this is usually immediate for most models) +7. Once approved, the status will change to "Access granted" + +## Setting Up IAM Permissions + +To use AWS Bedrock programmatically, you need to create an IAM user or role with appropriate permissions: + +1. Navigate to the [IAM console](https://console.aws.amazon.com/iam/) +2. Create a new policy: + - Click "Policies" in the left sidebar, then "Create policy" + - Switch to the JSON tab and paste the following policy: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "bedrock:ListFoundationModels", + "bedrock:GetFoundationModel", + "bedrock:InvokeModel", + "bedrock:InvokeModelWithResponseStream" + ], + "Resource": "*" + } + ] +} +``` + + - Click "Next", name your policy (e.g., "BedrockAccess"), add a description, and click "Create policy" + +3. Create a new IAM user or update an existing one: + - Click "Users" in the left sidebar + - Create a new user or select an existing one + - Under "Permissions", click "Add permissions" + - Choose "Attach policies directly" + - Search for and select the "BedrockAccess" policy you created + - Click "Next" and then "Add permissions" + +## Creating API Credentials + +To use AWS Bedrock with boto3, you need API credentials: + +1. In the IAM console, navigate to the user you created or updated +2. Go to the "Security credentials" tab +3. Under "Access keys", click "Create access key" +4. Select "Command Line Interface (CLI)" as the use case +5. 
Acknowledge the recommendation and click "Next" +6. (Optional) Add a description tag and click "Create access key" +7. You'll see your Access Key ID and Secret Access Key. **Important**: This is the only time you'll see the Secret Access Key, so make sure to save it securely +8. Download the .csv file or copy both keys to a secure location +9. Add the Access Key ID and Secret Access Key to your environment variables + 9.1. Add the Access Key ID to the environment variable AWS_BEDROCK_ACCESS_KEY_ID + 9.2. Add the Secret Access Key to the environment variable AWS_BEDROCK_SECRET_ACCESS_KEY + 9.3. Add the region to the environment variable AWS_BEDROCK_REGION + +## Available Foundation Models + +AWS Bedrock provides access to various foundation models, including: + +- **Amazon**: Titan Text, Titan Embeddings +- **Anthropic**: Claude, Claude Instant +- **AI21 Labs**: Jurassic-2 +- **Cohere**: Command, Embed +- **Meta**: Llama 2 +- **Stability AI**: Stable Diffusion + +Each model has different capabilities, pricing, and parameter options. Refer to the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/) for detailed information about each model. + +## Monitoring and Cost Management + +1. Monitor your usage in the AWS Billing console +2. Set up AWS Budgets to get alerts when costs exceed thresholds +3. Consider implementing token counting and rate limiting in your application + +## Troubleshooting + +Common issues and solutions: + +- **Access Denied Errors**: Verify that your IAM permissions are correctly set up +- **Model Not Found**: Ensure you've requested and been granted access to the model +- **Region Issues**: Confirm that the model is available in your selected region +- **Quota Limits**: Check if you've hit your quota limits and request increases if needed + +For more information, refer to the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/). 
From 9b3b0ffb41eacc76ac04baddbd8fa85e6dd00384 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 09:23:56 -0500 Subject: [PATCH 2/8] fix: typo on setup docs --- docs/setup-aws_bedrock.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/setup-aws_bedrock.md b/docs/setup-aws_bedrock.md index e060602c5..504f08b24 100644 --- a/docs/setup-aws_bedrock.md +++ b/docs/setup-aws_bedrock.md @@ -1,4 +1,4 @@ -# Setting Up AWS Bedrock Foundational Models +# Setting Up AWS Bedrock Foundation Models This guide walks through the process of setting up AWS Bedrock foundational models, from enabling the models in the AWS console to configuring IAM permissions and obtaining the necessary credentials for boto3 integration. From 19ed85de549c5cd358e864311b2a79f0ed5cde24 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 16:05:53 -0500 Subject: [PATCH 3/8] fix(aws_bedrock): updated variable names and implementation to support LLMProviderConfig interface --- code/.env.template | 3 +-- code/config/config_llm.yaml | 5 ++--- docs/setup-aws_bedrock.md | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/code/.env.template b/code/.env.template index e41df0a19..80af8d968 100644 --- a/code/.env.template +++ b/code/.env.template @@ -63,6 +63,5 @@ NLWEB_LOGGING_PROFILE=production HF_TOKEN="" # AWS Bedrock env variables -AWS_BEDROCK_ACCESS_KEY_ID="" -AWS_BEDROCK_SECRET_ACCESS_KEY="" +AWS_BEDROCK_API_KEY="" AWS_BEDROCK_REGION="us-east-1" diff --git a/code/config/config_llm.yaml b/code/config/config_llm.yaml index b1d3f349b..85f39b7db 100644 --- a/code/config/config_llm.yaml +++ b/code/config/config_llm.yaml @@ -75,9 +75,8 @@ endpoints: low: Qwen/Qwen2.5-Coder-7B-Instruct aws_bedrock: - api_key_env: AWS_BEDROCK_ACCESS_KEY_ID - api_secret_env: AWS_BEDROCK_SECRET_ACCESS_KEY - api_region_env: AWS_BEDROCK_REGION + api_key_env: AWS_BEDROCK_API_KEY + api_version_env: AWS_BEDROCK_REGION llm_type: aws_bedrock 
models: high: amazon.nova-pro-v1:0 diff --git a/docs/setup-aws_bedrock.md b/docs/setup-aws_bedrock.md index 504f08b24..75efcaf7a 100644 --- a/docs/setup-aws_bedrock.md +++ b/docs/setup-aws_bedrock.md @@ -79,21 +79,20 @@ To use AWS Bedrock with boto3, you need API credentials: 6. (Optional) Add a description tag and click "Create access key" 7. You'll see your Access Key ID and Secret Access Key. **Important**: This is the only time you'll see the Secret Access Key, so make sure to save it securely 8. Download the .csv file or copy both keys to a secure location -9. Add the Access Key ID and Secret Access Key to your environment variables - 9.1. Add the Access Key ID to the environment variable AWS_BEDROCK_ACCESS_KEY_ID - 9.2. Add the Secret Access Key to the environment variable AWS_BEDROCK_SECRET_ACCESS_KEY - 9.3. Add the region to the environment variable AWS_BEDROCK_REGION +9. In order to support the LLMProvider interface, you will need to add the Access Key ID and Secret Access Key and Region to your environment variables: + 9.1. Concatenate the Access Key ID and Secret Access Key with a colon (:) and add it to the environment variable AWS_BEDROCK_API_KEY + 9.2. Add the region to the environment variable AWS_BEDROCK_REGION -## Available Foundation Models +## Supported Foundation Models -AWS Bedrock provides access to various foundation models, including: +AWS Bedrock provides access to various foundation models, currently supported models are: -- **Amazon**: Titan Text, Titan Embeddings -- **Anthropic**: Claude, Claude Instant -- **AI21 Labs**: Jurassic-2 -- **Cohere**: Command, Embed -- **Meta**: Llama 2 -- **Stability AI**: Stable Diffusion +- **Amazon**: amazon.nova-..., amazon.titan-text-... +- **AI21 Labs**: ai21... +- **Anthropic**: anthropic.claude-... +- **Cohere**: cohere.command-... +- **Meta**: meta.llama3... +- **Mistral**: mistral... Each model has different capabilities, pricing, and parameter options. 
Refer to the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/) for detailed information about each model. From 32fac4c0fabd94b496d64d9e0adce0f91c6d94bf Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 16:06:28 -0500 Subject: [PATCH 4/8] feat(aws_bedrock): implemented aws_bedrock provider for llm --- code/llm/aws_bedrock.py | 270 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 code/llm/aws_bedrock.py diff --git a/code/llm/aws_bedrock.py b/code/llm/aws_bedrock.py new file mode 100644 index 000000000..a1321ea20 --- /dev/null +++ b/code/llm/aws_bedrock.py @@ -0,0 +1,270 @@ +# Copyright (c) 2025 Microsoft Corporation. +# Licensed under the MIT License + +""" +AWS Bedrock wrapper for LLM functionality. + +WARNING: This code is under development and may undergo changes in future releases. +Backwards compatibility is not guaranteed at this time. +""" + +import json +import re +from typing import Dict, Any, Optional + +from botocore.exceptions import ReadTimeoutError, ConnectTimeoutError +from botocore.config import Config + +import boto3 +from config.config import CONFIG +import threading + +from llm.llm_provider import LLMProvider + +from utils.logging_config_helper import get_configured_logger + +logger = get_configured_logger("llm") + + +class ConfigurationError(RuntimeError): + """ + Raised when configuration is missing or invalid. + """ + + pass + + +class AWSBedrockProvider(LLMProvider): + """Implementation of LLMProvider for AWS Bedrock.""" + + _client_lock = threading.Lock() + _client = None + + @classmethod + def get_api_key(cls) -> str: + """ + Retrieve the AWS Bedrock API key from environment or raise an error. 
+ """ + # Get the API key from aws bedrock config + provider_config = CONFIG.llm_endpoints["aws_bedrock"] + api_key = provider_config.api_key.split(":")[0] + return api_key + + @classmethod + def get_api_secret(cls) -> str: + """ + Retrieve the AWS Bedrock API secret from environment or raise an error. + """ + # Get the API secret from aws bedrock config + provider_config = CONFIG.llm_endpoints["aws_bedrock"] + api_secret = provider_config.api_key.split(":")[1] + return api_secret + + @classmethod + def get_api_region(cls) -> str: + """ + Retrieve the AWS Bedrock API region from environment or raise an error. + """ + # Get the API region from aws bedrock config + provider_config = CONFIG.llm_endpoints["aws_bedrock"] + api_region = provider_config.api_version + return api_region + + @classmethod + def get_client(cls, timeout: float = 30.0) -> boto3.client: + """ + Configure and return an AWS Bedrock client. + """ + config = Config( + connect_timeout=timeout, + read_timeout=timeout + ) + + with cls._client_lock: + if cls._client is None: + api_key = cls.get_api_key() + api_secret = cls.get_api_secret() + api_region = cls.get_api_region() + cls._client = boto3.client( + service_name="bedrock-runtime", + region_name=api_region, + aws_access_key_id=api_key, + aws_secret_access_key=api_secret, + config=config + ) + return cls._client + + @classmethod + def _build_model_body( + cls, + model: str, + prompt: str, + schema: Dict[str, Any], + max_tokens: int, + temperature: float, + ) -> Dict[str, Any]: + """ + Construct the system and user message sequence enforcing a JSON schema. 
+ """ + formatted_prompt = f"Respond ONLY with a valid JSON and no other text that matches this schema: {json.dumps(schema, indent=2)}\n\nInstruction: {prompt}" + if model.startswith("amazon.nova"): + return { + "system": [ + { + "text": ( + f"Provide a valid JSON response matching this schema: " + f"{json.dumps(schema)}" + ) + } + ], + "messages": [ + {"role": "user", "content": [{"text": prompt}]}, + ], + "inferenceConfig": { + "maxTokens": max_tokens, + "temperature": temperature, + }, + } + elif model.startswith("amazon.titan-text"): + return { + "inputText": formatted_prompt, + "textGenerationConfig": { + "maxTokenCount": max_tokens, + "temperature": temperature, + }, + } + elif model.startswith("ai21"): + return { + "prompt": formatted_prompt, + "maxTokens": max_tokens, + "temperature": temperature, + } + elif model.startswith("anthropic"): + return { + "anthropic_version": "bedrock-2023-05-31", + "messages": [ + { + "role": "system", + "content": { + "type": "text", + "text": ( + f"Provide a valid JSON response matching this schema: " + f"{json.dumps(schema)}" + ), + }, + }, + {"role": "user", "content": {"type": "text", "text": prompt}}, + ], + "max_tokens": max_tokens, + "temperature": temperature, + } + elif model.startswith("cohere.command-r"): + return { + "message": formatted_prompt, + "max_tokens": max_tokens, + "temperature": temperature, + } + elif model.startswith("cohere.command"): + return { + "prompt": formatted_prompt, + "max_tokens": max_tokens, + "temperature": temperature, + } + elif model.startswith("meta.llama3"): + return { + "prompt": formatted_prompt, + "max_gen_len": max_tokens, + "temperature": temperature, + } + elif model.startswith("mistral"): + return { + "prompt": f"[INST] {formatted_prompt} [/INST]", + "max_tokens": max_tokens, + "temperature": temperature, + } + else: + raise ValueError(f"Model {model} not supported") + + @classmethod + def _get_response_by_model(cls, model: str, body: Dict[str, Any]) -> str: + """ + Get the 
response from the model. + """ + try: + if model.startswith("amazon.nova"): + return body["output"]["message"]["content"][0]["text"] + elif model.startswith("amazon.titan-text"): + return body["results"][0]["outputText"] + elif model.startswith("ai21"): + return body["completions"][0]["data"]["text"] + elif model.startswith("anthropic"): + return body["content"][0]["text"] + elif model.startswith("cohere.command-r"): + return body["text"] + elif model.startswith("cohere.command"): + return body["generations"][0]["text"] + elif model.startswith("meta.llama3"): + return body["generation"] + elif model.startswith("mistral"): + return body["outputs"][0]["text"] + else: + raise ValueError(f"Model {model} not supported") + except Exception as e: + raise ValueError(f"Error getting response from model {model}: {e}") + + @classmethod + def clean_response(cls, content: str) -> Dict[str, Any]: + """ + Strip markdown fences and extract the first JSON object. + """ + cleaned = re.sub(r"```(?:json)?\s*", "", content).strip() + match = re.search(r"(\{.*\})", cleaned, re.S) + if not match: + logger.error("Failed to parse JSON from content: %r", content) + raise ValueError("No JSON object found in response") + return json.loads(match.group(1)) + + async def get_completion( + self, + prompt: str, + schema: Dict[str, Any], + model: Optional[str] = None, + temperature: float = 1.0, + max_tokens: int = 2048, + timeout: float = 30.0, + **kwargs, + ) -> Dict[str, Any]: + """ + Send an async chat completion request to AWS Bedrock and return parsed JSON. 
+ """ + # If model not provided, get it from config + if model is None: + provider_config = CONFIG.llm_endpoints["aws_bedrock"] + # Use the 'high' model for completions by default + model = provider_config.models.high + + client = self.get_client(timeout) + body = self._build_model_body(model, prompt, schema, max_tokens, temperature) + + try: + # Run the synchronous boto3 client in a thread pool executor + response = client.invoke_model(modelId=model, body=json.dumps(body)) + except ReadTimeoutError: + logger.error("⏰ Read timeout: el modelo tardΓ³ demasiado en responder.") + return {} + except ConnectTimeoutError: + logger.error("🚫 Completion request timed out after %s seconds.", timeout) + return {} + + try: + # Decode the response body. + model_response = json.loads(response["body"].read()) + model_response_text = self._get_response_by_model(model, model_response) + return self.clean_response(model_response_text) + except Exception as e: + logger.error(f"Error processing AWS Bedrock response: {e}") + return {} + + +# Create a singleton instance +provider = AWSBedrockProvider() From 858a685aedfd5f12a0d472064f77244cdadafa51 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 18:54:50 -0500 Subject: [PATCH 5/8] feat: implemented AWS bedrock embeddings support --- code/README.md | 2 + code/config/config_embedding.yaml | 9 +- code/embedding/aws_bedrock_embedding.py | 153 ++++++++++++++++++++++++ code/embedding/embedding.py | 22 +++- docs/setup-aws_bedrock.md | 5 + 5 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 code/embedding/aws_bedrock_embedding.py diff --git a/code/README.md b/code/README.md index 07d5576d9..3845ed079 100644 --- a/code/README.md +++ b/code/README.md @@ -29,6 +29,7 @@ code/ | └── whoHandler.py # β”œβ”€β”€ embedding/ | β”œβ”€β”€ anthropic_embedding.py # +| β”œβ”€β”€ aws_bedrock_embedding.py # | β”œβ”€β”€ azure_oai_embedding.py # | β”œβ”€β”€ embedding.py # | β”œβ”€β”€ gemini_embedding.py # @@ -36,6 +37,7 @@ 
code/ | ├── snowflake_embedding.py # ├── llm/ | ├── anthropic.py # +| ├── aws_bedrock.py # | ├── azure_deepseek.py # | ├── azure_llama.py # | ├── azure_oai.py # diff --git a/code/config/config_embedding.yaml b/code/config/config_embedding.yaml index dae78d67c..26cf6b1f9 100644 --- a/code/config/config_embedding.yaml +++ b/code/config/config_embedding.yaml @@ -13,11 +13,16 @@ providers: azure_openai: api_key_env: AZURE_OPENAI_API_KEY api_endpoint_env: AZURE_OPENAI_ENDPOINT - api_version_env: "2024-10-21" # Specific API version for embeddings + api_version_env: "2024-10-21" # Specific API version for embeddings model: text-embedding-3-small snowflake: api_key_env: SNOWFLAKE_PAT api_endpoint_env: SNOWFLAKE_ACCOUNT_URL api_version_env: "2024-10-01" - model: snowflake-arctic-embed-m-v1.5 \ No newline at end of file + model: snowflake-arctic-embed-m-v1.5 + + aws_bedrock: + api_key_env: AWS_BEDROCK_API_KEY + api_version_env: AWS_BEDROCK_REGION + model: amazon.titan-embed-text-v1 diff --git a/code/embedding/aws_bedrock_embedding.py b/code/embedding/aws_bedrock_embedding.py new file mode 100644 index 000000000..c6209ddd9 --- /dev/null +++ b/code/embedding/aws_bedrock_embedding.py @@ -0,0 +1,153 @@ +# Copyright (c) 2025 Microsoft Corporation. +# Licensed under the MIT License + +""" +AWS Bedrock embedding implementation. + +WARNING: This code is under development and may undergo changes in future releases. +Backwards compatibility is not guaranteed at this time. +""" + +import os +import json +from typing import List, Optional, Any + +from botocore.config import Config + +import boto3 +from config.config import CONFIG +import threading + +from utils.logging_config_helper import get_configured_logger, LogLevel + +logger = get_configured_logger("aws_bedrock_embedding") + +_client_lock = threading.Lock() +aws_bedrock_client = None + + +def get_aws_bedrock_api_key() -> str: + """ + Retrieve the AWS Bedrock API key from configuration. 
+ """ + # Get the API key from the embedding provider config + provider_config = CONFIG.get_embedding_provider("aws_bedrock") + if provider_config and provider_config.api_key: + api_key = provider_config.api_key + if api_key: + return api_key + + # Fallback to environment variable + api_key = os.getenv("AWS_BEDROCK_API_KEY") + if not api_key: + error_msg = "AWS Bedrock API key not found in configuration or environment" + logger.error(error_msg) + raise ValueError(error_msg) + + return api_key + + +def get_aws_bedrock_region() -> str: + """ + Retrieve the AWS Bedrock region from configuration. + """ + # Get the API key from the embedding provider config + provider_config = CONFIG.get_embedding_provider("aws_bedrock") + if provider_config and provider_config.api_version: + aws_region = provider_config.api_version + if aws_region: + return aws_region + + # Fallback to environment variable + aws_region = os.getenv("AWS_BEDROCK_REGION") + if not aws_region: + error_msg = "AWS Bedrock region not found in configuration or environment" + logger.error(error_msg) + raise ValueError(error_msg) + + return aws_region + + +def get_runtime_client(timeout: float = 30.0) -> Any: + """ + Configure and return an AWS Bedrock runtime client. 
+ """ + config = Config(connect_timeout=timeout, read_timeout=timeout) + + global aws_bedrock_client + with _client_lock: + if aws_bedrock_client is None: + try: + api_key = get_aws_bedrock_api_key() + aws_access_key_id = api_key.split(":")[0] + aws_secret_access_key = api_key.split(":")[1] + aws_region = get_aws_bedrock_region() + + aws_bedrock_client = boto3.client( + service_name="bedrock-runtime", + region_name=aws_region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + config=config, + ) + logger.debug("AWS Bedrock client initialized successfully") + except Exception as e: + logger.exception("Failed to initialize AWS Bedrock client") + raise + + return aws_bedrock_client + + +def get_aws_bedrock_embeddings( + text: str, model: Optional[str] = None, timeout: float = 30.0 +) -> List[float]: + """ + Generate an embedding for a single text using AWS Bedrock API. + + Args: + text: The text to embed + model: Optional model ID to use, defaults to provider's configured model + timeout: Maximum time to wait for the embedding response in seconds + + Returns: + List of floats representing the embedding vector + """ + # If model not provided, get it from config + if model is None: + provider_config = CONFIG.get_embedding_provider("aws_bedrock") + if provider_config and provider_config.model: + model = provider_config.model + else: + # Default to a common embedding model + model = "amazon.titan-embed-text-v1" + + logger.debug(f"Generating AWS Bedrock embedding with model: {model}") + logger.debug(f"Text length: {len(text)} chars") + + client = get_runtime_client(timeout) + + try: + # Clean input text (replace newlines with spaces) + text = text.replace("\n", " ") + + response = client.invoke_model( + modelId=model, body=json.dumps({"inputText": text}) + ) + + model_response = json.loads(response["body"].read()) + embedding = model_response["embedding"] + logger.debug(f"AWS Bedrock embedding generated, dimension: {len(embedding)}") + 
return embedding + except Exception as e: + logger.exception("Error generating AWS Bedrock embedding") + logger.log_with_context( + LogLevel.ERROR, + "AWS Bedrock embedding generation failed", + { + "model": model, + "text_length": len(text), + "error_type": type(e).__name__, + "error_message": str(e), + }, + ) + raise diff --git a/code/embedding/embedding.py b/code/embedding/embedding.py index bc8a4354d..cffc00da0 100644 --- a/code/embedding/embedding.py +++ b/code/embedding/embedding.py @@ -22,7 +22,8 @@ "openai": threading.Lock(), "gemini": threading.Lock(), "azure_openai": threading.Lock(), - "snowflake": threading.Lock() + "snowflake": threading.Lock(), + "aws_bedrock": threading.Lock() } async def get_embedding( @@ -115,6 +116,14 @@ async def get_embedding( logger.debug(f"Snowflake Cortex embeddings received, dimension: {len(result)}") return result + if provider == "aws_bedrock": + logger.debug("Getting AWS Bedrock embeddings") + # Import here to avoid potential circular imports + from embedding.aws_bedrock_embedding import get_aws_bedrock_embeddings + result = get_aws_bedrock_embeddings(text, model=model_id, timeout=timeout) + logger.debug(f"AWS Bedrock embeddings received, dimension: {len(result)}") + return result + error_msg = f"No embedding implementation for provider '{provider}'" logger.error(error_msg) raise ValueError(error_msg) @@ -222,6 +231,17 @@ async def batch_get_embeddings( logger.debug(f"Gemini batch embeddings received, count: {len(results)}") return results + if provider == "aws_bedrock": + logger.debug("Getting AWS Bedrock batch embeddings") + from embedding.aws_bedrock_embedding import get_aws_bedrock_embeddings + + results = [] + for text in texts: + result = get_aws_bedrock_embeddings(text, model=model_id, timeout=timeout) + results.append(result) + logger.debug(f"AWS Bedrock batch embeddings received, count: {len(results)}") + return results + # Default implementation if provider doesn't match any above logger.debug(f"No specific 
batch implementation for {provider}, processing sequentially") results = [] diff --git a/docs/setup-aws_bedrock.md b/docs/setup-aws_bedrock.md index 75efcaf7a..0cc03e3f2 100644 --- a/docs/setup-aws_bedrock.md +++ b/docs/setup-aws_bedrock.md @@ -94,6 +94,11 @@ AWS Bedrock provides access to various foundation models, currently supported mo - **Meta**: meta.llama3... - **Mistral**: mistral... +For embedding models, currently supported models are: + +- **Amazon**: amazon.titan-embed... +- **Cohere**: cohere.embed-... + Each model has different capabilities, pricing, and parameter options. Refer to the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/) for detailed information about each model. ## Monitoring and Cost Management From f95f2a0e6754c4b2a2fb007c29572f7a1a239836 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Wed, 25 Jun 2025 19:17:17 -0500 Subject: [PATCH 6/8] fix: prevent out of range error for responses without braces --- code/prompts/prompts.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/code/prompts/prompts.py b/code/prompts/prompts.py index 2c639a34b..3a652ea6d 100644 --- a/code/prompts/prompts.py +++ b/code/prompts/prompts.py @@ -103,7 +103,18 @@ def get_prompt_variable_value(variable, handler): value = site elif variable == "site.itemType": item_type = handler.item_type - value = item_type.split("}")[1] + try: + # Try to split and get the part after the closing brace + parts = item_type.split("}") + if len(parts) > 1: + value = parts[1] + else: + # If there's no part after the brace, use the original value + logger.warning(f"item_type '{item_type}' doesn't contain expected format with closing brace") + value = item_type + except Exception as e: + logger.error(f"Error processing item_type '{item_type}': {str(e)}") + value = item_type elif variable == "request.query": if (handler.state.is_decontextualization_done()): value = handler.decontextualized_query From 
93b579c7b40eb4e1da9ad0b8ccf1ace8a4308b40 Mon Sep 17 00:00:00 2001 From: Brandon Sanchez Date: Tue, 15 Jul 2025 15:36:20 -0500 Subject: [PATCH 7/8] fix: updated files to new project directory schema --- code/python/core/embedding.py | 22 ++++++++++++++++++- code/python/core/llm.py | 4 ++++ .../aws_bedrock_embedding.py | 0 .../python/llm_providers}/aws_bedrock.py | 0 code/python/requirements.txt | 3 +++ config/config_embedding.yaml | 9 ++++++-- config/config_llm.yaml | 8 +++++++ 7 files changed, 43 insertions(+), 3 deletions(-) rename {old_code/embedding => code/python/embedding_providers}/aws_bedrock_embedding.py (100%) rename {old_code/llm => code/python/llm_providers}/aws_bedrock.py (100%) diff --git a/code/python/core/embedding.py b/code/python/core/embedding.py index 5e2aa680c..f268ef300 100644 --- a/code/python/core/embedding.py +++ b/code/python/core/embedding.py @@ -22,7 +22,8 @@ "openai": threading.Lock(), "gemini": threading.Lock(), "azure_openai": threading.Lock(), - "snowflake": threading.Lock() + "snowflake": threading.Lock(), + "aws_bedrock": threading.Lock() } async def get_embedding( @@ -131,6 +132,14 @@ async def get_embedding( logger.debug(f"Snowflake Cortex embeddings received, dimension: {len(result)}") return result + if provider == "aws_bedrock": + logger.debug("Getting AWS Bedrock embeddings") + # Import here to avoid potential circular imports + from embedding_providers.aws_bedrock_embedding import get_aws_bedrock_embeddings + result = get_aws_bedrock_embeddings(text, model=model_id, timeout=timeout) + logger.debug(f"AWS Bedrock embeddings received, dimension: {len(result)}") + return result + error_msg = f"No embedding implementation for provider '{provider}'" logger.error(error_msg) raise ValueError(error_msg) @@ -248,6 +257,17 @@ async def batch_get_embeddings( ) logger.debug(f"Gemini batch embeddings received, count: {len(result)}") return result + + if provider == "aws_bedrock": + logger.debug("Getting AWS Bedrock batch embeddings") + 
from embedding_providers.aws_bedrock_embedding import get_aws_bedrock_embeddings + + results = [] + for text in texts: + result = get_aws_bedrock_embeddings(text, model=model_id, timeout=timeout) + results.append(result) + logger.debug(f"AWS Bedrock batch embeddings received, count: {len(results)}") + return results # Default implementation if provider doesn't match any above logger.debug(f"No specific batch implementation for {provider}, processing sequentially") diff --git a/code/python/core/llm.py b/code/python/core/llm.py index 397539693..89ec060e6 100644 --- a/code/python/core/llm.py +++ b/code/python/core/llm.py @@ -52,6 +52,7 @@ def init(): "inception": ["aiohttp>=3.9.1"], "snowflake": ["httpx>=0.28.1"], "huggingface": ["huggingface_hub>=0.31.0"], + "aws_bedrock": ["boto3>=1.38.15"], } # Cache for installed packages @@ -147,6 +148,9 @@ def _get_provider(llm_type: str): elif llm_type == "huggingface": from llm_providers.huggingface import provider as huggingface_provider _loaded_providers[llm_type] = huggingface_provider + elif llm_type == "aws_bedrock": + from llm_providers.aws_bedrock import provider as aws_bedrock_provider + _loaded_providers[llm_type] = aws_bedrock_provider else: raise ValueError(f"Unknown LLM type: {llm_type}") diff --git a/old_code/embedding/aws_bedrock_embedding.py b/code/python/embedding_providers/aws_bedrock_embedding.py similarity index 100% rename from old_code/embedding/aws_bedrock_embedding.py rename to code/python/embedding_providers/aws_bedrock_embedding.py diff --git a/old_code/llm/aws_bedrock.py b/code/python/llm_providers/aws_bedrock.py similarity index 100% rename from old_code/llm/aws_bedrock.py rename to code/python/llm_providers/aws_bedrock.py diff --git a/code/python/requirements.txt b/code/python/requirements.txt index 0529f2b9c..f9ec5c129 100644 --- a/code/python/requirements.txt +++ b/code/python/requirements.txt @@ -69,3 +69,6 @@ openai>=1.12.0 # psycopg[binary]>=3.1.12 # PostgreSQL adapter (psycopg3) # 
psycopg[pool]>=3.2.0 # Connection pooling for psycopg3 # pgvector>=0.4.0 + +# For AWS Bedrock Foundation Models: +# boto3>=1.38.15 diff --git a/config/config_embedding.yaml b/config/config_embedding.yaml index 98d0dcd91..3e9f6811e 100644 --- a/config/config_embedding.yaml +++ b/config/config_embedding.yaml @@ -13,11 +13,16 @@ providers: azure_openai: api_key_env: AZURE_OPENAI_API_KEY api_endpoint_env: AZURE_OPENAI_ENDPOINT - api_version_env: "2024-10-21" # Specific API version for embeddings + api_version_env: "2024-10-21" # Specific API version for embeddings model: text-embedding-3-small snowflake: api_key_env: SNOWFLAKE_PAT api_endpoint_env: SNOWFLAKE_ACCOUNT_URL api_version_env: "2024-10-01" - model: snowflake-arctic-embed-m-v1.5 \ No newline at end of file + model: snowflake-arctic-embed-m-v1.5 + + aws_bedrock: + api_key_env: AWS_BEDROCK_API_KEY + api_version_env: AWS_BEDROCK_REGION + model: amazon.titan-embed-text-v1 diff --git a/config/config_llm.yaml b/config/config_llm.yaml index 26c8d4745..c9c720ba0 100644 --- a/config/config_llm.yaml +++ b/config/config_llm.yaml @@ -73,3 +73,11 @@ endpoints: models: high: Qwen/Qwen2.5-72B-Instruct low: Qwen/Qwen2.5-Coder-7B-Instruct + + aws_bedrock: + api_key_env: AWS_BEDROCK_API_KEY + api_version_env: AWS_BEDROCK_REGION + llm_type: aws_bedrock + models: + high: amazon.nova-pro-v1:0 + low: amazon.nova-micro-v1:0 From ce4abfd008c114c98990938228f8b1f318d992f4 Mon Sep 17 00:00:00 2001 From: Chelsea Carter Date: Wed, 16 Jul 2025 21:14:14 -0700 Subject: [PATCH 8/8] adding logic to install bedrock requirements from embedding model if called before LLM; additional refactoring changes. 
--- code/python/core/embedding.py | 63 ++++++++++++++++++- .../aws_bedrock_embedding.py | 19 ++++-- code/python/llm_providers/aws_bedrock.py | 8 +-- config/config_embedding.yaml | 2 +- config/config_retrieval.yaml | 4 +- 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/code/python/core/embedding.py b/code/python/core/embedding.py index f268ef300..a8d231847 100644 --- a/code/python/core/embedding.py +++ b/code/python/core/embedding.py @@ -11,6 +11,8 @@ from typing import Optional, List import asyncio import threading +import sys +import subprocess from core.config import CONFIG from misc.logger.logging_config_helper import get_configured_logger, LogLevel @@ -26,6 +28,60 @@ "aws_bedrock": threading.Lock() } +# Mapping of embedding provider types to their required pip packages +_embedding_provider_packages = { + "openai": ["openai>=1.12.0"], + "gemini": ["google-cloud-aiplatform>=1.38.0"], + "azure_openai": ["openai>=1.12.0"], + "snowflake": ["httpx>=0.28.1"], + "aws_bedrock": ["boto3>=1.38.15"], +} + +# Cache for installed packages - shared with LLM to avoid duplicate installs +try: + from core.llm import _installed_packages +except ImportError: + _installed_packages = set() + +def _ensure_package_installed(provider: str): + """ + Ensure the required packages for an embedding provider are installed. 
+ + Args: + provider: The name of the embedding provider + """ + if provider not in _embedding_provider_packages: + return + + packages = _embedding_provider_packages[provider] + for package in packages: + # Extract package name without version for caching + package_name = package.split(">=")[0].split("==")[0] + + if package_name in _installed_packages: + continue + + try: + # Try to import the package first + if package_name == "google-cloud-aiplatform": + __import__("vertexai") + else: + __import__(package_name) + _installed_packages.add(package_name) + logger.debug(f"Package {package_name} is already installed") + except ImportError: + # Package not installed, install it + logger.info(f"Installing {package} for {provider} provider...") + try: + subprocess.check_call([ + sys.executable, "-m", "pip", "install", package, "--quiet" + ]) + _installed_packages.add(package_name) + logger.info(f"Successfully installed {package}") + except subprocess.CalledProcessError as e: + logger.error(f"Failed to install {package}: {e}") + raise ValueError(f"Failed to install required package {package} for {provider}") + async def get_embedding( text: str, provider: Optional[str] = None, @@ -86,6 +142,9 @@ async def get_embedding( logger.debug(f"Using embedding model: {model_id}") try: + # Ensure required packages are installed before importing provider modules + _ensure_package_installed(provider) + # Use a timeout wrapper for all embedding calls if provider == "openai": logger.debug("Getting OpenAI embeddings") @@ -261,7 +320,6 @@ async def batch_get_embeddings( if provider == "aws_bedrock": logger.debug("Getting AWS Bedrock batch embeddings") from embedding_providers.aws_bedrock_embedding import get_aws_bedrock_embeddings - results = [] for text in texts: result = get_aws_bedrock_embeddings(text, model=model_id, timeout=timeout) @@ -274,8 +332,7 @@ async def batch_get_embeddings( results = [] for text in texts: embedding = await get_embedding(text, provider, model) - 
results.append(embedding) - + results.append(embedding) return results except asyncio.TimeoutError: diff --git a/code/python/embedding_providers/aws_bedrock_embedding.py b/code/python/embedding_providers/aws_bedrock_embedding.py index c6209ddd9..aaadc1553 100644 --- a/code/python/embedding_providers/aws_bedrock_embedding.py +++ b/code/python/embedding_providers/aws_bedrock_embedding.py @@ -15,10 +15,10 @@ from botocore.config import Config import boto3 -from config.config import CONFIG +from core.config import CONFIG import threading -from utils.logging_config_helper import get_configured_logger, LogLevel +from misc.logger.logging_config_helper import get_configured_logger, LogLevel logger = get_configured_logger("aws_bedrock_embedding") @@ -79,8 +79,16 @@ def get_runtime_client(timeout: float = 30.0) -> Any: if aws_bedrock_client is None: try: api_key = get_aws_bedrock_api_key() - aws_access_key_id = api_key.split(":")[0] - aws_secret_access_key = api_key.split(":")[1] + + # Validate API key format + parts = api_key.split(":") + if len(parts) != 2: + error_msg = "AWS Bedrock API key must be in format 'access_key_id:secret_access_key'" + logger.error(error_msg) + raise ValueError(error_msg) + + aws_access_key_id = parts[0] + aws_secret_access_key = parts[1] aws_region = get_aws_bedrock_region() aws_bedrock_client = boto3.client( @@ -97,7 +105,6 @@ def get_runtime_client(timeout: float = 30.0) -> Any: return aws_bedrock_client - def get_aws_bedrock_embeddings( text: str, model: Optional[str] = None, timeout: float = 30.0 ) -> List[float]: @@ -119,7 +126,7 @@ def get_aws_bedrock_embeddings( model = provider_config.model else: # Default to a common embedding model - model = "amazon.titan-embed-text-v1" + model = "amazon.titan-embed-text-v2:0" logger.debug(f"Generating AWS Bedrock embedding with model: {model}") logger.debug(f"Text length: {len(text)} chars") diff --git a/code/python/llm_providers/aws_bedrock.py b/code/python/llm_providers/aws_bedrock.py index 
a1321ea20..97f0bbd05 100644 --- a/code/python/llm_providers/aws_bedrock.py +++ b/code/python/llm_providers/aws_bedrock.py @@ -16,12 +16,12 @@ from botocore.config import Config import boto3 -from config.config import CONFIG +from core.config import CONFIG import threading -from llm.llm_provider import LLMProvider +from llm_providers.llm_provider import LLMProvider -from utils.logging_config_helper import get_configured_logger +from misc.logger.logging_config_helper import get_configured_logger logger = get_configured_logger("llm") @@ -250,7 +250,7 @@ async def get_completion( # Run the synchronous boto3 client in a thread pool executor response = client.invoke_model(modelId=model, body=json.dumps(body)) except ReadTimeoutError: - logger.error("⏰ Read timeout: el modelo tardΓ³ demasiado en responder.") + logger.error("⏰ Read timeout: the model took too long to respond.") return {} except ConnectTimeoutError: logger.error("🚫 Completion request timed out after %s seconds.", timeout) diff --git a/config/config_embedding.yaml b/config/config_embedding.yaml index 3e9f6811e..ebbe99946 100644 --- a/config/config_embedding.yaml +++ b/config/config_embedding.yaml @@ -25,4 +25,4 @@ providers: aws_bedrock: api_key_env: AWS_BEDROCK_API_KEY api_version_env: AWS_BEDROCK_REGION - model: amazon.titan-embed-text-v1 + model: amazon.titan-embed-text-v2:0 diff --git a/config/config_retrieval.yaml b/config/config_retrieval.yaml index 23841429a..eeab3563e 100644 --- a/config/config_retrieval.yaml +++ b/config/config_retrieval.yaml @@ -3,7 +3,7 @@ write_endpoint: qdrant_local endpoints: nlweb_west: - enabled: true + enabled: false api_key_env: NLWEB_WEST_API_KEY api_endpoint_env: NLWEB_WEST_ENDPOINT index_name: embeddings1536 @@ -72,7 +72,7 @@ endpoints: use_knn: false postgres: - enabled: true + enabled: false # Database connection details (i.e. "postgresql://:/?user=&sslmode=require") api_endpoint_env: POSTGRES_CONNECTION_STRING # Password for authentication