Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 16 additions & 33 deletions gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,18 @@
"!pip install inflect\n",
"!pip install requests-aws4auth\n",
"!pip install opensearch-py\n",
"!pip install anthropic"
"!pip install anthropic\n",
"!pip install openpyxl "
]
},
{
"cell_type": "markdown",
"id": "ff63129e-f3c9-41ea-bf48-3d2608a2531a",
"metadata": {},
"source": [
"Restart the Kernel \\\n",
"Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below."
"> ⚠️ Restart the Kernel \\\n",
"> \n",
"> Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below."
]
},
{
Expand Down Expand Up @@ -169,7 +171,6 @@
"from collections import OrderedDict\n",
"import boto3\n",
"import time\n",
"import a_opensearch_utilities_\n",
"import sagemaker\n",
"import openpyxl\n",
"from openpyxl.cell import Cell\n",
Expand All @@ -184,7 +185,7 @@
")\n",
"from anthropic import Anthropic\n",
"client = Anthropic()\n",
"bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-east-1',config=config)"
"bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-west-2',config=config)"
]
},
{
Expand Down Expand Up @@ -264,12 +265,7 @@
" {\n",
" 'Resource': ['index/' + vector_store_name + '/*'],\n",
" 'Permission': [\n",
" 'aoss:CreateIndex',\n",
" 'aoss:DeleteIndex',\n",
" 'aoss:UpdateIndex',\n",
" 'aoss:DescribeIndex',\n",
" 'aoss:ReadDocument',\n",
" 'aoss:WriteDocument'],\n",
" 'aoss:*'],\n",
" 'ResourceType': 'index'\n",
" }],\n",
" 'Principal': [identity],\n",
Expand Down Expand Up @@ -450,7 +446,7 @@
"outputs": [],
"source": [
"BUCKET= sagemaker.Session().default_bucket()\n",
"extractor = Textractor(region_name=\"us-east-1\")\n",
"extractor = Textractor(region_name=\"us-west-2\")\n",
"file=\"amazon-2024-10k.pdf\"\n",
"doc_id= os.path.basename(file)\n",
"file_name, ext = os.path.splitext(file)\n",
Expand Down Expand Up @@ -1261,7 +1257,6 @@
},
{
"cell_type": "markdown",
"id": "6b5f93e9-4813-4b4e-a040-87e2370ec64b",
"metadata": {},
"source": [
"\n",
Expand All @@ -1284,9 +1279,7 @@
"\n",
"**Note:** Certain chunks may exceed the threshold set for chunking in the previous cells due to the way tables are chunked by row and section paragraph sizes. This might result in a token limit exceed error for certain embedding models.\n",
"\n",
"Ensure to replace the **domain_endpoint** variable with the Amazon OpenSearch Service domain (2.11 and higher) or Serverless collection you created in your account.\n",
"\n",
"If using Amazon Opensearch Serverless, change the `openserach_serverless` to True."
"Using **host** variable in **domain_endpoint** will ensure it takes your built OpenSearch Service domain/Serverless endpoint id. If not following the steps, please update the variable with yours domain/endpoint id."
]
},
{
Expand All @@ -1305,11 +1298,11 @@
"This script demonstrates indexing documents into an Amazon OpenSearch Serverless domain using AWS Identity and Access Management (IAM) for authentication.\n",
"\"\"\"\n",
"service = 'aoss'\n",
"# replace wit your OpenSearch Service domain/Serverless endpoint\n",
"domain_endpoint = host\n",
"# Using host will use your OpenSearch Service domain/Serverless endpoint id\n",
"domain_endpoint = host \n",
"\n",
"credentials = boto3.Session().get_credentials()\n",
"awsauth = AWSV4SignerAuth(credentials, \"us-east-1\", service)\n",
"awsauth = AWSV4SignerAuth(credentials, \"us-west-2\", service)\n",
"os_ = OpenSearch(\n",
" hosts = [{'host': domain_endpoint, 'port': 443}],\n",
" http_auth = awsauth,\n",
Expand Down Expand Up @@ -1672,7 +1665,7 @@
"source": [
"from opensearchpy import Transport\n",
"credentials = boto3.Session().get_credentials()\n",
"awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-east-1\", service, session_token=credentials.token)\n",
"awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-west-2\", service, session_token=credentials.token)\n",
"transport = Transport(\n",
" hosts = [{'host': domain_endpoint, 'port': 443}],\n",
" http_auth = awsauth,\n",
Expand Down Expand Up @@ -1960,20 +1953,12 @@
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": null,
"id": "edc0c8ab-5cf5-4200-829f-08c576db2d45",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Size of prompt token is 2938\n"
]
}
],
"outputs": [],
"source": [
"csv_seperator=\"|\"\n",
"prompt_template=f\"\"\"You are a helpful, obedient and truthful financial assistance.\n",
Expand All @@ -1998,9 +1983,7 @@
"Question: {question}\n",
"if able to answer:\n",
" Include in your response before your answer: \n",
" <source>document or additional info tag(s) containing the relevant info</source>\"\"\"\n",
"\n",
"print(f' Size of prompt token is {client.count_tokens(prompt_template)}')"
" <source>document or additional info tag(s) containing the relevant info</source>\"\"\"\n"
]
},
{
Expand Down