From 9f7a1e81c0ac73f36141217298f81ba9347a110f Mon Sep 17 00:00:00 2001 From: Alejandro Veliz Fernandez Date: Fri, 15 Nov 2024 11:52:20 +0100 Subject: [PATCH] Fix notebook existing bugs. Fixes include (1) update regions to us-west-2, (2) remove 'a_opensearch_utilities_' library, (3) added 'openpyxl', (4) Remove legacy client.count_tokens() call from Anthropic. --- .../Bedrock/04-idp-genai-advanced-rag.ipynb | 49 ++++++------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb b/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb index 82f2982..2afdad3 100644 --- a/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb +++ b/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb @@ -125,7 +125,8 @@ "!pip install inflect\n", "!pip install requests-aws4auth\n", "!pip install opensearch-py\n", - "!pip install anthropic" + "!pip install anthropic\n", + "!pip install openpyxl " ] }, { @@ -133,8 +134,9 @@ "id": "ff63129e-f3c9-41ea-bf48-3d2608a2531a", "metadata": {}, "source": [ - "Restart the Kernel \\\n", - "Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below." + "> ⚠️ Restart the Kernel \\\n", + "> \n", + "> Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below." 
] }, { @@ -169,7 +171,6 @@ "from collections import OrderedDict\n", "import boto3\n", "import time\n", - "import a_opensearch_utilities_\n", "import sagemaker\n", "import openpyxl\n", "from openpyxl.cell import Cell\n", @@ -184,7 +185,7 @@ ")\n", "from anthropic import Anthropic\n", "client = Anthropic()\n", - "bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-east-1',config=config)" + "bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-west-2',config=config)" ] }, { @@ -264,12 +265,7 @@ " {\n", " 'Resource': ['index/' + vector_store_name + '/*'],\n", " 'Permission': [\n", - " 'aoss:CreateIndex',\n", - " 'aoss:DeleteIndex',\n", - " 'aoss:UpdateIndex',\n", - " 'aoss:DescribeIndex',\n", - " 'aoss:ReadDocument',\n", - " 'aoss:WriteDocument'],\n", + " 'aoss:*'],\n", " 'ResourceType': 'index'\n", " }],\n", " 'Principal': [identity],\n", @@ -450,7 +446,7 @@ "outputs": [], "source": [ "BUCKET= sagemaker.Session().default_bucket()\n", - "extractor = Textractor(region_name=\"us-east-1\")\n", + "extractor = Textractor(region_name=\"us-west-2\")\n", "file=\"amazon-2024-10k.pdf\"\n", "doc_id= os.path.basename(file)\n", "file_name, ext = os.path.splitext(file)\n", @@ -1261,7 +1257,6 @@ }, { "cell_type": "markdown", - "id": "6b5f93e9-4813-4b4e-a040-87e2370ec64b", "metadata": {}, "source": [ "\n", @@ -1284,9 +1279,7 @@ "\n", "**Note:** Certain chunks may exceed the threshold set for chunking in the previous cells due to the way tables are chunked by row and section paragraph sizes. This might result in a token limit exceed error for certain embedding models.\n", "\n", - "Ensure to replace the **domain_endpoint** variable with the Amazon OpenSearch Service domain (2.11 and higher) or Serverless collection you created in your account.\n", - "\n", - "If using Amazon Opensearch Serverless, change the `openserach_serverless` to True." 
+ "Setting **domain_endpoint** to the **host** variable ensures it uses the endpoint of the OpenSearch Service domain/Serverless collection you created earlier. If you did not follow the previous steps, update the variable with your own domain/endpoint id." ] }, { @@ -1305,11 +1298,11 @@ "This script demonstrates indexing documents into an Amazon OpenSearch Serverless domain using AWS Identity and Access Management (IAM) for authentication.\n", "\"\"\"\n", "service = 'aoss'\n", - "# replace wit your OpenSearch Service domain/Serverless endpoint\n", - "domain_endpoint = host\n", + "# Using host will use your OpenSearch Service domain/Serverless endpoint id\n", + "domain_endpoint = host \n", "\n", "credentials = boto3.Session().get_credentials()\n", - "awsauth = AWSV4SignerAuth(credentials, \"us-east-1\", service)\n", + "awsauth = AWSV4SignerAuth(credentials, \"us-west-2\", service)\n", "os_ = OpenSearch(\n", " hosts = [{'host': domain_endpoint, 'port': 443}],\n", " http_auth = awsauth,\n", @@ -1672,7 +1665,7 @@ "source": [ "from opensearchpy import Transport\n", "credentials = boto3.Session().get_credentials()\n", - "awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-east-1\", service, session_token=credentials.token)\n", + "awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-west-2\", service, session_token=credentials.token)\n", "transport = Transport(\n", " hosts = [{'host': domain_endpoint, 'port': 443}],\n", " http_auth = awsauth,\n", @@ -1960,20 +1953,12 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "id": "edc0c8ab-5cf5-4200-829f-08c576db2d45", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Size of prompt token is 2938\n" - ] - } - ], + "outputs": [], "source": [ "csv_seperator=\"|\"\n", "prompt_template=f\"\"\"You are a helpful, obedient and truthful financial assistance.\n", @@ -1998,9 +1983,7 @@ "Question: {question}\n", "if able to answer:\n", " Include in 
your response before your answer: \n", - " document or additional info tag(s) containing the relevant info\"\"\"\n", - "\n", - "print(f' Size of prompt token is {client.count_tokens(prompt_template)}')" + " document or additional info tag(s) containing the relevant info\"\"\"\n" ] }, {