aws-samples · avelizf · Nov 15, 2024
diff --git a/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb b/gen-ai/Bedrock/04-idp-genai-advanced-rag.ipynb
@@ -125,16 +125,18 @@
     "!pip install inflect\n",
     "!pip install requests-aws4auth\n",
     "!pip install opensearch-py\n",
-    "!pip install anthropic"
+    "!pip install anthropic\n",
+    "!pip install openpyxl "
    ]
   },
   {
    "cell_type": "markdown",
    "id": "ff63129e-f3c9-41ea-bf48-3d2608a2531a",
    "metadata": {},
    "source": [
-    "Restart the Kernel \\\n",
-    "Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below."
+    "> ⚠️ Restart the Kernel \\\n",
+    "> \n",
+    "> Click **kernel** on the top bar and **Restart Kernel**. Continue with the cells below."
    ]
   },
   {
@@ -169,7 +171,6 @@
     "from collections import OrderedDict\n",
     "import boto3\n",
     "import time\n",
-    "import a_opensearch_utilities_\n",
     "import sagemaker\n",
     "import openpyxl\n",
     "from openpyxl.cell import Cell\n",
@@ -184,7 +185,7 @@
     ")\n",
     "from anthropic import Anthropic\n",
     "client = Anthropic()\n",
-    "bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-east-1',config=config)"
+    "bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name='us-west-2',config=config)"
    ]
   },
   {
@@ -264,12 +265,7 @@
     "                    {\n",
     "                        'Resource': ['index/' + vector_store_name + '/*'],\n",
     "                        'Permission': [\n",
-    "                            'aoss:CreateIndex',\n",
-    "                            'aoss:DeleteIndex',\n",
-    "                            'aoss:UpdateIndex',\n",
-    "                            'aoss:DescribeIndex',\n",
-    "                            'aoss:ReadDocument',\n",
-    "                            'aoss:WriteDocument'],\n",
+    "                            'aoss:*'],\n",
     "                        'ResourceType': 'index'\n",
     "                    }],\n",
     "                'Principal': [identity],\n",
@@ -450,7 +446,7 @@
    "outputs": [],
    "source": [
     "BUCKET= sagemaker.Session().default_bucket()\n",
-    "extractor = Textractor(region_name=\"us-east-1\")\n",
+    "extractor = Textractor(region_name=\"us-west-2\")\n",
     "file=\"amazon-2024-10k.pdf\"\n",
     "doc_id= os.path.basename(file)\n",
     "file_name, ext = os.path.splitext(file)\n",
@@ -1261,7 +1257,6 @@
   },
   {
    "cell_type": "markdown",
-   "id": "6b5f93e9-4813-4b4e-a040-87e2370ec64b",
    "metadata": {},
    "source": [
     "\n",
@@ -1284,9 +1279,7 @@
     "\n",
     "**Note:** Certain chunks may exceed the threshold set for chunking in the previous cells due to the way tables are chunked by row and section paragraph sizes. This might result in a token limit exceed error for certain embedding models.\n",
     "\n",
-    "Ensure to replace the **domain_endpoint** variable with the Amazon OpenSearch Service domain (2.11 and higher) or Serverless collection you created in your account.\n",
-    "\n",
-    "If using Amazon Opensearch Serverless, change the `openserach_serverless` to True."
+    "The **domain_endpoint** variable is set from the **host** variable, so it automatically uses the endpoint of the OpenSearch Service domain/Serverless collection you built in the earlier steps. If you did not follow those steps, set **domain_endpoint** to your own domain/collection endpoint instead."
    ]
   },
   {
@@ -1305,11 +1298,11 @@
     "This script demonstrates indexing documents into an Amazon OpenSearch Serverless domain using AWS Identity and Access Management (IAM) for authentication.\n",
     "\"\"\"\n",
     "service = 'aoss'\n",
-    "# replace wit your OpenSearch Service domain/Serverless endpoint\n",
-    "domain_endpoint = host\n",
+    "# host is expected to hold your OpenSearch Service domain/Serverless endpoint; replace it if you are using a different one\n",
+    "domain_endpoint = host \n",
     "\n",
     "credentials = boto3.Session().get_credentials()\n",
-    "awsauth =  AWSV4SignerAuth(credentials, \"us-east-1\", service)\n",
+    "awsauth =  AWSV4SignerAuth(credentials, \"us-west-2\", service)\n",
     "os_ = OpenSearch(\n",
     "    hosts = [{'host': domain_endpoint, 'port': 443}],\n",
     "    http_auth = awsauth,\n",
@@ -1672,7 +1665,7 @@
    "source": [
     "from opensearchpy import Transport\n",
     "credentials = boto3.Session().get_credentials()\n",
-    "awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-east-1\", service, session_token=credentials.token)\n",
+    "awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, \"us-west-2\", service, session_token=credentials.token)\n",
     "transport = Transport(\n",
     "   hosts = [{'host': domain_endpoint, 'port': 443}],\n",
     "    http_auth = awsauth,\n",
@@ -1960,20 +1953,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": null,
    "id": "edc0c8ab-5cf5-4200-829f-08c576db2d45",
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      " Size of prompt token is 2938\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "csv_seperator=\"|\"\n",
     "prompt_template=f\"\"\"You are a helpful, obedient and truthful financial assistance.\n",
@@ -1998,9 +1983,7 @@
     "Question: {question}\n",
     "if able to answer:\n",
     "    Include in your response before your answer:    \n",
-    "    <source>document or additional info tag(s) containing the relevant info</source>\"\"\"\n",
-    "\n",
-    "print(f' Size of prompt token is {client.count_tokens(prompt_template)}')"
+    "    <source>document or additional info tag(s) containing the relevant info</source>\"\"\"\n"
    ]
   },
   {