diff --git a/.env.example b/.env.example index 77bb563..778ad55 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,6 @@ OPENAI_API_KEY= -# Update these with your Supabase details from your project settings > API +# Update these with your Pinecone details from your project settings > API and dashboard settings PINECONE_API_KEY= PINECONE_ENVIRONMENT= - +PINECONE_INDEX_NAME= diff --git a/README.md b/README.md index e4d7969..93442d5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Docs +# GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Files -Use the new GPT-4 api to build a chatGPT chatbot for Large PDF docs (56 pages used in this example). +Use the new GPT-4 API to build a ChatGPT chatbot for multiple large PDF files. Tech stack used includes LangChain, Pinecone, Typescript, Openai, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vectorstore for storing embeddings and your PDF in text to later retrieve similar docs. @@ -37,28 +37,30 @@ OPENAI_API_KEY= PINECONE_API_KEY= PINECONE_ENVIRONMENT= +PINECONE_INDEX_NAME= + ``` - Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. -- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys. +- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys, and also retrieve your environment and index name from the dashboard. -4. In the `config` folder, replace the `PINECONE_INDEX_NAME` and `PINECONE_NAME_SPACE` with your own details from your pinecone dashboard. +4. In the `config` folder, replace the `PINECONE_NAME_SPACE` with a `namespace` where you'd like to store your embeddings on Pinecone when you run `pnpm run ingest`. This namespace will later be used for queries and retrieval. -5. 
In `utils/makechain.ts` chain change the `QA_PROMPT` for your own usecase. Change `modelName` in `new OpenAIChat` to a different api model if you don't have access to `gpt-4`. See [the OpenAI docs](https://platform.openai.com/docs/models/model-endpoint-compatibility) for a list of supported `modelName`s. For example you could use `gpt-3.5-turbo` if you do not have access to `gpt-4`, yet. +5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAIChat` to `gpt-3.5-turbo` if you don't have access to `gpt-4`. Please verify outside this repo that you have access to `gpt-4`; otherwise the application will not work with it. -## Convert your PDF to embeddings +## Convert your PDF files to embeddings -1. In `docs` folder replace the pdf with your own pdf doc. +**This repo can load multiple PDF files** -2. In `scripts/ingest-data.ts` replace `filePath` with `docs/{yourdocname}.pdf` +1. Inside the `docs` folder, add your PDF files or folders that contain PDF files. -3. Run the script `npm run ingest` to 'ingest' and embed your docs +2. Run the script `pnpm run ingest` to 'ingest' and embed your docs. If you run into errors, see the Troubleshooting section below. -4. Check Pinecone dashboard to verify your namespace and vectors have been added. +3. Check the Pinecone dashboard to verify your namespace and vectors have been added. ## Run the app -Once you've verified that the embeddings and content have been successfully added to your Pinecone, you can run the app `npm run dev` to launch the local dev environment and then type a question in the chat interface. +Once you've verified that the embeddings and content have been successfully added to your Pinecone, you can run the app `pnpm run dev` to launch the local dev environment, and then type a question in the chat interface. ## Troubleshooting @@ -68,17 +70,18 @@ In general, keep an eye out in the `issues` and `discussions` section of this re - Make sure you're running the latest Node version. 
Run `node -v` - Make sure you're using the same versions of LangChain and Pinecone as this repo. -- Check that you've created an `.env` file that contains your valid (and working) API keys. +- Check that you've created an `.env` file that contains your valid (and working) API keys, environment and index name. - If you change `modelName` in `OpenAIChat` note that the correct name of the alternative model is `gpt-3.5-turbo` -- Pinecone indexes of users on the Starter(free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. +- Make sure you have access to `gpt-4` if you decide to use it. Test your OpenAI keys outside the repo and make sure they work and that you have enough API credits. +- Check that your PDF file is not corrupted and can be parsed. **Pinecone errors** -- Make sure your pinecone dashboard `environment` and `index` matches the one in your `config` folder. +- Make sure your Pinecone dashboard `environment` and `index` match the ones in the `pinecone.ts` and `.env` files. - Check that you've set the vector dimensions to `1536`. -- Switch your Environment in pinecone to `us-east1-gcp` if the other environment is causing issues. - -If you're stuck after trying all these steps, delete `node_modules`, restart your computer, then `pnpm install` again. +- Make sure your Pinecone namespace is in lowercase. +- Pinecone indexes of users on the Starter(free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. +- Retry from scratch with a new Pinecone index and cloned repo. ## Credit diff --git a/components/layout.tsx b/components/layout.tsx index 4481b4d..5e3d207 100644 --- a/components/layout.tsx +++ b/components/layout.tsx @@ -14,7 +14,7 @@ export default function Layout({ children }: LayoutProps) { -
+
{children}
diff --git a/config/pinecone.ts b/config/pinecone.ts index f1851c8..ce2dada 100644 --- a/config/pinecone.ts +++ b/config/pinecone.ts @@ -1,8 +1,12 @@ /** - * Change the index and namespace to your own + * Change the namespace to the namespace on Pinecone you'd like to store your embeddings. */ -const PINECONE_INDEX_NAME = 'langchainjsfundamentals'; +if (!process.env.PINECONE_INDEX_NAME) { + throw new Error('Missing Pinecone index name in .env file'); +} + +const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? ''; const PINECONE_NAME_SPACE = 'pdf-test'; //namespace is optional for your vectors diff --git a/declarations/pdf-parse.d.ts b/declarations/pdf-parse.d.ts new file mode 100644 index 0000000..5b2ab50 --- /dev/null +++ b/declarations/pdf-parse.d.ts @@ -0,0 +1,5 @@ +declare module 'pdf-parse/lib/pdf-parse.js' { + import pdf from 'pdf-parse'; + + export default pdf; +} diff --git a/docs/finance/turingfinance.pdf b/docs/finance/turingfinance.pdf new file mode 100644 index 0000000..6a9cf33 Binary files /dev/null and b/docs/finance/turingfinance.pdf differ diff --git a/docs/MorseVsFrederick.pdf b/docs/law/MorseVsFrederick.pdf similarity index 100% rename from docs/MorseVsFrederick.pdf rename to docs/law/MorseVsFrederick.pdf diff --git a/package.json b/package.json index 8d10f96..99d74b4 100644 --- a/package.json +++ b/package.json @@ -2,6 +2,9 @@ "name": "gpt4-langchain-pdf-chatbot", "version": "0.1.0", "private": true, + "engines": { + "node": ">=18" + }, "license": "MIT", "author": "Mayooear", "type": "module", @@ -20,7 +23,7 @@ "@radix-ui/react-accordion": "^1.1.1", "clsx": "^1.2.1", "dotenv": "^16.0.3", - "langchain": "0.0.33", + "langchain": "0.0.41", "lucide-react": "^0.125.0", "next": "13.2.3", "pdf-parse": "1.1.1", @@ -43,6 +46,9 @@ "tsx": "^3.12.3", "typescript": "^4.9.5" }, + "engines": { + "node": ">=18" + }, "keywords": [ "starter", "gpt4", diff --git a/pages/api/chat.ts b/pages/api/chat.ts index a964644..82f97bb 100644 --- a/pages/api/chat.ts 
+++ b/pages/api/chat.ts @@ -21,10 +21,12 @@ export default async function handler( /* create vectorstore*/ const vectorStore = await PineconeStore.fromExistingIndex( - index, new OpenAIEmbeddings({}), - 'text', - PINECONE_NAME_SPACE, //optional + { + pineconeIndex: index, + textKey: 'text', + namespace: PINECONE_NAME_SPACE, + }, ); res.writeHead(200, { diff --git a/pages/index.tsx b/pages/index.tsx index 03a182a..93cea1e 100644 --- a/pages/index.tsx +++ b/pages/index.tsx @@ -1,4 +1,4 @@ -import { useRef, useState, useEffect, useMemo } from 'react'; +import { useRef, useState, useEffect, useMemo, useCallback } from 'react'; import Layout from '@/components/layout'; import styles from '@/styles/Home.module.css'; import { Message } from '@/types/chat'; @@ -18,6 +18,7 @@ export default function Home() { const [query, setQuery] = useState(''); const [loading, setLoading] = useState(false); const [sourceDocs, setSourceDocs] = useState([]); + const [error, setError] = useState(null); const [messageState, setMessageState] = useState<{ messages: Message[]; pending?: string; @@ -36,8 +37,6 @@ export default function Home() { const { messages, pending, history, pendingSourceDocs } = messageState; - console.log('messageState', messageState); - const messageListRef = useRef(null); const textAreaRef = useRef(null); @@ -49,6 +48,8 @@ export default function Home() { async function handleSubmit(e: any) { e.preventDefault(); + setError(null); + if (!query) { alert('Please input a question'); return; @@ -120,18 +121,22 @@ export default function Home() { }); } catch (error) { setLoading(false); + setError('An error occurred while fetching the data. 
Please try again.'); console.log('error', error); } } //prevent empty submissions - const handleEnter = (e: any) => { - if (e.key === 'Enter' && query) { - handleSubmit(e); - } else if (e.key == 'Enter') { - e.preventDefault(); - } - }; + const handleEnter = useCallback( + (e: any) => { + if (e.key === 'Enter' && query) { + handleSubmit(e); + } else if (e.key == 'Enter') { + e.preventDefault(); + } + }, + [query], + ); const chatMessages = useMemo(() => { return [ @@ -148,6 +153,13 @@ export default function Home() { ]; }, [messages, pending, pendingSourceDocs]); + //scroll to bottom of chat + useEffect(() => { + if (messageListRef.current) { + messageListRef.current.scrollTop = messageListRef.current.scrollHeight; + } + }, [chatMessages]); + return ( <> @@ -201,14 +213,17 @@ export default function Home() {
{message.sourceDocs && ( -
+
{message.sourceDocs.map((doc, index) => ( -
+

Source {index + 1}

@@ -234,7 +249,7 @@ export default function Home() {
{sourceDocs.map((doc, index) => ( -
+

Source {index + 1}

@@ -296,9 +311,14 @@ export default function Home() {
+ {error && ( +
+

{error}

+
+ )}
-