From 70d0d0e67a7f79d5c28979062395be5034af676d Mon Sep 17 00:00:00 2001
From: ElNandes
Date: Wed, 14 Aug 2024 13:19:08 +0200
Subject: [PATCH 1/2] Adding new images/dockerfile with dependencies for basic
 rag implementations

---
Note: this series was edited by hand after `git format-patch` generated it.
The blob ids on the `index` lines are those of the originally generated files
and no longer match the patched content; regenerate the series from commits
if they are needed (for example for `git am --3way`).

 .../cuda12-pytorch-2.2.2-RAG/Dockerfile       | 41 +++++++++++++++++++
 .../cuda12-pytorch-2.2.2-RAG/chroma_fix.sh    | 33 +++++++++++++++
 .../cuda12-pytorch-2.2.2-RAG/requirements.txt | 13 ++++++
 3 files changed, 87 insertions(+)
 create mode 100644 nlp-notebook/cuda12-pytorch-2.2.2-RAG/Dockerfile
 create mode 100755 nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
 create mode 100644 nlp-notebook/cuda12-pytorch-2.2.2-RAG/requirements.txt

diff --git a/nlp-notebook/cuda12-pytorch-2.2.2-RAG/Dockerfile b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/Dockerfile
new file mode 100644
index 0000000..8e3967f
--- /dev/null
+++ b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/Dockerfile
@@ -0,0 +1,41 @@
+ARG BASE_IMAGE=quay.io/a2s-institute/ml-notebook:cuda12-pytorch-2.2.2
+
+FROM $BASE_IMAGE
+
+LABEL maintainer="Mohammad Wasil "
+
+# Make RUN steps fail when any command in a pipeline fails; without this the
+# `curl | sh` step below would succeed even if the download itself failed.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install the Python dependencies as $NB_USER.
+USER $NB_USER
+
+COPY requirements.txt /tmp/requirements.txt
+COPY chroma_fix.sh /tmp/chroma_fix.sh
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /tmp/requirements.txt
+
+USER root
+
+# Install Ollama with its upstream install script.
+RUN curl -fsSL https://ollama.com/install.sh | sh
+
+# Make chromadb load pysqlite3 in place of the stdlib sqlite3 module. The
+# script exits non-zero when it cannot find chromadb, which aborts the build.
+# The helper is removed in the same step because nothing uses it afterwards.
+RUN chmod +x /tmp/chroma_fix.sh && \
+    /tmp/chroma_fix.sh && \
+    rm /tmp/chroma_fix.sh
+
+# clean up
+RUN rm /tmp/requirements.txt && \
+    /opt/conda/bin/conda clean -afy && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.pyc' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete
+
+# Run the container as $NB_USER again.
+USER $NB_USER
+
+CMD ["ollama","serve"]
diff --git a/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
new file mode 100755
index 0000000..309bea9
--- /dev/null
+++ b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# File you want to prepend lines to
+TARGET_FILE="../../home/vscode/.local/lib/python3.12/site-packages/chromadb/__init__.py"
+
+# Temporary file to store the existing content
+TEMP_FILE=$(mktemp)
+
+# Stop at the first failing command and always remove the temporary file
+set -e
+trap 'rm -f "$TEMP_FILE"' EXIT
+
+# Without the target there is nothing to patch: exit non-zero so that the
+# image build stops instead of silently producing an unpatched image
+if [ ! -f "$TARGET_FILE" ]; then
+    echo "File not found: $TARGET_FILE" >&2
+    exit 1
+fi
+
+# Write the new lines to the temporary file
+echo "__import__('pysqlite3')" > "$TEMP_FILE"
+echo "import sys" >> "$TEMP_FILE"
+echo "sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')" >> "$TEMP_FILE"
+
+# Append the original content to the temporary file
+cat "$TARGET_FILE" >> "$TEMP_FILE"
+
+# Write the result back into the existing file instead of moving the temporary
+# file over it: mktemp creates its file with mode 0600, and mv would carry that
+# mode and the owner of the temporary file over to the chromadb module
+cat "$TEMP_FILE" > "$TARGET_FILE"
+
+echo "Lines have been prepended to $TARGET_FILE"
diff --git a/nlp-notebook/cuda12-pytorch-2.2.2-RAG/requirements.txt b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/requirements.txt
new file mode 100644
index 0000000..1051758
--- /dev/null
+++ b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/requirements.txt
@@ -0,0 +1,13 @@
+llama-index-core==0.10.63
+llama-index-embeddings-huggingface==0.2.2
+llama-index-llms-ollama==0.2.2
+llama-index-readers-file==0.1.32
+llama-index-vector-stores-chroma==0.1.10
+chromadb==0.5.3
+pysqlite3-binary==0.5.3
+PyMuPDF==1.24.9
+spacy
+nltk
+gensim
+wikipedia-api
+llama-cpp-python

From d63a28b2c7a6c03ebe0e7de1451a91e168794ebf Mon Sep 17 00:00:00 2001
From: ElNandes
Date: Wed, 14 Aug 2024 13:21:57 +0200
Subject: [PATCH 2/2] minor fix for chromadb issue

---
 nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
index 309bea9..71fe3b4 100755
--- a/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
+++ b/nlp-notebook/cuda12-pytorch-2.2.2-RAG/chroma_fix.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # File you want to prepend lines to
-TARGET_FILE="../../home/vscode/.local/lib/python3.12/site-packages/chromadb/__init__.py"
+TARGET_FILE="/opt/conda/lib/python3.11/site-packages/chromadb/__init__.py"
 
 # Temporary file to store the existing content
 TEMP_FILE=$(mktemp)