NVIDIA · jtcasablanca · Feb 28, 2025 · Mar 1, 2025 · Mar 1, 2025 · Mar 1, 2025
diff --git a/.gitignore b/.gitignore
@@ -54,4 +54,8 @@ cover/
 
 # Workbench Project Layout
 data/scratch/*
-!data/scratch/.gitkeep
+!data/scratch/.gitkeep
+data/chroma.sqlite3
+
+data/*
+!data/.gitkeep
diff --git a/.project/configpacks b/.project/configpacks
@@ -3,10 +3,12 @@
 *cuda.CUDA
 *defaults.EnvVars
 *defaults.Readme
+*defaults.CA
 *defaults.Entrypoint
 *apt.PackageManager
 *bash.PreLanguage
 *python.PipPackageManager
 *bash.PostBuild
 *jupyterlab.JupyterLab
+*vs_code.VSCode
 *tensorboard.Tensorboard
diff --git a/.project/spec.yaml b/.project/spec.yaml
@@ -1,137 +1,139 @@
 specVersion: v2
 specMinorVersion: 2
 meta:
-  name: agentic-rag
-  image: project-agentic-rag
-  description: An example project for advanced RAG using agents
-  labels: []
-  createdOn: "2024-07-15T21:09:46Z"
-  defaultBranch: main
+    name: agentic-rag
+    image: project-agentic-rag
+    description: An example project for advanced RAG using agents
+    labels: []
+    createdOn: "2024-07-15T21:09:46Z"
+    defaultBranch: main
 layout:
-- path: code/
-  type: code
-  storage: git
-- path: models/
-  type: models
-  storage: gitlfs
-- path: data/
-  type: data
-  storage: gitlfs
-- path: data/scratch/
-  type: data
-  storage: gitignore
+    - path: code/
+      type: code
+      storage: git
+    - path: models/
+      type: models
+      storage: gitlfs
+    - path: data/
+      type: data
+      storage: gitignore
 environment:
-  base:
-    registry: nvcr.io
-    image: nvidia/ai-workbench/pytorch:1.0.2
-    build_timestamp: "20231102150513"
-    name: PyTorch
-    supported_architectures: []
-    cuda_version: "12.2"
-    description: A Pytorch 2.1 Base with CUDA 12.2
-    entrypoint_script: ""
-    labels:
-    - cuda12.2
-    - pytorch2.1
-    apps:
-    - name: chat
-      type: custom
-      class: webapp
-      start_command: cd /project/code/ && PROXY_PREFIX=$PROXY_PREFIX python3 -m chatui
-      health_check_command: curl -f "http://localhost:8080/"
-      stop_command: pkill -f "^python3 -m chatui"
-      user_msg: ""
-      logfile_path: ""
-      timeout_seconds: 60
-      icon_url: ""
-      webapp_options:
-        autolaunch: true
-        port: "8080"
-        proxy:
-          trim_prefix: true
-        url: http://localhost:8080/
-    - name: jupyterlab
-      type: jupyterlab
-      class: webapp
-      start_command: jupyter lab --allow-root --port 8888 --ip 0.0.0.0 --no-browser
-        --NotebookApp.base_url=\$PROXY_PREFIX --NotebookApp.default_url=/lab --NotebookApp.allow_origin='*'
-      health_check_command: '[ \$(echo url=\$(jupyter lab list | head -n 2 | tail
-        -n 1 | cut -f1 -d'' '' | grep -v ''Currently'' | sed "s@/?@/lab?@g") | curl
-        -o /dev/null -s -w ''%{http_code}'' --config -) == ''200'' ]'
-      stop_command: jupyter lab stop 8888
-      user_msg: ""
-      logfile_path: ""
-      timeout_seconds: 60
-      icon_url: ""
-      webapp_options:
-        autolaunch: true
-        port: "8888"
-        proxy:
-          trim_prefix: false
-        url_command: jupyter lab list | head -n 2 | tail -n 1 | cut -f1 -d' ' | grep
-          -v 'Currently'
-    - name: tensorboard
-      type: tensorboard
-      class: webapp
-      start_command: tensorboard --logdir \$TENSORBOARD_LOGS_DIRECTORY --path_prefix=\$PROXY_PREFIX
-        --bind_all
-      health_check_command: '[ \$(curl -o /dev/null -s -w ''%{http_code}'' http://localhost:\$TENSORBOARD_PORT\$PROXY_PREFIX/)
-        == ''200'' ]'
-      stop_command: pkill tensorboard
-      user_msg: ""
-      logfile_path: ""
-      timeout_seconds: 60
-      icon_url: ""
-      webapp_options:
-        autolaunch: true
-        port: "6006"
-        proxy:
-          trim_prefix: false
-        url: http://localhost:6006
-    programming_languages:
-    - python3
-    icon_url: ""
-    image_version: 1.0.2
-    os: linux
-    os_distro: ubuntu
-    os_distro_release: "22.04"
-    schema_version: v2
-    user_info:
-      uid: ""
-      gid: ""
-      username: ""
-    package_managers:
-    - name: apt
-      binary_path: /usr/bin/apt
-      installed_packages:
-      - curl
-      - git
-      - git-lfs
-      - vim
-    - name: pip
-      binary_path: /usr/local/bin/pip
-      installed_packages:
-      - jupyterlab==4.0.7
-    package_manager_environment:
-      name: ""
-      target: ""
+    base:
+        registry: nvcr.io
+        image: nvidia/ai-workbench/pytorch:1.0.2
+        build_timestamp: "20231102150513"
+        name: PyTorch
+        supported_architectures: []
+        cuda_version: "12.2"
+        description: A Pytorch 2.1 Base with CUDA 12.2
+        entrypoint_script: ""
+        labels:
+            - cuda12.2
+            - pytorch2.1
+        apps:
+            - name: chat
+              type: custom
+              class: webapp
+              start_command: cd /project/code/ && PROXY_PREFIX=$PROXY_PREFIX python3 -m chatui
+              health_check_command: curl -f "http://localhost:8080/"
+              stop_command: pkill -f "^python3 -m chatui"
+              user_msg: ""
+              logfile_path: ""
+              timeout_seconds: 60
+              icon_url: ""
+              webapp_options:
+                autolaunch: true
+                port: "8080"
+                proxy:
+                    trim_prefix: true
+                url: http://localhost:8080/
+            - name: jupyterlab
+              type: jupyterlab
+              class: webapp
+              start_command: jupyter lab --allow-root --port 8888 --ip 0.0.0.0 --no-browser --NotebookApp.base_url=\$PROXY_PREFIX --NotebookApp.default_url=/lab --NotebookApp.allow_origin='*'
+              health_check_command: '[ \$(echo url=\$(jupyter lab list | head -n 2 | tail -n 1 | cut -f1 -d'' '' | grep -v ''Currently'' | sed "s@/?@/lab?@g") | curl -o /dev/null -s -w ''%{http_code}'' --config -) == ''200'' ]'
+              stop_command: jupyter lab stop 8888
+              user_msg: ""
+              logfile_path: ""
+              timeout_seconds: 60
+              icon_url: ""
+              webapp_options:
+                autolaunch: true
+                port: "8888"
+                proxy:
+                    trim_prefix: false
+                url_command: jupyter lab list | head -n 2 | tail -n 1 | cut -f1 -d' ' | grep -v 'Currently'
+            - name: tensorboard
+              type: tensorboard
+              class: webapp
+              start_command: tensorboard --logdir \$TENSORBOARD_LOGS_DIRECTORY --path_prefix=\$PROXY_PREFIX --bind_all
+              health_check_command: '[ \$(curl -o /dev/null -s -w ''%{http_code}'' http://localhost:\$TENSORBOARD_PORT\$PROXY_PREFIX/) == ''200'' ]'
+              stop_command: pkill tensorboard
+              user_msg: ""
+              logfile_path: ""
+              timeout_seconds: 60
+              icon_url: ""
+              webapp_options:
+                autolaunch: true
+                port: "6006"
+                proxy:
+                    trim_prefix: false
+                url: http://localhost:6006
+        programming_languages:
+            - python3
+        icon_url: ""
+        image_version: 1.0.2
+        os: linux
+        os_distro: ubuntu
+        os_distro_release: "22.04"
+        schema_version: v2
+        user_info:
+            uid: ""
+            gid: ""
+            username: ""
+        package_managers:
+            - name: apt
+              binary_path: /usr/bin/apt
+              installed_packages:
+                - curl
+                - git
+                - git-lfs
+                - vim
+            - name: pip
+              binary_path: /usr/local/bin/pip
+              installed_packages:
+                - jupyterlab==4.0.7
+        package_manager_environment:
+            name: ""
+            target: ""
+    compose_file_path: ""
 execution:
-  apps: []
-  resources:
-    gpu:
-      requested: 1
-    sharedMemoryMB: 1024
-  secrets:
-  - variable: NVIDIA_API_KEY
-    description: NVIDIA API Key for accessing the API catalog
-  - variable: TAVILY_API_KEY
-    description: Tavily Search API Key
-  mounts:
-  - type: project
-    target: /project/
-    description: Project directory
-    options: rw
-  - type: volume
-    target: /data/tensorboard/logs/
-    description: Tensorboard Log Files
-    options: volumeName=tensorboard-logs-volume
+    apps:
+        - name: Visual Studio Code
+          type: vs-code
+          class: native
+          start_command: ""
+          health_check_command: '[ \$(ps aux | grep ".vscode-server" | grep -v grep | wc -l ) -gt 4 ] && [ \$(ps aux | grep "/.vscode-server/bin/.*/node .* net.createConnection" | grep -v grep | wc -l) -gt 0 ]'
+          stop_command: ""
+          user_msg: ""
+          logfile_path: ""
+          timeout_seconds: 120
+          icon_url: ""
+    resources:
+        gpu:
+            requested: 0
+        sharedMemoryMB: 1024
+    secrets:
+        - variable: NVIDIA_API_KEY
+          description: NVIDIA API Key for accessing the API catalog
+        - variable: TAVILY_API_KEY
+          description: Tavily Search API Key
+    mounts:
+        - type: project
+          target: /project/
+          description: Project directory
+          options: rw
+        - type: volume
+          target: /data/tensorboard/logs/
+          description: Tensorboard Log Files
+          options: volumeName=tensorboard-logs-volume