From 6c27094fc658f42cb6745afb220d842c633ba04f Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:43:58 +0100 Subject: [PATCH 1/8] add ollama to docker-compose --- docker-compose_ollama.yml | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 docker-compose_ollama.yml diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml new file mode 100644 index 000000000..020b12c96 --- /dev/null +++ b/docker-compose_ollama.yml @@ -0,0 +1,54 @@ +# Use this file as an override to add a local Ollama instance to your 4CAT stack. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +# +# Once running, configure 4CAT via the Control Panel → Settings → LLM: +# LLM Provider Type : ollama +# LLM Server URL : http://ollama:11434 +# +# GPU support (NVIDIA): +# Uncomment the `deploy.resources` block in the ollama service below and +# ensure the NVIDIA Container Toolkit is installed on your host. +# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +# +# GPU support (Apple Silicon / AMD): +# Pass the appropriate device through your host's Docker settings instead. +# Ollama will automatically detect the GPU when it is available inside the container. + +services: + ollama: + image: ollama/ollama:latest + container_name: 4cat_ollama + restart: unless-stopped + volumes: + - 4cat_ollama:/root/.ollama + # Expose the Ollama API on the host for optional external access or + # management with the Ollama CLI. Remove this block if you want to keep + # Ollama accessible only within the Docker network. + ports: + - "127.0.0.1:11434:11434" + healthcheck: + test: ["CMD", "ollama", "ls"] + interval: 10s + timeout: 5s + retries: 5 + # --- NVIDIA GPU support (uncomment to enable) --- + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # Make the 4CAT backend wait for Ollama to be healthy before starting. + # This prevents initial model-refresh failures on first boot. + backend: + depends_on: + ollama: + condition: service_healthy + +volumes: + 4cat_ollama: + name: 4cat_ollama_data From 8a8427cd5a1bfc686189498b29ccba5d7c9a59d6 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:44:39 +0100 Subject: [PATCH 2/8] give me a proper worker who can do neat stuff. --- backend/workers/ollama_manager.py | 171 ++++++++++++++++++++++++++++++ backend/workers/refresh_items.py | 55 +--------- 2 files changed, 173 insertions(+), 53 deletions(-) create mode 100644 backend/workers/ollama_manager.py diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py new file mode 100644 index 000000000..d990b8b14 --- /dev/null +++ b/backend/workers/ollama_manager.py @@ -0,0 +1,171 @@ +""" +Manage Ollama LLM models +""" +import json +import time + +import requests + +from backend.lib.worker import BasicWorker + + +class OllamaManager(BasicWorker): + """ + Manage Ollama LLM models + + Periodically refreshes the list of available models from an Ollama server. + Can also pull or delete models on demand when queued with a specific task. 
+ + Job details: + - task: "refresh" (default), "pull", or "delete" + + Job remote_id: + - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-ollama" + max_workers = 1 + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-ollama-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + model_name = self.job.data["remote_id"] + + if task == "refresh": + self.refresh_models() + elif task == "pull": + success = self.pull_model(model_name) + if success: + self.refresh_models() + elif task == "delete": + success = self.delete_model(model_name) + if success: + self.refresh_models() + else: + self.log.warning(f"OllamaManager: unknown task '{task}'") + + self.job.finish() + + def _get_llm_headers(self): + """Build request headers for LLM server auth.""" + headers = {"Content-Type": "application/json"} + llm_api_key = self.config.get("llm.api_key", "") + llm_auth_type = self.config.get("llm.auth_type", "") + if llm_api_key and llm_auth_type: + headers[llm_auth_type] = llm_api_key + return headers + + def refresh_models(self): + """ + Query the Ollama server for available models and update llm.available_models. + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + return + + headers = self._get_llm_headers() + available_models = {} + + try: + response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) + if response.status_code != 200: + self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") + return + + for model in response.json().get("models", []): + model_id = model["name"] + try: + meta = requests.post( + f"{llm_server}/api/show", + headers=headers, + json={"model": model_id}, + timeout=10 + ).json() + display_name = ( + f"{meta['model_info']['general.basename']}" + f" ({meta['details']['parameter_size']} parameters)" + ) + except (requests.RequestException, json.JSONDecodeError, KeyError) as e: + self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") + display_name = model_id + + available_models[model_id] = { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local" + } + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") + + def pull_model(self, model_name): + """ + Pull a model from the Ollama registry. + + :param str model_name: Model name (e.g. 
"llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot pull model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + # stream=False waits for the pull to complete before returning + response = requests.post( + f"{llm_server}/api/pull", + headers=headers, + json={"model": model_name, "stream": False}, + timeout=600 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}") + return False + + def delete_model(self, model_name): + """ + Delete a model from the Ollama server. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot delete model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + response = requests.delete( + f"{llm_server}/api/delete", + headers=headers, + json={"model": model_name}, + timeout=30 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}") + return False diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 28eb73637..96a7da6b0 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -1,17 +1,14 @@ """ Refresh items """ -import json - -import requests - from backend.lib.worker import BasicWorker class ItemUpdater(BasicWorker): """ Refresh 4CAT items - Refreshes settings that are dependent on external factors + Refreshes settings that are dependent on external factors. + LLM model refreshing is handled by the OllamaManager worker. """ type = "refresh-items" max_workers = 1 @@ -21,58 +18,10 @@ def ensure_job(cls, config=None): """ Ensure that the refresher is always running - This is used to ensure that the refresher is always running, and if it is - not, it will be started by the WorkerManager. 
- :return: Job parameters for the worker """ return {"remote_id": "refresh-items", "interval": 60} def work(self): - # Refresh items - self.refresh_settings() - self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info']['general.basename']} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file From 89824e2d475e2ac043c2f198efb31db54bcaa743 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:53:21 +0100 Subject: [PATCH 3/8] ruff you mean --- backend/workers/ollama_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index d990b8b14..64808f52d 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -2,8 +2,6 @@ Manage Ollama LLM models """ import json -import time - import requests from backend.lib.worker import BasicWorker From e7aa9af35831ef719b5ae45939b94dba5c28fc17 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:36:00 +0100 Subject: [PATCH 4/8] add docker setup if ollama present --- docker/docker_setup.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docker/docker_setup.py b/docker/docker_setup.py index 450684602..aea641c12 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -207,6 +207,35 @@ def _format_host(host: str) -> str: f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\"" ) + # If an Ollama container is available on the Docker network, configure 4CAT to use it. 
+    ollama_url = 'http://ollama:11434'
+    try:
+        import requests
+        try:
+            resp = requests.get(f"{ollama_url}/api/tags", timeout=2)
+            if resp.status_code == 200:
+                current_llm_server = config.get("llm.server")
+                if current_llm_server == ollama_url:
+                    print("Ollama server already configured in 4CAT settings.")
+                elif current_llm_server and current_llm_server != ollama_url:
+                    # A different LLM server is already configured; warn but do not overwrite user settings
+                    print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.")
+                else:
+                    # set basic LLM settings so the initial admin user does not need to
+                    # configure them manually for local development environments that
+                    # include the Ollama sidecar.
+                    config.set('llm.provider_type', 'ollama')
+                    config.set('llm.server', ollama_url)
+                    config.set('llm.access', True)
+                    config.db.commit()
+                    print('Detected Ollama on Docker network; configured LLM settings to use it.')
+        except requests.RequestException:
+            # Ollama not available; do nothing
+            pass
+    except Exception:
+        # requests not installed or some other unexpected error; skip automatic Ollama configuration
+        pass
+
     print(f"\nStarting app\n"
           f"4CAT is accessible at:\n"
          f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n")

From 74e01b6263425f161e109d4a55e71bcacd659ee2 Mon Sep 17 00:00:00 2001
From: Dale Wahl
Date: Thu, 5 Mar 2026 16:36:33 +0100
Subject: [PATCH 5/8] a useful frontend setting panel

---
 common/lib/config_definition.py            |  10 +-
 webtool/__init__.py                        |   2 +
 webtool/templates/controlpanel/layout.html |   2 +
 .../templates/controlpanel/llm-server.html | 147 ++++++++++++++++++
 webtool/views/views_llm.py                 | 104 +++++++++++++
 5 files changed, 264 insertions(+), 1 deletion(-)
 create mode 100644 webtool/templates/controlpanel/llm-server.html
 create mode 100644 webtool/views/views_llm.py

diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py
index aef363e04..daf20020d 100644
--- a/common/lib/config_definition.py
+++ b/common/lib/config_definition.py
@@ -621,7 +621,15 @@
         "type": UserInput.OPTION_TEXT_JSON,
         "default": {},
         "help": "Available LLM models",
-        "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.",
+        "tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.",
         "indirect": True,
         "global": True
     },
+    "llm.enabled_models": {
+        "type": UserInput.OPTION_TEXT_JSON,
+        "default": [],
+        "help": "Enabled LLM models",
+        "tooltip": "List of model keys enabled for use. 
Managed via the LLM Server settings panel.",
+        "indirect": True,
+        "global": True
+    },
diff --git a/webtool/__init__.py b/webtool/__init__.py
index 54ac2072c..e807b7ae0 100644
--- a/webtool/__init__.py
+++ b/webtool/__init__.py
@@ -171,6 +171,7 @@ def time_this(func):
     import webtool.views.views_restart  # noqa: E402
     import webtool.views.views_admin  # noqa: E402
     import webtool.views.views_extensions  # noqa: E402
+    import webtool.views.views_llm  # noqa: E402
     import webtool.views.views_user  # noqa: E402
     import webtool.views.views_dataset  # noqa: E402
     import webtool.views.views_misc  # noqa: E402
@@ -181,6 +182,7 @@
     app.register_blueprint(webtool.views.views_restart.component)
     app.register_blueprint(webtool.views.views_admin.component)
     app.register_blueprint(webtool.views.views_extensions.component)
+    app.register_blueprint(webtool.views.views_llm.component)
     app.register_blueprint(webtool.views.views_user.component)
     app.register_blueprint(webtool.views.views_dataset.component)
     app.register_blueprint(webtool.views.views_misc.component)
diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html
index b0b0bc0b4..98d87220a 100644
--- a/webtool/templates/controlpanel/layout.html
+++ b/webtool/templates/controlpanel/layout.html
@@ -18,6 +18,8 @@
             Jobs{% endif %}
         {% if __user_config("privileges.admin.can_restart") %}
             Extensions{% endif %}
+        {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %}
+            <a href="{{ url_for('llm.llm_panel') }}">LLM Server</a>{% endif %}
         {% if __user_config("privileges.admin.can_manage_users") %}
             View logs{% endif %}
         {% if __user_config("privileges.admin.can_manipulate_all_datasets") %}
diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html
new file mode 100644
index 000000000..d8a7dbd53
--- /dev/null
+++ b/webtool/templates/controlpanel/llm-server.html
@@ -0,0 +1,147 @@
+{% extends "controlpanel/layout.html" %}
+
+{% block title %}LLM Server{% endblock %}
+{% block body_class %}plain-page admin {{ body_class }}{% endblock %}
+{% block subbreadcrumbs %}{% set navigation.sub = "llm" %}{% endblock %}
+
+{% block body %}
+<div>
+    <h2>LLM Server</h2>
+
+    {% if flashes %}
+    <div>
+        {% for notice in flashes %}
+        <p>
+            {{ notice|safe }}
+        </p>
+        {% endfor %}
+    </div>
+    {% endif %}
+
+    {# Server status #}
+    <table>
+        <tr>
+            <th>Setting</th>
+            <th>Value</th>
+        </tr>
+        <tr>
+            <td>Server URL</td>
+            <td>{% if llm_server %}{{ llm_server }}{% else %}Not configured{% endif %}</td>
+        </tr>
+        <tr>
+            <td>Status</td>
+            <td>
+                {% if server_status == "online" %}
+                Online
+                {% elif server_status == "not configured" %}
+                Not configured
+                {% else %}
+                {{ server_status }}
+                {% endif %}
+            </td>
+        </tr>
+    </table>
+
+    {# Available models #}
+    <h3>Available Models</h3>
+    <form method="POST">
+        <input type="hidden" name="action" value="refresh">
+        <button type="submit">Refresh</button>
+    </form>
+
+    <table>
+        <tr>
+            <th>Model</th>
+            <th>Display name</th>
+            <th>Status</th>
+            <th>Actions</th>
+        </tr>
+        {% if available_models %}
+        {% for model_id, model in available_models.items() %}
+        <tr>
+            <td>{{ model_id }}</td>
+            <td>
+                {% if model.model_card %}
+                <a href="{{ model.model_card }}">{{ model.name }}</a>
+                {% else %}
+                {{ model.name }}
+                {% endif %}
+            </td>
+            <td>
+                {% if model_id in enabled_models %}
+                Enabled
+                {% else %}
+                Disabled
+                {% endif %}
+            </td>
+            <td>
+                {% if model_id in enabled_models %}
+                <form method="POST">
+                    <input type="hidden" name="action" value="disable">
+                    <input type="hidden" name="model_name" value="{{ model_id }}">
+                    <button type="submit">Disable</button>
+                </form>
+                {% else %}
+                <form method="POST">
+                    <input type="hidden" name="action" value="enable">
+                    <input type="hidden" name="model_name" value="{{ model_id }}">
+                    <button type="submit">Enable</button>
+                </form>
+                {% endif %}
+                <form method="POST">
+                    <input type="hidden" name="action" value="delete">
+                    <input type="hidden" name="model_name" value="{{ model_id }}">
+                    <button type="submit">Delete</button>
+                </form>
+            </td>
+        </tr>
+        {% endfor %}
+        {% else %}
+        <tr>
+            <td colspan="4">
+                {% if llm_server %}
+                No models found. Use the Refresh button to fetch available models, or pull a new model below.
+                {% else %}
+                Configure the LLM server URL in Settings first.
+                {% endif %}
+            </td>
+        </tr>
+        {% endif %}
+    </table>
+
+    {# Pull a new model #}
+    {% if llm_server %}
+    <div>
+        <h3>Pull Model</h3>
+        <p>Enter an Ollama model name (e.g. <code>llama3:8b</code>) to pull it from the
+            <a href="https://ollama.com/library">Ollama library</a>.
+            Pulling large models may take several minutes; the job runs in the background.</p>
+        <form method="POST">
+            <input type="hidden" name="action" value="pull">
+            <input type="text" name="model_name" placeholder="llama3:8b">
+            <button type="submit">Pull</button>
+        </form>
+    </div>
+    {% endif %}
+</div>
+{% endblock %}
diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py
new file mode 100644
index 000000000..41f9df472
--- /dev/null
+++ b/webtool/views/views_llm.py
@@ -0,0 +1,104 @@
+"""
+4CAT views for LLM server management
+"""
+import time
+
+import requests
+
+from flask import Blueprint, render_template, flash, get_flashed_messages, redirect, url_for, request, g
+from flask_login import login_required
+
+from webtool.lib.helpers import setting_required, error
+
+component = Blueprint("llm", __name__)
+
+
+@component.route("/admin/llm/", methods=["GET", "POST"])
+@login_required
+@setting_required("privileges.admin.can_manage_settings")
+def llm_panel():
+    """
+    LLM Server management panel
+
+    Shows server status, available models, and controls to pull/delete/refresh
+    models. Pull, delete, and refresh operations are queued as OllamaManager
+    jobs rather than run synchronously.
+    """
+    if not g.config.get("llm.access"):
+        return error(403, message="LLM access is not enabled on this server.")
+
+    if request.method == "POST":
+        action = request.form.get("action", "").strip()
+
+        if action == "refresh":
+            # Queue a one-time manual refresh job; use a timestamp-based remote_id
+            # so it is always accepted even if a periodic job already exists.
+            g.queue.add_job("manage-ollama", details={"task": "refresh"},
+                            remote_id=f"manage-ollama-manual-{int(time.time())}")
+            flash("Model refresh job queued.")
+
+        elif action == "pull":
+            model_name = request.form.get("model_name", "").strip()
+            if model_name:
+                g.queue.add_job("manage-ollama", details={"task": "pull"}, remote_id=model_name)
+                flash(f"Pull job queued for model '{model_name}'.")
+            else:
+                flash("Please provide a model name to pull.")
+
+        elif action == "delete":
+            model_name = request.form.get("model_name", "").strip()
+            if model_name:
+                g.queue.add_job("manage-ollama", details={"task": "delete"}, remote_id=model_name)
+                flash(f"Delete job queued for model '{model_name}'.")
+
+        elif action == "enable":
+            model_name = request.form.get("model_name", "").strip()
+            if model_name:
+                enabled_models = list(g.config.get("llm.enabled_models", []) or [])
+                if model_name not in enabled_models:
+                    enabled_models.append(model_name)
+                    g.config.set("llm.enabled_models", enabled_models)
+                    flash(f"Model '{model_name}' enabled.")
+
+        elif action == "disable":
+            model_name = request.form.get("model_name", "").strip()
+            if model_name:
+                enabled_models = list(g.config.get("llm.enabled_models", []) or [])
+                if model_name in enabled_models:
+                    enabled_models.remove(model_name)
+                    g.config.set("llm.enabled_models", enabled_models)
+                    flash(f"Model '{model_name}' disabled.")
+
+        return redirect(url_for("llm.llm_panel"))
+
+    # --- GET: render panel ---
+
+    llm_server = g.config.get("llm.server", "")
+    server_status = "not configured"
+
+    if llm_server:
+        headers = {"Content-Type": "application/json"}
+        llm_api_key = g.config.get("llm.api_key", "")
+        llm_auth_type = g.config.get("llm.auth_type", "")
+        if llm_api_key and llm_auth_type:
+            headers[llm_auth_type] = llm_api_key
+
+        try:
+            resp = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=5)
+            server_status = "online" if resp.status_code == 200 else f"error (HTTP {resp.status_code})"
+        except requests.Timeout:
+            server_status = "unreachable (timeout)"
+        except requests.RequestException as e:
+            server_status = f"unreachable ({e})"
+
+    available_models = g.config.get("llm.available_models", {}) or {}
+    enabled_models = list(g.config.get("llm.enabled_models", []) or [])
+
+    return 
 render_template(
+        "controlpanel/llm-server.html",
+        flashes=get_flashed_messages(),
+        llm_server=llm_server,
+        server_status=server_status,
+        available_models=available_models,
+        enabled_models=enabled_models,
+    )

From baec03a75257a8780796add212a9ce3826671dd6 Mon Sep 17 00:00:00 2001
From: Dale Wahl
Date: Thu, 5 Mar 2026 16:50:36 +0100
Subject: [PATCH 6/8] only show enabled models

---
 processors/machine_learning/llm_prompter.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py
index 57d8ee496..959ff8f88 100644
--- a/processors/machine_learning/llm_prompter.py
+++ b/processors/machine_learning/llm_prompter.py
@@ -66,9 +66,15 @@ def get_queue_id(cls, remote_id, details, dataset) -> str:
     def get_options(cls, parent_dataset=None, config=None) -> dict:
         # Check if 4CAT wide LLM server is available
         if config.get("llm.access", False) and config.get("llm.server", ""):
-            shared_llm_name = config.get("llm.host_name", "4CAT LLM Server")
-            shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()}
-            shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else ""
+            # Check that at least some models have been enabled for use
+            shared_llm_enabled_models = config.get("llm.enabled_models", [])
+            shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items() if model in shared_llm_enabled_models}
+            if not shared_llm_models:
+                shared_llm_name = False
+                shared_llm_default = ""
+            else:
+                shared_llm_name = config.get("llm.host_name", "4CAT LLM Server")
+                shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else ""
         else:
             shared_llm_name = False
             shared_llm_default = ""

From 36fe0ed734184e1012033cb8469c2d96a9ca14a3 Mon Sep 17 00:00:00 2001
From: Dale Wahl
Date: Thu, 5 Mar 2026 16:51:50 +0100
Subject: [PATCH 7/8] update docker readme so people can use ollama

---
 docker/README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/docker/README.md b/docker/README.md
index 00f0862fc..de5dd318b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -42,3 +42,63 @@
 https://github.com/docker/buildx/issues/426
 https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0
 
 4. More errors coming soon! (No doubt)
+
+---
+
+## Running a local Ollama instance alongside 4CAT
+
+4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors.
+A Docker Compose override file (`docker-compose_ollama.yml`) is included to add
+Ollama as a sidecar service so you do not need to run it separately on the host.
+
+### Quick start
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+```
+
+This starts the standard 4CAT stack plus an `ollama` container that is reachable
+within the Docker network, and also on `localhost:11434` on the host unless you
+remove the `ports` block from the override file.
+
+### Configuring 4CAT to use Ollama
+
+1. Log in as admin and open **Control Panel → Settings**.
+2. Set the following LLM fields:
+
+   | Setting | Value |
+   |---|---|
+   | LLM Provider Type | `ollama` |
+   | LLM Server URL | `http://ollama:11434` |
+   | LLM Access | enabled |
+
+3. Save settings.
+4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled).
+5. Use the **Refresh** button to load available models, then **Pull** a model
+   (e.g. 
`llama3.2:3b`) to download it from the Ollama library. +6. Enable the models you want to make available to users. + +### GPU support (NVIDIA) + +Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and +ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +is installed on your host. Then restart the stack with the override: + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +### Persisting models + +Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume. +They survive container restarts and re-creations unless you explicitly remove +the volume (`docker volume rm 4cat_ollama_data`). + +### Using an external Ollama server + +If you already run Ollama on the host or elsewhere, skip the override file and +point 4CAT directly at that server: + +- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. +- **Remote server**: use the server's reachable URL and configure any required + API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. From eb4b49a98126002100be6d16a0f879572bb1469f Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:56:32 +0100 Subject: [PATCH 8/8] Cleanup: stale enabled models, refresh_items scheduling, README auto-config docs (#581) * Initial plan * Fix stale enabled models, disable refresh_items scheduling, update README docs Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --- backend/workers/ollama_manager.py | 8 ++++++++ backend/workers/refresh_items.py | 15 +++++++-------- docker/README.md | 15 ++++++++++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 64808f52d..3c6950b59 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -104,6 +104,14 @@ def refresh_models(self): self.config.set("llm.available_models", available_models) self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + except requests.RequestException as e: self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 96a7da6b0..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -13,15 +13,14 @@ class ItemUpdater(BasicWorker): type = "refresh-items" max_workers = 1 - @classmethod - def ensure_job(cls, config=None): - """ - Ensure that the refresher is always running - - :return: Job parameters for the worker - """ - return {"remote_id": "refresh-items", "interval": 60} + # ensure_job is intentionally disabled: this worker currently does nothing + # and would only create unnecessary job queue churn. 
Re-enable when work() + # has actual tasks to perform. + # @classmethod + # def ensure_job(cls, config=None): + # return {"remote_id": "refresh-items", "interval": 60} def work(self): + # Placeholder – no tasks implemented yet. self.job.finish() \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index de5dd318b..31843b2ce 100644 --- a/docker/README.md +++ b/docker/README.md @@ -63,8 +63,17 @@ the host via the exposed port). ### Configuring 4CAT to use Ollama +#### Automatic configuration (fresh Docker install with sidecar) + +When you start 4CAT for the first time using the Ollama override file, the +`docker_setup.py` initialisation script automatically detects the `ollama` +sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access** +for you. You can skip to step 2 below. + +#### Manual configuration (or to verify/change settings) + 1. Log in as admin and open **Control Panel → Settings**. -2. Set the following LLM fields: +2. Confirm or set the following LLM fields: | Setting | Value | |---|---| @@ -102,3 +111,7 @@ point 4CAT directly at that server: - **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. - **Remote server**: use the server's reachable URL and configure any required API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. + +In both cases, configure the LLM settings manually via **Control Panel → Settings** +(see *Manual configuration* above), using the appropriate server URL instead of +`http://ollama:11434`.