177 changes: 177 additions & 0 deletions backend/workers/ollama_manager.py
@@ -0,0 +1,177 @@
"""
Manage Ollama LLM models
"""
import json
import requests

from backend.lib.worker import BasicWorker


class OllamaManager(BasicWorker):
"""
Manage Ollama LLM models

Periodically refreshes the list of available models from an Ollama server.
Can also pull or delete models on demand when queued with a specific task.

Job details:
- task: "refresh" (default), "pull", or "delete"

Job remote_id:
- For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand)
- For pull/delete: the model name to pull or delete
"""
type = "manage-ollama"
max_workers = 1

@classmethod
def ensure_job(cls, config=None):
"""
Ensure the daily refresh job is always scheduled

:return: Job parameters for the worker
"""
return {"remote_id": "manage-ollama-refresh", "interval": 86400}

def work(self):
task = self.job.details.get("task", "refresh") if self.job.details else "refresh"
model_name = self.job.data["remote_id"]

if task == "refresh":
self.refresh_models()
elif task == "pull":
success = self.pull_model(model_name)
if success:
self.refresh_models()
elif task == "delete":
success = self.delete_model(model_name)
if success:
self.refresh_models()
Comment on lines +46 to +49

Copilot AI Mar 10, 2026

When a model is successfully deleted from the Ollama server, refresh_models() updates llm.available_models to remove the deleted model, but llm.enabled_models is never cleaned up. This means deleted models accumulate as stale entries in llm.enabled_models. While this doesn't cause an immediate runtime error (since llm_prompter.py intersects the two lists), it's misleading: after a delete-and-refresh cycle, the model would disappear from the available models table in the UI, but it remains in the enabled list. If the model is later re-pulled, it would reappear as already enabled, which could be surprising.

The delete_model() method (or the work() method after a successful delete) should remove the model from llm.enabled_models, or at minimum refresh_models() should reconcile llm.enabled_models to remove entries no longer present in llm.available_models.
else:
self.log.warning(f"OllamaManager: unknown task '{task}'")

self.job.finish()

def _get_llm_headers(self):
"""Build request headers for LLM server auth."""
headers = {"Content-Type": "application/json"}
llm_api_key = self.config.get("llm.api_key", "")
llm_auth_type = self.config.get("llm.auth_type", "")
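# Note: llm.auth_type is used as the header *name* and llm.api_key as the full
# header value (e.g. auth_type "Authorization" with api_key "Bearer <key>";
# these example values are illustrative, not settings defined in this PR)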
if llm_api_key and llm_auth_type:
headers[llm_auth_type] = llm_api_key
return headers

def refresh_models(self):
"""
Query the Ollama server for available models and update llm.available_models.
"""
llm_server = self.config.get("llm.server", "")
if not llm_server:
return

headers = self._get_llm_headers()
available_models = {}

try:
response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10)
if response.status_code != 200:
self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}")
return

for model in response.json().get("models", []):
model_id = model["name"]
try:
meta = requests.post(
f"{llm_server}/api/show",
headers=headers,
json={"model": model_id},
timeout=10
).json()
display_name = (
f"{meta['model_info']['general.basename']}"
f" ({meta['details']['parameter_size']} parameters)"
)
except (requests.RequestException, json.JSONDecodeError, KeyError) as e:
self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only")
display_name = model_id

available_models[model_id] = {
"name": display_name,
"model_card": f"https://ollama.com/library/{model_id.split(':')[0]}",
"provider": "local"
}

self.config.set("llm.available_models", available_models)
self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)")

# Reconcile enabled models: remove any that are no longer available
enabled_models = self.config.get("llm.enabled_models", [])
reconciled = [m for m in enabled_models if m in available_models]
if len(reconciled) != len(enabled_models):
removed = set(enabled_models) - set(reconciled)
self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}")
self.config.set("llm.enabled_models", reconciled)

except requests.RequestException as e:
self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}")

def pull_model(self, model_name):
"""
Pull a model from the Ollama registry.

:param str model_name: Model name (e.g. "llama3:8b")
:return bool: True on success
"""
llm_server = self.config.get("llm.server", "")
if not llm_server:
self.log.warning("OllamaManager: cannot pull model - no LLM server configured")
return False

headers = self._get_llm_headers()
try:
# stream=False waits for the pull to complete before returning
response = requests.post(
f"{llm_server}/api/pull",
headers=headers,
json={"model": model_name, "stream": False},
timeout=600
)
if response.status_code == 200:
self.log.info(f"OllamaManager: successfully pulled model '{model_name}'")
return True
else:
self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}")
return False
except requests.RequestException as e:
self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}")
return False

def delete_model(self, model_name):
"""
Delete a model from the Ollama server.

:param str model_name: Model name (e.g. "llama3:8b")
:return bool: True on success
"""
llm_server = self.config.get("llm.server", "")
if not llm_server:
self.log.warning("OllamaManager: cannot delete model - no LLM server configured")
return False

headers = self._get_llm_headers()
try:
response = requests.delete(
f"{llm_server}/api/delete",
headers=headers,
json={"model": model_name},
timeout=30
)
if response.status_code == 200:
self.log.info(f"OllamaManager: successfully deleted model '{model_name}'")
return True
else:
self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}")
return False
except requests.RequestException as e:
self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}")
return False
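As the class docstring notes, on-demand pulls and deletes are driven by queuing a manage-ollama job whose remote_id is the model name and whose details carry the task. A minimal sketch of what queuing such a job could look like, assuming 4CAT's job queue exposes an add_job(jobtype, details=..., remote_id=...) method; the exact signature, the queue object, and the llama3:8b model name are illustrative assumptions, not part of this PR:

def queue_ollama_task(queue, model_name, task):
    """Queue an on-demand pull or delete for the OllamaManager worker."""
    # remote_id carries the model name; details["task"] selects the operation
    queue.add_job("manage-ollama", details={"task": task}, remote_id=model_name)

# e.g. queue_ollama_task(queue, "llama3:8b", "pull")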
70 changes: 9 additions & 61 deletions backend/workers/refresh_items.py
@@ -1,78 +1,26 @@
"""
Refresh items
"""
import json

import requests

from backend.lib.worker import BasicWorker

class ItemUpdater(BasicWorker):
"""
Refresh 4CAT items

Refreshes settings that are dependent on external factors
Refreshes settings that are dependent on external factors.
LLM model refreshing is handled by the OllamaManager worker.
"""
type = "refresh-items"
max_workers = 1

@classmethod
def ensure_job(cls, config=None):
"""
Ensure that the refresher is always running

This is used to ensure that the refresher is always running, and if it is
not, it will be started by the WorkerManager.

:return: Job parameters for the worker
"""
return {"remote_id": "refresh-items", "interval": 60}
# ensure_job is intentionally disabled: this worker currently does nothing
# and would only create unnecessary job queue churn. Re-enable when work()
# has actual tasks to perform.
# @classmethod
# def ensure_job(cls, config=None):
# return {"remote_id": "refresh-items", "interval": 60}

def work(self):
# Refresh items
self.refresh_settings()

# Placeholder – no tasks implemented yet.
self.job.finish()

def refresh_settings(self):
"""
Refresh settings
"""
# LLM server settings
llm_provider = self.config.get("llm.provider_type", "none").lower()
llm_server = self.config.get("llm.server", "")

# For now we only support the Ollama API
if llm_provider == "ollama" and llm_server:
headers = {"Content-Type": "application/json"}
llm_api_key = self.config.get("llm.api_key", "")
llm_auth_type = self.config.get("llm.auth_type", "")
if llm_api_key and llm_auth_type:
headers[llm_auth_type] = llm_api_key

available_models = {}
try:
response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10)
if response.status_code == 200:
settings = response.json()
for model in settings.get("models", []):
model = model["name"]
try:
model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json()
available_models[model] = {
"name": f"{model_metadata['model_info']['general.basename']} ({model_metadata['details']['parameter_size']} parameters)",
"model_card": f"https://ollama.com/library/{model}",
"provider": "local"
}

except (requests.RequestException, json.JSONDecodeError, KeyError) as e:
self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})")

self.config.set("llm.available_models", available_models)
self.log.debug("Refreshed LLM server settings cache")
else:
self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}")

except requests.RequestException as e:
self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}")

10 changes: 9 additions & 1 deletion common/lib/config_definition.py
@@ -621,7 +621,15 @@
"type": UserInput.OPTION_TEXT_JSON,
"default": {},
"help": "Available LLM models",
"tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.",
"tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.",
"indirect": True,
"global": True
},
"llm.enabled_models": {
"type": UserInput.OPTION_TEXT_JSON,
"default": [],
"help": "Enabled LLM models",
"tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.",
"indirect": True,
"global": True
},
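Taken together with the OllamaManager worker above, these two settings hold data shaped roughly as follows; the model name is illustrative and the dictionary value mirrors what refresh_models() writes:

# Illustrative contents only; actual keys depend on the models installed on the server
llm_available_models = {
    "llama3:8b": {
        "name": "llama3 (8.0B parameters)",
        "model_card": "https://ollama.com/library/llama3",
        "provider": "local",
    },
}
llm_enabled_models = ["llama3:8b"]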
54 changes: 54 additions & 0 deletions docker-compose_ollama.yml
@@ -0,0 +1,54 @@
# Use this file as an override to add a local Ollama instance to your 4CAT stack.
#
# Usage:
# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
#
# Once running, configure 4CAT via the Control Panel → Settings → LLM:
# LLM Provider Type : ollama
# LLM Server URL : http://ollama:11434
#
# GPU support (NVIDIA):
# Uncomment the `deploy.resources` block in the ollama service below and
# ensure the NVIDIA Container Toolkit is installed on your host.
# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
#
# GPU support (Apple Silicon / AMD):
# Pass the appropriate device through your host's Docker settings instead.
# Ollama will automatically detect the GPU when it is available inside the container.

services:
ollama:
image: ollama/ollama:latest
container_name: 4cat_ollama
restart: unless-stopped
volumes:
- 4cat_ollama:/root/.ollama
# Expose the Ollama API on the host for optional external access or
# management with the Ollama CLI. Remove this block if you want to keep
# Ollama accessible only within the Docker network.
ports:
- "127.0.0.1:11434:11434"
healthcheck:
test: ["CMD", "ollama", "ls"]
interval: 10s
timeout: 5s
retries: 5
# --- NVIDIA GPU support (uncomment to enable) ---
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: all
# capabilities: [gpu]

# Make the 4CAT backend wait for Ollama to be healthy before starting.
# This prevents initial model-refresh failures on first boot.
backend:
depends_on:
ollama:
condition: service_healthy

volumes:
4cat_ollama:
name: 4cat_ollama_data
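With the stack running, models can also be managed with the Ollama CLI inside the container, complementing the OllamaManager worker. A couple of illustrative commands, assuming the container name from this file (4cat_ollama) and a hypothetical model name:

docker exec -it 4cat_ollama ollama pull llama3:8b
docker exec -it 4cat_ollama ollama list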