Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 56 additions & 26 deletions unraid_mcp/tools/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,36 @@

from fastmcp import FastMCP

from ..config.logging import logger
from ..config.settings import UNRAID_API_URL, UNRAID_MCP_HOST, UNRAID_MCP_PORT, UNRAID_MCP_TRANSPORT
from ..core.client import make_graphql_request
from unraid_mcp.config.logging import logger
from unraid_mcp.config.settings import (
UNRAID_API_URL,
UNRAID_MCP_HOST,
UNRAID_MCP_PORT,
UNRAID_MCP_TRANSPORT,
)
from unraid_mcp.core.client import make_graphql_request

# Captured once, when this module is first imported. health_check subtracts it
# from the current time to report how long the process has been running.
_PROCESS_START_TIME = time.time()

# API latency limits in milliseconds for the health assessment: a latency above
# the first is reported as "warning", above the second as "degraded".
API_LATENCY_WARNING_MS = 5_000
API_LATENCY_DEGRADED_MS = 10_000

# Ordering used when several check results are folded into one overall status:
# the larger the number, the more severe the condition. "critical" shares the
# top rank with "unhealthy" and is reserved for future use.
_SEVERITY_RANK = {
    "healthy": 0,
    "warning": 1,
    "degraded": 2,
    "unhealthy": 3,
    "critical": 3,
}


def _update_health_status(current: str, new: str) -> str:
    """Return whichever of ``current`` and ``new`` is the more severe status.

    Severity comes from ``_SEVERITY_RANK``; a status that is not in that table
    is ranked 0, the same as "healthy". On a tie ``current`` is kept, so the
    result only changes when ``new`` is strictly more severe.

    Args:
        current: The status accumulated so far.
        new: The candidate status produced by the latest check.

    Returns:
        ``new`` if it outranks ``current``, otherwise ``current``.
    """
    current_rank = _SEVERITY_RANK.get(current, 0)
    candidate_rank = _SEVERITY_RANK.get(new, 0)
    return new if candidate_rank > current_rank else current


def register_health_tools(mcp: FastMCP) -> None:
Expand All @@ -27,6 +54,7 @@ def register_health_tools(mcp: FastMCP) -> None:
async def health_check() -> dict[str, Any]:
"""Returns comprehensive health status of the Unraid MCP server and system for monitoring purposes."""
start_time = time.time()
process_start_time = _PROCESS_START_TIME # snapshot for consistent timing
health_status = "healthy"
issues = []

Expand All @@ -37,7 +65,7 @@ async def health_check() -> dict[str, Any]:
info {
machineId
time
versions { unraid }
versions { core { unraid } }
os { uptime }
}
array {
Expand All @@ -64,16 +92,16 @@ async def health_check() -> dict[str, Any]:
# Base health info
health_info = {
"status": health_status,
"timestamp": datetime.datetime.utcnow().isoformat(),
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
"api_latency_ms": api_latency,
"server": {
"name": "Unraid MCP Server",
"version": "0.1.0",
"transport": UNRAID_MCP_TRANSPORT,
"host": UNRAID_MCP_HOST,
"port": UNRAID_MCP_PORT,
"process_uptime_seconds": time.time() - start_time # Rough estimate
}
"process_uptime_seconds": round(time.time() - process_start_time, 2),
},
}

if not response_data:
Expand All @@ -91,11 +119,11 @@ async def health_check() -> dict[str, Any]:
"url": UNRAID_API_URL,
"machine_id": info.get("machineId"),
"time": info.get("time"),
"version": info.get("versions", {}).get("unraid"),
"uptime": info.get("os", {}).get("uptime")
"version": info.get("versions", {}).get("core", {}).get("unraid"),
"uptime": info.get("os", {}).get("uptime"),
}
else:
health_status = "degraded"
health_status = _update_health_status(health_status, "degraded")
issues.append("Unable to retrieve system info")

# Array health analysis
Expand All @@ -104,13 +132,13 @@ async def health_check() -> dict[str, Any]:
array_state = array_info.get("state", "unknown")
health_info["array_status"] = {
"state": array_state,
"healthy": array_state in ["STARTED", "STOPPED"]
"healthy": array_state in ["STARTED", "STOPPED"],
}
if array_state not in ["STARTED", "STOPPED"]:
health_status = "warning"
health_status = _update_health_status(health_status, "warning")
issues.append(f"Array in unexpected state: {array_state}")
else:
health_status = "warning"
health_status = _update_health_status(health_status, "warning")
issues.append("Unable to retrieve array status")

# Notifications analysis
Expand All @@ -125,11 +153,11 @@ async def health_check() -> dict[str, Any]:
"unread_total": total_unread,
"unread_alerts": alert_count,
"unread_warnings": warning_count,
"has_critical_notifications": alert_count > 0
"has_critical_notifications": alert_count > 0,
}

if alert_count > 0:
health_status = "warning"
health_status = _update_health_status(health_status, "warning")
issues.append(f"{alert_count} unread alert notification(s)")

# Docker services analysis
Expand All @@ -143,16 +171,18 @@ async def health_check() -> dict[str, Any]:
"total_containers": len(containers),
"running_containers": len(running_containers),
"stopped_containers": len(stopped_containers),
"containers_healthy": len([c for c in containers if c.get("status", "").startswith("Up")])
"containers_healthy": len(
[c for c in containers if c.get("status", "").startswith("Up")]
),
}

# API performance assessment
if api_latency > 5000: # > 5 seconds
health_status = "warning"
issues.append(f"High API latency: {api_latency}ms")
elif api_latency > 10000: # > 10 seconds
health_status = "degraded"
if api_latency > API_LATENCY_DEGRADED_MS: # > 10 seconds
health_status = _update_health_status(health_status, "degraded")
issues.append(f"Very high API latency: {api_latency}ms")
elif api_latency > API_LATENCY_WARNING_MS: # > 5 seconds
health_status = _update_health_status(health_status, "warning")
issues.append(f"High API latency: {api_latency}ms")

# Final status determination
health_info["status"] = health_status
Expand All @@ -162,7 +192,7 @@ async def health_check() -> dict[str, Any]:
# Add performance metrics
health_info["performance"] = {
"api_response_time_ms": api_latency,
"health_check_duration_ms": round((time.time() - start_time) * 1000, 2)
"health_check_duration_ms": round((time.time() - start_time) * 1000, 2),
}

return health_info
Expand All @@ -171,16 +201,16 @@ async def health_check() -> dict[str, Any]:
logger.error(f"Health check failed: {e}")
return {
"status": "unhealthy",
"timestamp": datetime.datetime.utcnow().isoformat(),
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
"error": str(e),
"api_latency_ms": round((time.time() - start_time) * 1000, 2) if 'start_time' in locals() else None,
"api_latency_ms": round((time.time() - start_time) * 1000, 2),
"server": {
"name": "Unraid MCP Server",
"version": "0.1.0",
"transport": UNRAID_MCP_TRANSPORT,
"host": UNRAID_MCP_HOST,
"port": UNRAID_MCP_PORT
}
"port": UNRAID_MCP_PORT,
},
}

logger.info("Health tools registered successfully")
72 changes: 41 additions & 31 deletions unraid_mcp/tools/rclone.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,22 @@ async def list_rclone_remotes() -> list[dict[str, Any]]:

response_data = await make_graphql_request(query)

if "rclone" in response_data and "remotes" in response_data["rclone"]:
remotes = response_data["rclone"]["remotes"]
logger.info(f"Retrieved {len(remotes)} RClone remotes")
return list(remotes) if isinstance(remotes, list) else []
rclone_data = response_data.get("rclone")
# Handle case where rclone field is missing or None
if not rclone_data:
return []

return []
remotes = rclone_data.get("remotes")
# Handle case where remotes field is missing or None
if remotes is None:
return []

logger.info(f"Retrieved {len(remotes)} RClone remotes")
return list(remotes) if isinstance(remotes, list) else []

except Exception as e:
if isinstance(e, ToolError):
raise e
logger.error(f"Failed to list RClone remotes: {str(e)}")
raise ToolError(f"Failed to list RClone remotes: {str(e)}") from e

Expand All @@ -74,23 +82,34 @@ async def get_rclone_config_form(provider_type: str | None = None) -> dict[str,

variables = {}
if provider_type:
variables["formOptions"] = {"providerType": provider_type}
# Sanitize provider type to prevent potential path issues
# The user reported "url must not start with a slash", so we strip slashes just in case
clean_provider_type = provider_type.strip("/")
variables["formOptions"] = {"providerType": clean_provider_type}

response_data = await make_graphql_request(query, variables)

if "rclone" in response_data and "configForm" in response_data["rclone"]:
form_data = response_data["rclone"]["configForm"]
logger.info(f"Retrieved RClone config form for {provider_type or 'general'}")
return dict(form_data) if isinstance(form_data, dict) else {}
rclone_data = response_data.get("rclone")
if not rclone_data:
raise ToolError("No RClone data received from API")

form_data = rclone_data.get("configForm")
if form_data is None:
raise ToolError("No RClone config form data received")

raise ToolError("No RClone config form data received")
logger.info(f"Retrieved RClone config form for {provider_type or 'general'}")
return dict(form_data) if isinstance(form_data, dict) else {}

except Exception as e:
logger.error(f"Failed to get RClone config form: {str(e)}")
raise ToolError(f"Failed to get RClone config form: {str(e)}") from e
if isinstance(e, ToolError):
raise e
logger.error(f"Failed to get RClone config form: {e}")
raise ToolError(f"Failed to get RClone config form: {e}") from e

@mcp.tool()
async def create_rclone_remote(name: str, provider_type: str, config_data: dict[str, Any]) -> dict[str, Any]:
async def create_rclone_remote(
name: str, provider_type: str, config_data: dict[str, Any]
) -> dict[str, Any]:
"""
Create a new RClone remote with the specified configuration.

Expand All @@ -112,13 +131,7 @@ async def create_rclone_remote(name: str, provider_type: str, config_data: dict[
}
"""

variables = {
"input": {
"name": name,
"type": provider_type,
"config": config_data
}
}
variables = {"input": {"name": name, "type": provider_type, "config": config_data}}

response_data = await make_graphql_request(mutation, variables)

Expand All @@ -128,12 +141,14 @@ async def create_rclone_remote(name: str, provider_type: str, config_data: dict[
return {
"success": True,
"message": f"RClone remote '{name}' created successfully",
"remote": remote_info
"remote": remote_info,
}

raise ToolError("Failed to create RClone remote")

except Exception as e:
if isinstance(e, ToolError):
raise e
logger.error(f"Failed to create RClone remote {name}: {str(e)}")
raise ToolError(f"Failed to create RClone remote {name}: {str(e)}") from e

Expand All @@ -154,24 +169,19 @@ async def delete_rclone_remote(name: str) -> dict[str, Any]:
}
"""

variables = {
"input": {
"name": name
}
}
variables = {"input": {"name": name}}

response_data = await make_graphql_request(mutation, variables)

if "rclone" in response_data and response_data["rclone"]["deleteRCloneRemote"]:
logger.info(f"Successfully deleted RClone remote: {name}")
return {
"success": True,
"message": f"RClone remote '{name}' deleted successfully"
}
return {"success": True, "message": f"RClone remote '{name}' deleted successfully"}

raise ToolError(f"Failed to delete RClone remote '{name}'")

except Exception as e:
if isinstance(e, ToolError):
raise e
logger.error(f"Failed to delete RClone remote {name}: {str(e)}")
raise ToolError(f"Failed to delete RClone remote {name}: {str(e)}") from e

Expand Down
Loading