From dac6e84a25fd8594c8d9096583c3462cc4f2d8f3 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 9 Feb 2026 13:50:01 +0000 Subject: [PATCH] Add kernel restart functionality for recovery from unresponsive states Implements a "Restart Kernel" feature that allows users to recover from runtime issues (memory leaks, hung processes, corrupted state) without losing code or flow configuration. Backend: - Add RESTARTING state to KernelState enum - Add restart_kernel() method to KernelManager that stops the container, creates a fresh one with the same config (packages, memory/CPU limits), and waits for health check - Add _get_container_logs() helper for diagnostic output on restart failure - Add POST /kernels/{kernel_id}/restart API endpoint Frontend: - Add "restarting" to KernelState type with spinner icon and yellow badge - Add KernelApi.restart() method - Add "Restart" button to KernelCard (visible when idle/executing/error) - Add restart button to PythonScript node settings next to kernel dropdown - Wire up restart handling in KernelManagerView and useKernelManager - Status indicator updates in real-time (restarting -> idle) - Error messages surface container logs on restart failure https://claude.ai/code/session_01G7rwmj8nw9LnXdEVppxgnp --- flowfile_core/flowfile_core/kernel/manager.py | 54 +++++++++++++++++ flowfile_core/flowfile_core/kernel/models.py | 1 + flowfile_core/flowfile_core/kernel/routes.py | 15 +++++ .../src/renderer/app/api/kernel.api.ts | 13 ++++ .../elements/pythonScript/PythonScript.vue | 59 +++++++++++++++++++ .../src/renderer/app/types/kernel.types.ts | 2 +- .../views/KernelManagerView/KernelCard.vue | 29 ++++++++- .../KernelManagerView/KernelManagerView.vue | 10 ++++ .../KernelManagerView/KernelStatusBadge.vue | 6 ++ .../KernelManagerView/useKernelManager.ts | 11 ++++ 10 files changed, 198 insertions(+), 2 deletions(-) diff --git a/flowfile_core/flowfile_core/kernel/manager.py b/flowfile_core/flowfile_core/kernel/manager.py index b3ec0cec..7b23e2bc 
100644
--- a/flowfile_core/flowfile_core/kernel/manager.py
+++ b/flowfile_core/flowfile_core/kernel/manager.py
@@ -529,6 +529,55 @@ async def stop_kernel(self, kernel_id: str) -> None:
         kernel.container_id = None
         logger.info("Stopped kernel '%s'", kernel_id)
 
+    async def restart_kernel(self, kernel_id: str) -> KernelInfo:
+        """Stop and restart a kernel container, preserving configuration.
+
+        All in-memory artifacts are cleared (they do not survive a restart).
+        The kernel configuration (packages, memory/CPU limits) is preserved.
+        If the restart fails, the error message includes container logs.
+
+        Raises:
+            RuntimeError: If a Docker, HTTP, timeout or OS error occurs while
+                recreating the container or waiting for it to become healthy.
+                The kernel is left in the ERROR state with ``error_message`` set.
+        """
+        kernel = self._get_kernel_or_raise(kernel_id)
+        kernel.state = KernelState.RESTARTING
+        kernel.error_message = None
+
+        try:
+            # Stop and remove the existing container
+            self._cleanup_container(kernel_id)
+            kernel.container_id = None
+
+            # Re-allocate port if needed (local mode only)
+            if kernel.port is None and not self._kernel_volume:
+                kernel.port = self._allocate_port()
+
+            # Start a fresh container with the same configuration
+            env = self._build_kernel_env(kernel_id, kernel)
+            run_kwargs = self._build_run_kwargs(kernel_id, kernel, env)
+            container = self._docker.containers.run(_KERNEL_IMAGE, **run_kwargs)
+            kernel.container_id = container.id
+            await self._wait_for_healthy(kernel_id, timeout=kernel.health_timeout)
+            kernel.state = KernelState.IDLE
+            logger.info("Restarted kernel '%s' (container %s)", kernel_id, container.short_id)
+        except (docker.errors.DockerException, httpx.HTTPError, TimeoutError, OSError) as exc:
+            kernel.state = KernelState.ERROR
+            # Capture logs for diagnostics before the failed container is cleaned up below
+            logs = self._get_container_logs(kernel_id)
+            if logs:
+                kernel.error_message = f"Restart failed: {exc}\n\nContainer logs:\n{logs}"
+            else:
+                kernel.error_message = f"Restart failed: {exc}"
+            logger.error("Failed to restart kernel '%s': %s", kernel_id, exc)
+            self._cleanup_container(kernel_id)
+            # Mirror stop_kernel: forget the container ID once cleanup has run
+            kernel.container_id = None
+            raise RuntimeError(kernel.error_message) from exc
+
+        return kernel
+
     async def delete_kernel(self, kernel_id: str) -> None:
         kernel = self._get_kernel_or_raise(kernel_id)
         if kernel.state in (KernelState.IDLE, KernelState.EXECUTING):
@@ -818,6 +867,18 @@ def _cleanup_container(self, kernel_id: str) -> None:
         except (docker.errors.APIError, docker.errors.DockerException) as exc:
             logger.warning("Error cleaning up container for kernel '%s': %s", kernel_id, exc)
 
+    def _get_container_logs(self, kernel_id: str, tail: int = 50) -> str:
+        """Return the last *tail* log lines of the kernel's container, or "" if they cannot be retrieved."""
+        kernel = self._kernels.get(kernel_id)
+        if kernel is None or kernel.container_id is None:
+            return ""
+        try:
+            container = self._docker.containers.get(kernel.container_id)
+            return container.logs(tail=tail).decode("utf-8", errors="replace")
+        except Exception as exc:  # best-effort diagnostics: any failure just means "no logs"
+            logger.debug("Could not retrieve logs for kernel '%s': %s", kernel_id, exc)
+            return ""
+
     async def _wait_for_healthy(self, kernel_id: str, timeout: int = _HEALTH_TIMEOUT) -> None:
         kernel = self._get_kernel_or_raise(kernel_id)
         url = f"{self._kernel_url(kernel)}/health"
diff --git a/flowfile_core/flowfile_core/kernel/models.py b/flowfile_core/flowfile_core/kernel/models.py
index 03d1cb71..b1bb0f55 100644
--- a/flowfile_core/flowfile_core/kernel/models.py
+++ b/flowfile_core/flowfile_core/kernel/models.py
@@ -7,6 +7,7 @@
 class KernelState(str, Enum):
     STOPPED = "stopped"
     STARTING = "starting"
+    RESTARTING = "restarting"
     IDLE = "idle"
     EXECUTING = "executing"
     ERROR = "error"
diff --git a/flowfile_core/flowfile_core/kernel/routes.py b/flowfile_core/flowfile_core/kernel/routes.py
index fd507e90..3a992efc 100644
--- a/flowfile_core/flowfile_core/kernel/routes.py
+++ b/flowfile_core/flowfile_core/kernel/routes.py
@@ -128,6 +128,21 @@ async def stop_kernel(kernel_id: str, current_user=Depends(get_current_active_us
         raise HTTPException(status_code=404, detail=str(exc))
 
 
+@router.post("/{kernel_id}/restart", response_model=KernelInfo)
+async def restart_kernel(kernel_id: str, current_user=Depends(get_current_active_user)):
+    """Restart a kernel container, preserving configuration but clearing all in-memory artifacts."""
+    manager = _get_manager()
+    kernel = await manager.get_kernel(kernel_id)
+    if kernel is None:
+        raise HTTPException(status_code=404, detail=f"Kernel '{kernel_id}' not found")
+    if manager.get_kernel_owner(kernel_id) != current_user.id:  # only the owning user may restart
+        raise HTTPException(status_code=403, detail="Not authorized to access this kernel")
+    try:
+        return await manager.restart_kernel(kernel_id)
+    except Exception as exc:  # surface any restart failure as a 500 carrying the error text
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
 @router.post("/{kernel_id}/execute", response_model=ExecuteResult)
 async def execute_code(kernel_id: str, request: ExecuteRequest, current_user=Depends(get_current_active_user)):
     manager = _get_manager()
diff --git a/flowfile_frontend/src/renderer/app/api/kernel.api.ts b/flowfile_frontend/src/renderer/app/api/kernel.api.ts
index cc23304a..fde836b7 100644
--- a/flowfile_frontend/src/renderer/app/api/kernel.api.ts
+++ b/flowfile_frontend/src/renderer/app/api/kernel.api.ts
@@ -75,6 +75,19 @@ export class KernelApi {
     }
   }
 
+  static async restart(kernelId: string): Promise {
+    try {
+      const response = await axios.post(
+        `${API_BASE_URL}/${encodeURIComponent(kernelId)}/restart`,
+      );
+      return response.data;
+    } catch (error) {
+      console.error("API Error: Failed to restart kernel:", error);
+      const errorMsg = (error as any).response?.data?.detail || "Failed to restart kernel";
+      throw new Error(errorMsg);
+    }
+  }
+
   static async getArtifacts(kernelId: string): Promise> {
     try {
       const response = await axios.get>(
diff --git a/flowfile_frontend/src/renderer/app/components/nodes/node-types/elements/pythonScript/PythonScript.vue b/flowfile_frontend/src/renderer/app/components/nodes/node-types/elements/pythonScript/PythonScript.vue
index cfa40850..c17c43aa 100644
--- a/flowfile_frontend/src/renderer/app/components/nodes/node-types/elements/pythonScript/PythonScript.vue
+++ b/flowfile_frontend/src/renderer/app/components/nodes/node-types/elements/pythonScript/PythonScript.vue @@ -34,6 +34,15 @@ + Manage Kernels @@ -50,6 +59,7 @@ + @@ -247,6 +257,8 @@ const stopKernelPolling = () => { } }; +const isRestarting = ref(false); + const handleKernelChange = (kernelId: string | null) => { if (nodePythonScript.value) { nodePythonScript.value.python_script_input.kernel_id = kernelId ?? null; @@ -254,6 +266,22 @@ const handleKernelChange = (kernelId: string | null) => { loadArtifacts(); }; +const handleRestartKernel = async () => { + if (!selectedKernelId.value || isRestarting.value) return; + isRestarting.value = true; + try { + await KernelApi.restart(selectedKernelId.value); + await loadKernels(); + loadArtifacts(); + } catch (error: any) { + const msg = error.message || "Failed to restart kernel."; + console.error("Kernel restart failed:", msg); + alert(`Kernel restart failed: ${msg}`); + } finally { + isRestarting.value = false; + } +}; + // ─── Artifact helpers ─────────────────────────────────────────────────────── const loadArtifacts = async () => { @@ -472,6 +500,33 @@ defineExpose({ loadNodeData, pushNodeData, saveSettings }); flex: 1; } +.restart-kernel-btn { + display: inline-flex; + align-items: center; + justify-content: center; + width: 28px; + height: 28px; + padding: 0; + border: 1px solid var(--el-border-color, #dcdfe6); + border-radius: 4px; + background: var(--el-fill-color-blank, #fff); + color: var(--el-text-color-regular, #606266); + cursor: pointer; + font-size: 0.8rem; + flex-shrink: 0; + transition: all 0.15s; +} + +.restart-kernel-btn:hover:not(:disabled) { + color: var(--el-color-warning, #e6a23c); + border-color: var(--el-color-warning, #e6a23c); +} + +.restart-kernel-btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + .manage-kernels-link { font-size: 0.8rem; color: var(--el-color-primary); @@ -516,6 +571,10 @@ defineExpose({ loadNodeData, pushNodeData, saveSettings }); background-color: #f56c6c; } 
+.kernel-state-dot--restarting { + background-color: #e6a23c; +} + .kernel-state-label { font-size: 0.8rem; color: var(--el-text-color-secondary); diff --git a/flowfile_frontend/src/renderer/app/types/kernel.types.ts b/flowfile_frontend/src/renderer/app/types/kernel.types.ts index 99e924ea..57c224ab 100644 --- a/flowfile_frontend/src/renderer/app/types/kernel.types.ts +++ b/flowfile_frontend/src/renderer/app/types/kernel.types.ts @@ -1,6 +1,6 @@ // Kernel management related TypeScript interfaces and types -export type KernelState = "stopped" | "starting" | "idle" | "executing" | "error"; +export type KernelState = "stopped" | "starting" | "restarting" | "idle" | "executing" | "error"; export interface KernelConfig { id: string; diff --git a/flowfile_frontend/src/renderer/app/views/KernelManagerView/KernelCard.vue b/flowfile_frontend/src/renderer/app/views/KernelManagerView/KernelCard.vue index 86bafae9..713deda5 100644 --- a/flowfile_frontend/src/renderer/app/views/KernelManagerView/KernelCard.vue +++ b/flowfile_frontend/src/renderer/app/views/KernelManagerView/KernelCard.vue @@ -55,6 +55,14 @@ > Start +