Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 96 additions & 48 deletions desktop/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -618,57 +618,39 @@ async function monitorResearchUiStartup() {
}
}

function resolvePythonCommand() {
function resolvePythonCandidates() {
const isWindows = process.platform === "win32";
const localVenv = path.join(PROJECT_ROOT, ".venv", isWindows ? "Scripts" : "bin", isWindows ? "python.exe" : "python");
if (localVenv && require("fs").existsSync(localVenv)) {
return localVenv;
}
return process.env.PYTHON || (isWindows ? "python" : "python3");
}

function extractServerUrl(line) {
const match = String(line).match(/URL:\s*(http:\/\/[^\s]+)/i);
return match ? match[1] : null;
}

async function startResearchUiServer({ forceRestart = false } = {}) {
if (forceRestart) {
stopResearchUiServer({ force: true });
}

if (researchServerProcess) return;

const existingUrl = await detectResearchUiServerUrl();
if (existingUrl) {
researchServerOwned = false;
markResearchUiReady(existingUrl, "external");
appendLog(`[startup] reusing existing research_ui server at ${existingUrl}`);
return;
}

updateWorkspaceState({
status: "starting",
serverUrl: null,
error: null,
source: "managed",
const candidates = [
localVenv,
process.env.PYTHON || "",
isWindows ? "python" : "python3",
]
.map((value) => String(value || "").trim())
.filter(Boolean);
Comment on lines +621 to +630
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Consider checking execute permission (X_OK) for Python binaries on POSIX platforms.

Right now resolvePythonCandidates only checks fs.constants.R_OK for absolute paths. On POSIX you can have readable but non-executable files, which will reliably cause EACCES on spawn and force the retry logic. To avoid repeatedly trying unusable interpreters, also check X_OK on non-Windows platforms, e.g. fs.accessSync(candidate, fs.constants.R_OK | fs.constants.X_OK) behind a platform guard.

Suggested implementation:

function resolvePythonCandidates() {
  const isWindows = process.platform === "win32";
  const localVenv = path.join(
    PROJECT_ROOT,
    ".venv",
    isWindows ? "Scripts" : "bin",
    isWindows ? "python.exe" : "python",
  );
      // On POSIX require both read and execute permissions for Python binaries, to avoid
      // repeatedly trying interpreters that will fail with EACCES on spawn.
      const accessMode = process.platform === "win32"
        ? fs.constants.R_OK
        : fs.constants.R_OK | fs.constants.X_OK;

      fs.accessSync(candidate, accessMode);

I assumed there is an existing fs.accessSync(candidate, fs.constants.R_OK); call inside resolvePythonCandidates where absolute-path Python candidates are validated. If the call site differs, apply the same pattern:

  1. Compute accessMode with R_OK on Windows and R_OK | X_OK on non-Windows:
    const accessMode = process.platform === "win32" ? fs.constants.R_OK : fs.constants.R_OK | fs.constants.X_OK;
  2. Pass accessMode to fs.accessSync instead of fs.constants.R_OK.

No other logic (candidate enumeration, error handling, or retry behavior) needs to change for this improvement.


return [...new Set(candidates)].filter((candidate) => {
if (!path.isAbsolute(candidate)) return true;
try {
fs.accessSync(candidate, fs.constants.R_OK);
return true;
} catch (_error) {
return false;
}
});
scheduleResearchStartupTimeout();
}

researchServerProcess = spawn(resolvePythonCommand(), [SERVER_SCRIPT], {
cwd: PROJECT_ROOT,
windowsHide: true,
stdio: ["ignore", "pipe", "pipe"],
});
function bindResearchUiProcess(processHandle, pythonCommand, candidates, candidateIndex) {
researchServerProcess = processHandle;
researchServerOwned = true;

researchServerProcess.stdout.setEncoding("utf8");
researchServerProcess.stderr.setEncoding("utf8");
processHandle.stdout.setEncoding("utf8");
processHandle.stderr.setEncoding("utf8");
monitorResearchUiStartup().catch((error) => {
appendLog(`[startup-monitor-error] ${error.message}`);
});

researchServerProcess.stdout.on("data", (chunk) => {
processHandle.stdout.on("data", (chunk) => {
String(chunk || "")
.split(/\r?\n/)
.forEach((line) => {
Expand All @@ -678,15 +660,15 @@ async function startResearchUiServer({ forceRestart = false } = {}) {
if (discoveredUrl) {
isResearchUiReachable(discoveredUrl)
.then((reachable) => {
if (!reachable || !researchServerProcess) return;
if (!reachable || !researchServerProcess || researchServerProcess !== processHandle) return;
markResearchUiReady(discoveredUrl, "managed");
})
.catch(() => {});
}
});
});

researchServerProcess.stderr.on("data", (chunk) => {
processHandle.stderr.on("data", (chunk) => {
String(chunk || "")
.split(/\r?\n/)
.forEach((line) => {
Expand All @@ -695,7 +677,8 @@ async function startResearchUiServer({ forceRestart = false } = {}) {
});
});

researchServerProcess.on("exit", (code, signal) => {
processHandle.on("exit", (code, signal) => {
if (researchServerProcess !== processHandle) return;
researchServerProcess = null;
researchServerOwned = false;
clearResearchStartupTimer();
Expand All @@ -707,9 +690,20 @@ async function startResearchUiServer({ forceRestart = false } = {}) {
});
});

researchServerProcess.on("error", (error) => {
researchServerProcess = null;
researchServerOwned = false;
processHandle.on("error", (error) => {
if (researchServerProcess === processHandle) {
researchServerProcess = null;
researchServerOwned = false;
}
const shouldRetry =
["EACCES", "EPERM", "ENOENT"].includes(error?.code || "")
&& candidateIndex < candidates.length - 1;
if (shouldRetry) {
const nextCommand = candidates[candidateIndex + 1];
appendLog(`[spawn-error] ${pythonCommand} failed (${error.code}). Retrying with ${nextCommand}.`);
launchResearchUiProcess(candidates, candidateIndex + 1);
return;
}
clearResearchStartupTimer();
updateWorkspaceState({
status: "error",
Expand All @@ -721,6 +715,60 @@ async function startResearchUiServer({ forceRestart = false } = {}) {
});
}

function launchResearchUiProcess(candidates, candidateIndex = 0) {
const pythonCommand = candidates[candidateIndex];
appendLog(`[startup] launching research_ui with ${pythonCommand}`);
const child = spawn(pythonCommand, [SERVER_SCRIPT], {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (javascript.lang.security.detect-child-process): Detected calls to child_process from a function argument candidateIndex. This could lead to a command injection if the input is user controllable. Try to avoid calls to child_process, and if it is needed ensure user input is correctly sanitized or sandboxed.

Source: opengrep

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (javascript.lang.security.detect-child-process): Detected calls to child_process from a function argument candidateIndex. This could lead to a command injection if the input is user controllable. Try to avoid calls to child_process, and if it is needed ensure user input is correctly sanitized or sandboxed.

Source: opengrep

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (javascript.lang.security.detect-child-process): Detected calls to child_process from a function argument candidates. This could lead to a command injection if the input is user controllable. Try to avoid calls to child_process, and if it is needed ensure user input is correctly sanitized or sandboxed.

Source: opengrep

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (javascript.lang.security.detect-child-process): Detected calls to child_process from a function argument candidateIndex. This could lead to a command injection if the input is user controllable. Try to avoid calls to child_process, and if it is needed ensure user input is correctly sanitized or sandboxed.

Source: opengrep

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (javascript.lang.security.detect-child-process): Detected calls to child_process from a function argument candidates. This could lead to a command injection if the input is user controllable. Try to avoid calls to child_process, and if it is needed ensure user input is correctly sanitized or sandboxed.

Source: opengrep

cwd: PROJECT_ROOT,
windowsHide: true,
stdio: ["ignore", "pipe", "pipe"],
});
bindResearchUiProcess(child, pythonCommand, candidates, candidateIndex);
}

function extractServerUrl(line) {
const match = String(line).match(/URL:\s*(http:\/\/[^\s]+)/i);
return match ? match[1] : null;
}

async function startResearchUiServer({ forceRestart = false } = {}) {
if (forceRestart) {
stopResearchUiServer({ force: true });
}

if (researchServerProcess) return;

const existingUrl = await detectResearchUiServerUrl();
if (existingUrl) {
researchServerOwned = false;
markResearchUiReady(existingUrl, "external");
appendLog(`[startup] reusing existing research_ui server at ${existingUrl}`);
return;
}

updateWorkspaceState({
status: "starting",
serverUrl: null,
error: null,
source: "managed",
});
scheduleResearchStartupTimeout();

const pythonCandidates = resolvePythonCandidates();
if (!pythonCandidates.length) {
clearResearchStartupTimer();
updateWorkspaceState({
status: "error",
serverUrl: null,
error: "No usable Python interpreter was found for research_ui startup.",
source: "managed",
});
return;
}

launchResearchUiProcess(pythonCandidates, 0);
}

function stopResearchUiServer({ force = false } = {}) {
clearResearchStartupTimer();
if (!researchServerProcess || !researchServerOwned) return;
Expand Down Expand Up @@ -754,7 +802,7 @@ function createMainWindow() {
preload: path.join(DESKTOP_ROOT, "preload.js"),
contextIsolation: true,
nodeIntegration: false,
sandbox: false,
sandbox: true,
},
});

Expand Down
62 changes: 55 additions & 7 deletions desktop/renderer/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
stripWrappingQuotes as stripQuotes,
titleCase as titleCaseValue,
toneClass as resolveToneClass,
LruCache,
uniqueRunIds as dedupeRunIds,
} from "./modules/utils.js";
import {
Expand Down Expand Up @@ -51,8 +52,12 @@ const CONFIG = {
maxSweepRows: 5,
maxSweepDecisionCompare: 4,
persistDebounceMs: 400,
maxDetailCacheEntries: 50,
maxConsecutiveRefreshErrors: 3,
};

let unsubscribeWorkspaceState = null;

const state = {
workspace: { status: "starting", serverUrl: null, logs: [], error: null },
snapshot: null,
Expand All @@ -61,14 +66,20 @@ const state = {
sweepDecisionStore: defaultSweepDecisionStore(),
sweepDecisionLoaded: false,
selectedRunIds: [],
detailCache: new Map(),
detailCache: new LruCache(CONFIG.maxDetailCacheEntries),
experimentsWorkspace: { status: "idle", configs: [], sweeps: [], error: null, updatedAt: null },
experimentConfigPreviewCache: new Map(),
isSubmittingLaunch: false,
launchFeedback: "Use deterministic inputs or ask from chat.",
refreshTimer: null,
isStepbitSubmitting: false,
snapshotStatus: { status: "idle", error: null, lastSuccessAt: null },
snapshotStatus: {
status: "idle",
error: null,
lastSuccessAt: null,
consecutiveErrors: 0,
refreshPaused: false,
},
isRetryingWorkspace: false,
chatMessages: [
{
Expand Down Expand Up @@ -177,7 +188,7 @@ document.addEventListener("DOMContentLoaded", async () => {
state.workspace = initialState;
renderWorkspaceState();
renderWorkflow();
window.quantlabDesktop.onWorkspaceState((payload) => {
unsubscribeWorkspaceState = window.quantlabDesktop.onWorkspaceState((payload) => {
state.workspace = payload;
renderWorkspaceState();
if (payload.serverUrl) ensureRefreshLoop();
Expand All @@ -186,8 +197,12 @@ document.addEventListener("DOMContentLoaded", async () => {
});

window.addEventListener("beforeunload", () => {
if (state.refreshTimer) window.clearInterval(state.refreshTimer);
stopRefreshLoop();
if (state.workspacePersistTimer) window.clearTimeout(state.workspacePersistTimer);
if (unsubscribeWorkspaceState) {
unsubscribeWorkspaceState();
unsubscribeWorkspaceState = null;
}
if (state.workspaceStoreLoaded) {
window.quantlabDesktop.saveShellWorkspaceStore(serializeShellWorkspaceStore()).catch(() => {});
}
Expand Down Expand Up @@ -296,12 +311,23 @@ function bindEvents() {
}

function ensureRefreshLoop() {
if (!state.workspace.serverUrl) return;
refreshSnapshot();
if (!state.refreshTimer) state.refreshTimer = window.setInterval(refreshSnapshot, CONFIG.refreshIntervalMs);
}
Comment on lines 313 to 317
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Guard the refresh loop against restarting when automatic refresh has been paused after repeated errors

Because ensureRefreshLoop is still called from the workspace state subscription, a new interval will be started whenever serverUrl is set, even if snapshotStatus.refreshPaused is true. To honor the intended "pause until user intervenes" behavior, add a guard here, e.g. if (state.snapshotStatus.refreshPaused) return;, so the loop only restarts when explicitly reset in retryWorkspaceRuntime.

Suggested change
function ensureRefreshLoop() {
if (!state.workspace.serverUrl) return;
refreshSnapshot();
if (!state.refreshTimer) state.refreshTimer = window.setInterval(refreshSnapshot, CONFIG.refreshIntervalMs);
}
function ensureRefreshLoop() {
if (!state.workspace.serverUrl) return;
if (state.snapshotStatus?.refreshPaused) return;
refreshSnapshot();
if (!state.refreshTimer) state.refreshTimer = window.setInterval(refreshSnapshot, CONFIG.refreshIntervalMs);
}


function stopRefreshLoop() {
if (state.refreshTimer) {
window.clearInterval(state.refreshTimer);
state.refreshTimer = null;
}
}

async function refreshSnapshot() {
if (!state.workspace.serverUrl) return;
if (!state.workspace.serverUrl) {
stopRefreshLoop();
return;
}
try {
const runsRegistry = await window.quantlabDesktop.requestJson(CONFIG.runsIndexPath);
state.detailCache.clear();
Expand All @@ -318,7 +344,13 @@ async function refreshSnapshot() {
brokerHealth: extra[2].status === "fulfilled" ? extra[2].value : state.snapshot?.brokerHealth || null,
stepbitWorkspace: extra[3].status === "fulfilled" ? extra[3].value : state.snapshot?.stepbitWorkspace || null,
};
state.snapshotStatus = { status: "ok", error: null, lastSuccessAt: new Date().toISOString() };
state.snapshotStatus = {
status: "ok",
error: null,
lastSuccessAt: new Date().toISOString(),
consecutiveErrors: 0,
refreshPaused: false,
};
const validIds = new Set(getRuns().map((run) => run.run_id));
const filteredSelection = state.selectedRunIds.filter((runId) => validIds.has(runId));
if (filteredSelection.length !== state.selectedRunIds.length) {
Expand All @@ -334,10 +366,15 @@ async function refreshSnapshot() {
refreshExperimentsWorkspace({ focusTab: false, silent: true });
}
} catch (error) {
const consecutiveErrors = (state.snapshotStatus.consecutiveErrors || 0) + 1;
const refreshPaused = consecutiveErrors >= CONFIG.maxConsecutiveRefreshErrors;
if (refreshPaused) stopRefreshLoop();
state.snapshotStatus = {
status: "error",
error: error?.message || "The local API is unavailable.",
lastSuccessAt: state.snapshotStatus.lastSuccessAt,
consecutiveErrors,
refreshPaused,
};
renderWorkspaceState();
// Keep the shell usable even if optional surfaces are down.
Expand Down Expand Up @@ -689,10 +726,13 @@ function buildRuntimeAlert() {
};
}
if (state.snapshotStatus.status === "error") {
const pausedSuffix = state.snapshotStatus.refreshPaused
? " Automatic refresh paused after repeated failures."
: "";
return {
tone: "warn",
actionLabel: "Retry API",
message: `API unavailable: ${state.snapshotStatus.error || "local request failed."}`,
message: `API unavailable: ${state.snapshotStatus.error || "local request failed."}${pausedSuffix}`,
};
}
if (state.workspace.status === "starting") {
Expand All @@ -714,7 +754,15 @@ async function retryWorkspaceRuntime() {
state.workspace = await window.quantlabDesktop.restartWorkspaceServer();
renderWorkspaceState();
}
state.snapshotStatus = {
...state.snapshotStatus,
consecutiveErrors: 0,
refreshPaused: false,
};
await refreshSnapshot();
if (state.workspace.serverUrl && !state.refreshTimer && state.snapshotStatus.status !== "error") {
ensureRefreshLoop();
}
} finally {
state.isRetryingWorkspace = false;
renderWorkspaceState();
Expand Down
35 changes: 35 additions & 0 deletions desktop/renderer/modules/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,41 @@ export function parseCsvRows(text, limit = Infinity) {
return rows;
}

export class LruCache {
constructor(maxSize = 50) {
this.maxSize = Math.max(1, Number(maxSize) || 50);
this.cache = new Map();
}

clear() {
this.cache.clear();
}

Comment on lines +199 to +208
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick (bug_risk): LruCache.set returns the value instead of the cache instance, which may be surprising.

set currently returns value, while Map.prototype.set (and most caches) return this for chaining. If callers later assume Map-like behavior and chain calls or use the return as the cache, this could fail in a non-obvious way. Unless there’s a strong reason to return the stored value, consider returning this instead.

has(key) {
return this.cache.has(key);
}

get(key) {
if (!this.cache.has(key)) return undefined;
const value = this.cache.get(key);
this.cache.delete(key);
this.cache.set(key, value);
return value;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Align LruCache.set return value with Map semantics to avoid surprises for callers.

LruCache.set currently returns value, but Map.prototype.set returns the map instance. Since this replaced a Map, any callers relying on chaining (e.g. cache.set(k1, v1).set(k2, v2)) will now fail silently. Returning this instead would preserve Map-like semantics and avoid surprising breakages.

}

set(key, value) {
if (this.cache.has(key)) {
this.cache.delete(key);
}
this.cache.set(key, value);
while (this.cache.size > this.maxSize) {
const oldestKey = this.cache.keys().next().value;
this.cache.delete(oldestKey);
}
return value;
}
}

function splitCsvLine(line) {
const values = [];
let current = "";
Expand Down
Loading