From eae155ce2307288c771b86b80eb2b25e5d9616f0 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Fri, 13 Mar 2026 22:30:50 +0800 Subject: [PATCH 1/2] fix(gateway): remove stale agent-data directory creation from buildCredentialPayload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildCredentialPayload() was creating an `agent-data/` directory under skillsDir on the NFS, contradicting its own JSDoc ("Returns data only — does NOT write to disk"). Gateway should never write to the shared NFS; only AgentBox pods should write to their own user-data mount. --- src/gateway/rpc-methods.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gateway/rpc-methods.ts b/src/gateway/rpc-methods.ts index 1aa69aa..a800129 100644 --- a/src/gateway/rpc-methods.ts +++ b/src/gateway/rpc-methods.ts @@ -3875,12 +3875,6 @@ export function createRpcMethods( workspaceId: string, isDefault: boolean, ): Promise { - // Ensure user agent-data directory exists (used as subPath mount for user data) - const agentDataDir = path.join(skillsDir, "user", userId, "agent-data"); - if (!fs.existsSync(agentDataDir)) { - fs.mkdirSync(agentDataDir, { recursive: true }); - } - const manifest: CredentialManifestEntry[] = []; const files: CredentialFile[] = []; From c95c67b1c2b583912440ca6b0a14701b8d8630c0 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Fri, 13 Mar 2026 22:41:07 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(k8s):=20decouple=20NFS=20PVC=20from=20s?= =?UTF-8?q?kills=20=E2=80=94=20mount=20only=20for=20user-data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skills are synced via RPC (buildSkillBundle), not shared filesystem. The NFS PVC should only be used for user-data persistence. agentbox-template.yaml: - Remove all skills/credentials/kube NFS mounts (were on siclaw-skills PVC) - Add emptyDir volumes for skills, credentials, config (synced via RPC) - Add client-cert secret volume (mTLS) - Align with what k8s-spawner.ts actually generates gateway-deployment.yaml: - Add NFS PVC (siclaw-data) mount at /app/.siclaw/user-data - Add persistence env vars (CLAIM_NAME, MOUNT_PATH) - Gateway uses this mount only for ensureUserDir() before spawning pods --- k8s/agentbox-template.yaml | 128 ++++++++++++++---------------------- k8s/gateway-deployment.yaml | 12 ++++ 2 files changed, 63 insertions(+), 77 deletions(-) diff --git a/k8s/agentbox-template.yaml b/k8s/agentbox-template.yaml index 085ebe3..efb9b67 100644 --- a/k8s/agentbox-template.yaml +++ b/k8s/agentbox-template.yaml @@ -1,12 +1,13 @@ # AgentBox Pod Template -# This template is used by K8sSpawner to create per-user AgentBox pods +# Reference document — shows what K8sSpawner creates programmatically. +# Not consumed at runtime; kept in sync with k8s-spawner.ts for documentation. # -# Variables to substitute: +# Variables: # ${USER_ID} - User identifier # ${IMAGE} - Container image (e.g., siclaw-agentbox:latest) # ${NAMESPACE} - Kubernetes namespace # ${WORKSPACE_ID} - Workspace identifier (e.g., "default") -# + apiVersion: v1 kind: Pod metadata: @@ -15,17 +16,25 @@ metadata: labels: app: agentbox siclaw.io/app: agentbox - user: ${USER_ID} - managed-by: siclaw-gateway + siclaw.io/user: ${USER_ID} + siclaw.io/workspace: ${WORKSPACE_ID} spec: automountServiceAccountToken: false restartPolicy: Never terminationGracePeriodSeconds: 30 + securityContext: + seccompProfile: + type: RuntimeDefault containers: - name: agentbox image: ${IMAGE} imagePullPolicy: Always + securityContext: + capabilities: + drop: ["ALL"] + add: ["SETUID", "SETGID", "CHOWN", "FOWNER"] + readOnlyRootFilesystem: true ports: - name: https @@ -36,23 +45,13 @@ spec: protocol: TCP env: - - name: USER_ID - value: "${USER_ID}" - - name: NODE_ENV - value: "production" - - name: SICLAW_AGENTBOX_PORT - value: "3000" - name: PI_CODING_AGENT_DIR value: ".siclaw/user-data/agent" - - name: SICLAW_SKILLS_DIR - value: ".siclaw/skills" - - name: SICLAW_USER_DATA_DIR - value: ".siclaw/user-data" - name: SICLAW_GATEWAY_URL value: "https://siclaw-gateway.${NAMESPACE}.svc.cluster.local:3002" - - name: SICLAW_CREDENTIALS_DIR - value: "/home/agentbox/.credentials" - # LLM config is injected dynamically by gateway via envResolver + - name: SICLAW_WORKSPACE_ID + value: "${WORKSPACE_ID}" + # LLM config + settings.json are injected dynamically by gateway via RPC # Resource limits resources: @@ -61,88 +60,64 @@ spec: cpu: "100m" limits: memory: "1Gi" - cpu: "500m" + cpu: "1000m" # Health probes - livenessProbe: + readinessProbe: httpGet: path: /health port: 3000 - initialDelaySeconds: 10 - periodSeconds: 30 - timeoutSeconds: 5 - failureThreshold: 3 + scheme: HTTPS + initialDelaySeconds: 2 + periodSeconds: 2 - readinessProbe: + livenessProbe: httpGet: path: /health port: 3000 - initialDelaySeconds: 5 + scheme: HTTPS + initialDelaySeconds: 10 periodSeconds: 10 - timeoutSeconds: 3 - failureThreshold: 3 # Volume mounts volumeMounts: - - name: kubeconfig - mountPath: /home/agentbox/.kube - readOnly: true + - name: credentials + mountPath: /app/.siclaw/credentials - name: config mountPath: /app/.siclaw/config - - name: tmp - mountPath: /tmp - - name: workspace - mountPath: /workspace - # Skills & MCP mounts (from shared PVC) - - name: skills-pv - mountPath: /app/.siclaw/skills/core - subPath: core - readOnly: true - - name: skills-pv - mountPath: /app/.siclaw/skills/team - subPath: team - readOnly: true - - name: skills-pv - mountPath: /app/.siclaw/skills/user - subPath: user/${USER_ID}/.ws-${WORKSPACE_ID} - readOnly: true - - name: skills-pv + - name: skills-local + mountPath: /app/.siclaw/skills + - name: user-data mountPath: /app/.siclaw/user-data subPath: users/${USER_ID}/${WORKSPACE_ID} - - name: skills-pv - mountPath: /home/agentbox/.credentials - subPath: user/${USER_ID}/.ws-${WORKSPACE_ID}/.credentials - readOnly: true - - name: skills-pv - mountPath: /home/agentbox/.kube/envs - subPath: user/${USER_ID}/.kube/envs - readOnly: true - - name: skills-pv - mountPath: /home/agentbox/.kube/defaults - subPath: _default_kubeconfigs + - name: client-cert + mountPath: /etc/siclaw/certs readOnly: true + - name: tmp + mountPath: /tmp volumes: - # User-specific kubeconfig (optional) - - name: kubeconfig - secret: - secretName: user-${USER_ID}-kubeconfig - optional: true + # Credentials (synced from gateway via RPC) + - name: credentials + emptyDir: {} # Config directory (settings.json written at runtime) - name: config emptyDir: {} + # Skills (synced from gateway via RPC buildSkillBundle) + - name: skills-local + emptyDir: {} + # User data (memory, investigations, sessions) — persistent on shared PVC + - name: user-data + persistentVolumeClaim: + claimName: siclaw-data + # mTLS client certificate + - name: client-cert + secret: + secretName: agentbox-${USER_ID}-cert # Temp directory - name: tmp emptyDir: sizeLimit: 500Mi - # Workspace for user files - - name: workspace - emptyDir: - sizeLimit: 1Gi - # Shared data PVC (skills + user data) - - name: skills-pv - persistentVolumeClaim: - claimName: siclaw-data --- # Service for accessing the AgentBox pod @@ -153,15 +128,14 @@ metadata: namespace: ${NAMESPACE} labels: app: agentbox - user: ${USER_ID} + siclaw.io/user: ${USER_ID} spec: type: ClusterIP selector: app: agentbox - user: ${USER_ID} + siclaw.io/user: ${USER_ID} ports: - - name: http + - name: https port: 3000 targetPort: 3000 protocol: TCP - diff --git a/k8s/gateway-deployment.yaml b/k8s/gateway-deployment.yaml index 7ba6223..7d8b9d9 100644 --- a/k8s/gateway-deployment.yaml +++ b/k8s/gateway-deployment.yaml @@ -54,6 +54,13 @@ spec: value: "siclaw-agentbox:latest" - name: SICLAW_SKILLS_DIR value: "/app/.siclaw/skills" + # User data persistence — shared PVC for ensureUserDir() + - name: SICLAW_PERSISTENCE_ENABLED + value: "true" + - name: SICLAW_PERSISTENCE_CLAIM_NAME + value: "siclaw-data" + - name: SICLAW_PERSISTENCE_MOUNT_PATH + value: "/app/.siclaw/user-data" - name: SICLAW_DATABASE_URL valueFrom: secretKeyRef: @@ -81,9 +88,14 @@ spec: volumeMounts: - name: skills-local mountPath: /app/.siclaw/skills + - name: user-data + mountPath: /app/.siclaw/user-data volumes: - name: skills-local emptyDir: {} + - name: user-data + persistentVolumeClaim: + claimName: siclaw-data --- apiVersion: v1