From 8ba7547227596a081e80a13c8981fbe378f62baa Mon Sep 17 00:00:00 2001
From: Che <30403707+Che-Zhu@users.noreply.github.com>
Date: Wed, 11 Mar 2026 17:19:41 +0800
Subject: [PATCH 1/3] refactor: add project task platform
---
.../(list)/_components/home-page-content.tsx | 2 +-
.../(list)/_components/project-list.tsx | 2 +-
app/(dashboard)/projects/(list)/page.tsx | 4 +-
.../_components/import-github-dialog.tsx | 9 +-
app/api/projects/[id]/route.ts | 6 +-
lib/actions/project.ts | 27 ++-
lib/data/project.ts | 3 +
lib/events/sandbox/sandboxListener.ts | 6 +-
lib/jobs/project-import/index.ts | 2 -
.../project-import/projectImportReconcile.ts | 229 ------------------
.../executors/clone-repository.ts | 95 ++++++++
lib/jobs/project-task/executors/index.ts | 22 ++
lib/jobs/project-task/index.ts | 2 +
lib/jobs/project-task/projectTaskReconcile.ts | 159 ++++++++++++
lib/repo/project-import.ts | 150 ------------
lib/repo/project-task.ts | 206 ++++++++++++++++
lib/services/project-task-dispatcher.ts | 59 +++++
lib/startup/index.ts | 24 +-
lib/util/project-display-status.ts | 22 +-
.../migration.sql | 53 ++++
prisma/migrations/migration_lock.toml | 1 +
prisma/schema.prisma | 89 +++++--
22 files changed, 731 insertions(+), 441 deletions(-)
delete mode 100644 lib/jobs/project-import/index.ts
delete mode 100644 lib/jobs/project-import/projectImportReconcile.ts
create mode 100644 lib/jobs/project-task/executors/clone-repository.ts
create mode 100644 lib/jobs/project-task/executors/index.ts
create mode 100644 lib/jobs/project-task/index.ts
create mode 100644 lib/jobs/project-task/projectTaskReconcile.ts
delete mode 100644 lib/repo/project-import.ts
create mode 100644 lib/repo/project-task.ts
create mode 100644 lib/services/project-task-dispatcher.ts
create mode 100644 prisma/migrations/20260311171810_project_task_platform/migration.sql
create mode 100644 prisma/migrations/migration_lock.toml
diff --git a/app/(dashboard)/projects/(list)/_components/home-page-content.tsx b/app/(dashboard)/projects/(list)/_components/home-page-content.tsx
index ffe161c..40501f6 100644
--- a/app/(dashboard)/projects/(list)/_components/home-page-content.tsx
+++ b/app/(dashboard)/projects/(list)/_components/home-page-content.tsx
@@ -15,7 +15,7 @@ import { ProjectList } from './project-list'
const REFRESH_INTERVAL_MS = 3000
interface HomePageContentProps {
- projects: ProjectWithRelations<{ sandboxes: true }>[]
+ projects: ProjectWithRelations<{ sandboxes: true; tasks: true }>[]
}
export function HomePageContent({ projects }: HomePageContentProps) {
diff --git a/app/(dashboard)/projects/(list)/_components/project-list.tsx b/app/(dashboard)/projects/(list)/_components/project-list.tsx
index b08b605..82d95dd 100644
--- a/app/(dashboard)/projects/(list)/_components/project-list.tsx
+++ b/app/(dashboard)/projects/(list)/_components/project-list.tsx
@@ -9,7 +9,7 @@ import { CreateProjectCard } from './create-project-card'
import { ProjectCard } from './project-card'
interface ProjectListProps {
- projects: ProjectWithRelations<{ sandboxes: true }>[]
+ projects: ProjectWithRelations<{ sandboxes: true; tasks: true }>[]
activeFilter: 'ALL' | ProjectDisplayStatus
}
diff --git a/app/(dashboard)/projects/(list)/page.tsx b/app/(dashboard)/projects/(list)/page.tsx
index 19ab469..430cbb9 100644
--- a/app/(dashboard)/projects/(list)/page.tsx
+++ b/app/(dashboard)/projects/(list)/page.tsx
@@ -17,7 +17,7 @@ export default async function HomePage() {
redirect('/login')
}
- const projects = await getProjects(session.user.id, { sandboxes: true })
+ const projects = await getProjects(session.user.id, { sandboxes: true, tasks: true })
return (
@@ -26,4 +26,4 @@ export default async function HomePage() {
)
-}
\ No newline at end of file
+}
diff --git a/app/(dashboard)/projects/_components/import-github-dialog.tsx b/app/(dashboard)/projects/_components/import-github-dialog.tsx
index a4f2586..6c696a9 100644
--- a/app/(dashboard)/projects/_components/import-github-dialog.tsx
+++ b/app/(dashboard)/projects/_components/import-github-dialog.tsx
@@ -3,7 +3,7 @@
import { useCallback, useEffect, useState } from 'react'
import { FaGithub } from 'react-icons/fa'
import { MdRefresh } from 'react-icons/md'
-import type { ProjectImportStatus } from '@prisma/client'
+import type { ProjectTaskStatus, ProjectTaskType } from '@prisma/client'
import { useRouter } from 'next/navigation'
import { toast } from 'sonner'
@@ -97,11 +97,12 @@ export function ImportGitHubDialog({ open, onOpenChange }: ImportGitHubDialogPro
const pollImportStatus = async () => {
try {
- const project = await GET<{ importStatus: ProjectImportStatus }>(
+ const project = await GET<{ tasks: Array<{ type: ProjectTaskType; status: ProjectTaskStatus }> }>(
`/api/projects/${importProjectId}`
)
- if (project.importStatus === 'READY') {
+ const cloneTask = project.tasks.find((task) => task.type === 'CLONE_REPOSITORY')
+ if (!cloneTask || cloneTask.status === 'SUCCEEDED') {
toast.success('Repository imported successfully')
onOpenChange(false)
setImportProjectId(null)
@@ -109,7 +110,7 @@ export function ImportGitHubDialog({ open, onOpenChange }: ImportGitHubDialogPro
return
}
- if (project.importStatus === 'FAILED') {
+ if (cloneTask.status === 'FAILED' || cloneTask.status === 'CANCELLED') {
toast.error('Repository import failed. An empty project was created instead.')
onOpenChange(false)
setImportProjectId(null)
diff --git a/app/api/projects/[id]/route.ts b/app/api/projects/[id]/route.ts
index 832b645..6b68ff7 100644
--- a/app/api/projects/[id]/route.ts
+++ b/app/api/projects/[id]/route.ts
@@ -9,6 +9,7 @@ type ProjectWithFullRelations = Prisma.ProjectGetPayload<{
sandboxes: true
databases: true
environments: true
+ tasks: true
}
}>
@@ -39,6 +40,9 @@ export const GET = withAuth(async (_req, context, session) =
environments: {
orderBy: { createdAt: 'asc' },
},
+ tasks: {
+ orderBy: { createdAt: 'desc' },
+ },
},
})
@@ -51,4 +55,4 @@ export const GET = withAuth(async (_req, context, session) =
console.error('Error fetching project:', error)
return NextResponse.json({ error: 'Failed to fetch project details' }, { status: 500 })
}
-})
\ No newline at end of file
+})
diff --git a/lib/actions/project.ts b/lib/actions/project.ts
index 3b85013..12bca0f 100644
--- a/lib/actions/project.ts
+++ b/lib/actions/project.ts
@@ -7,7 +7,7 @@
* instead of API Routes directly.
*/
-import type { Project, ProjectImportStatus } from '@prisma/client'
+import type { Project } from '@prisma/client'
import { auth } from '@/lib/auth'
import { EnvironmentCategory } from '@/lib/const'
@@ -17,6 +17,7 @@ import { KubernetesUtils } from '@/lib/k8s/kubernetes-utils'
import { VERSIONS } from '@/lib/k8s/versions'
import { logger as baseLogger } from '@/lib/logger'
import { getInstallationByGitHubId } from '@/lib/repo/github'
+import { createProjectTask } from '@/lib/repo/project-task'
import { listInstallationRepos } from '@/lib/services/github-app'
import { generateRandomString } from '@/lib/util/common'
@@ -61,8 +62,8 @@ type CreateProjectWithSandboxOptions = {
name: string
description?: string
importData?: {
- importStatus: ProjectImportStatus
githubAppInstallationId: string
+ installationId: number
githubRepoId: number
githubRepoFullName: string
githubRepoDefaultBranch?: string
@@ -112,13 +113,10 @@ async function createProjectWithSandbox({
description,
userId,
status: 'CREATING',
- importStatus: importData?.importStatus ?? 'READY',
githubAppInstallationId: importData?.githubAppInstallationId,
githubRepoId: importData?.githubRepoId,
githubRepoFullName: importData?.githubRepoFullName,
githubRepoDefaultBranch: importData?.githubRepoDefaultBranch,
- importError: null,
- importLockedUntil: null,
},
})
@@ -168,6 +166,23 @@ async function createProjectWithSandbox({
},
})
+ if (importData?.githubRepoDefaultBranch) {
+ await createProjectTask(tx, {
+ projectId: project.id,
+ sandboxId: sandbox.id,
+ type: 'CLONE_REPOSITORY',
+ status: 'WAITING_FOR_PREREQUISITES',
+ triggerSource: 'USER_ACTION',
+ payload: {
+ installationId: importData.installationId,
+ repoId: importData.githubRepoId,
+ repoFullName: importData.githubRepoFullName,
+ defaultBranch: importData.githubRepoDefaultBranch,
+ },
+ maxAttempts: 3,
+ })
+ }
+
return { project, sandbox }
},
{
@@ -263,8 +278,8 @@ export async function importProjectFromGitHub(
name: payload.repoName,
description: payload.description,
importData: {
- importStatus: 'PENDING',
githubAppInstallationId: installation.id,
+ installationId: installation.installationId,
githubRepoId: payload.repoId,
githubRepoFullName: payload.repoFullName,
githubRepoDefaultBranch: payload.defaultBranch,
diff --git a/lib/data/project.ts b/lib/data/project.ts
index 4a7b70a..c7bca58 100644
--- a/lib/data/project.ts
+++ b/lib/data/project.ts
@@ -11,6 +11,7 @@ export type ProjectInclude = {
sandboxes?: boolean;
databases?: boolean;
environments?: boolean;
+ tasks?: boolean;
};
/**
@@ -33,6 +34,7 @@ export const getProject = cache(async function getProject(
sandboxes: false,
databases: false,
environments: false,
+ tasks: false,
...include,
};
@@ -68,6 +70,7 @@ export const getProjects = cache(async function getProjects(
sandboxes: false,
databases: false,
environments: false,
+ tasks: false,
...include,
};
diff --git a/lib/events/sandbox/sandboxListener.ts b/lib/events/sandbox/sandboxListener.ts
index 269ce86..3606187 100644
--- a/lib/events/sandbox/sandboxListener.ts
+++ b/lib/events/sandbox/sandboxListener.ts
@@ -1,4 +1,4 @@
-import { triggerProjectImportForProject } from '@/lib/jobs/project-import'
+import { triggerRunnableTasksForProject } from '@/lib/jobs/project-task'
import { getK8sServiceForUser } from '@/lib/k8s/k8s-service-helper'
import { logger as baseLogger } from '@/lib/logger'
import { getProjectEnvironments } from '@/lib/repo/environment'
@@ -355,8 +355,8 @@ registerSandboxListeners()
async function triggerImportOnSandboxRunning(projectId: string): Promise<void> {
try {
- await triggerProjectImportForProject(projectId)
+ await triggerRunnableTasksForProject(projectId)
} catch (error) {
- logger.error(`Failed to trigger project import for ${projectId}: ${error}`)
+ logger.error(`Failed to trigger project tasks for ${projectId}: ${error}`)
}
}
diff --git a/lib/jobs/project-import/index.ts b/lib/jobs/project-import/index.ts
deleted file mode 100644
index c0cc926..0000000
--- a/lib/jobs/project-import/index.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-export { startProjectImportReconcileJob } from './projectImportReconcile'
-export { triggerProjectImportForProject } from './projectImportReconcile'
diff --git a/lib/jobs/project-import/projectImportReconcile.ts b/lib/jobs/project-import/projectImportReconcile.ts
deleted file mode 100644
index c67ac3a..0000000
--- a/lib/jobs/project-import/projectImportReconcile.ts
+++ /dev/null
@@ -1,229 +0,0 @@
-import { Cron } from 'croner'
-
-import { logger as baseLogger } from '@/lib/logger'
-import {
- acquireAndLockImportProjects,
- getImportProjectById,
- type ImportProjectWithRelations,
- LOCK_DURATION_SECONDS,
- setProjectImportState,
- tryClaimImportExecutionLock,
-} from '@/lib/repo/project-import'
-import { getInstallationToken } from '@/lib/services/github-app'
-import { getSandboxTtydContext } from '@/lib/util/ttyd-context'
-import { execCommand } from '@/lib/util/ttyd-exec'
-
-const logger = baseLogger.child({ module: 'lib/jobs/project-import/projectImportReconcile' })
-
-const MAX_IMPORTS_PER_CYCLE = parseInt(process.env.MAX_PROJECT_IMPORTS_PER_RECONCILE || '5', 10)
-const RECONCILE_INTERVAL_SECONDS = parseInt(
- process.env.PROJECT_IMPORT_RECONCILE_INTERVAL_SECONDS || '10',
- 10
-)
-const CLONING_LOCK_DURATION_SECONDS = 300
-const CLONE_MAX_ATTEMPTS = parseInt(process.env.PROJECT_IMPORT_CLONE_MAX_ATTEMPTS || '3', 10)
-const IMPORT_EXEC_TIMEOUT_MS = parseInt(process.env.PROJECT_IMPORT_EXEC_TIMEOUT_MS || '90000', 10)
-
-type ImportTriggerSource = 'reconcile' | 'sandbox-running-event'
-
-export function startProjectImportReconcileJob() {
- logger.info('Starting project import reconcile job')
- logger.info(`Lock duration: ${LOCK_DURATION_SECONDS} seconds`)
- logger.info(`Max projects per cycle: ${MAX_IMPORTS_PER_CYCLE}`)
- logger.info(`Reconcile interval: ${RECONCILE_INTERVAL_SECONDS} seconds`)
-
- const job = new Cron(`*/${RECONCILE_INTERVAL_SECONDS} * * * * *`, async () => {
- try {
- await reconcileProjectImports()
- } catch (error) {
- logger.error(`Project import reconcile job error: ${error}`)
- }
- })
-
- logger.info(
- `✅ Project import reconcile job started (every ${RECONCILE_INTERVAL_SECONDS} seconds)`
- )
- return job
-}
-
-async function reconcileProjectImports() {
- const projects = await acquireAndLockImportProjects(MAX_IMPORTS_PER_CYCLE)
-
- if (projects.length === 0) {
- return
- }
-
- logger.info(`Acquired ${projects.length} projects for import reconcile`)
-
- for (const project of projects) {
- try {
- await handleSingleProjectImport(project, 'reconcile')
- } catch (error) {
- logger.error(`Failed to process project import for ${project.id}: ${error}`)
- await setProjectImportState(project.id, {
- importStatus: 'FAILED',
- importError: String(error),
- importLockedUntil: null,
- })
- }
- }
-}
-
-export async function triggerProjectImportForProject(projectId: string): Promise<void> {
- const project = await getImportProjectById(projectId)
- if (!project) {
- return
- }
-
- if (project.importStatus !== 'PENDING' && project.importStatus !== 'CLONING') {
- return
- }
-
- const sandbox = project.sandboxes[0]
- if (!sandbox || sandbox.status !== 'RUNNING') {
- return
- }
-
- const claimed = await tryClaimImportExecutionLock(project.id, CLONING_LOCK_DURATION_SECONDS)
- if (!claimed) {
- return
- }
-
- const projectAfterLock = await getImportProjectById(project.id)
- if (!projectAfterLock) {
- return
- }
-
- await handleSingleProjectImport(projectAfterLock, 'sandbox-running-event')
-}
-
-async function handleSingleProjectImport(
- project: ImportProjectWithRelations,
- source: ImportTriggerSource
-): Promise<void> {
- if (!project.githubRepoFullName || !project.githubAppInstallation || !project.githubRepoDefaultBranch) {
- await setProjectImportState(project.id, {
- importStatus: 'FAILED',
- importError: 'Missing repository metadata for import',
- importLockedUntil: null,
- })
- return
- }
-
- const sandbox = project.sandboxes[0]
- if (!sandbox) {
- await setProjectImportState(project.id, {
- importStatus: 'FAILED',
- importError: 'Sandbox not found',
- importLockedUntil: null,
- })
- return
- }
-
- if (sandbox.status !== 'RUNNING') {
- if (project.importStatus === 'CLONING') {
- await setProjectImportState(project.id, {
- importStatus: 'PENDING',
- importLockedUntil: null,
- })
- }
- return
- }
-
- if (source === 'reconcile') {
- await setProjectImportState(project.id, {
- importStatus: 'CLONING',
- importError: null,
- importLockedUntil: new Date(Date.now() + CLONING_LOCK_DURATION_SECONDS * 1000),
- })
- }
-
- const cloneResult = await cloneRepoToSandboxWithRetry(project, sandbox.id)
- if (cloneResult.success) {
- await setProjectImportState(project.id, {
- importStatus: 'READY',
- importError: null,
- importLockedUntil: null,
- })
- return
- }
-
- await setProjectImportState(project.id, {
- importStatus: 'FAILED',
- importError: cloneResult.error,
- importLockedUntil: null,
- })
-}
-
-async function cloneRepoToSandboxWithRetry(
- project: ImportProjectWithRelations,
- sandboxId: string
-): Promise<{ success: true } | { success: false; error: string }> {
- let lastError = 'Unknown clone error'
-
- for (let attempt = 1; attempt <= CLONE_MAX_ATTEMPTS; attempt++) {
- const attemptStart = Date.now()
- try {
- logger.info(`Import clone attempt ${attempt}/${CLONE_MAX_ATTEMPTS} for project ${project.id}`)
- await cloneRepoToSandbox(project, sandboxId)
- logger.info(
- `Import clone succeeded on attempt ${attempt} for project ${project.id} in ${Date.now() - attemptStart}ms`
- )
- return { success: true }
- } catch (error) {
- lastError = error instanceof Error ? error.message : String(error)
- logger.warn(
- `Import clone attempt ${attempt}/${CLONE_MAX_ATTEMPTS} failed for project ${project.id} after ${Date.now() - attemptStart}ms: ${lastError}`
- )
- if (attempt < CLONE_MAX_ATTEMPTS) {
- await sleep(1500)
- }
- }
- }
-
- return {
- success: false,
- error: `Clone failed after ${CLONE_MAX_ATTEMPTS} attempts: ${lastError}`,
- }
-}
-
-async function cloneRepoToSandbox(project: ImportProjectWithRelations, sandboxId: string): Promise<void> {
- const installationToken = await getInstallationToken(project.githubAppInstallation!.installationId)
- const { ttyd } = await getSandboxTtydContext(sandboxId, project.user.id)
-
- const authUrl = `https://x-access-token:${installationToken}@github.com/${project.githubRepoFullName}.git`
- const escapedAuthUrl = shellEscapeSingleQuoted(authUrl)
- const escapedBranch = shellEscapeSingleQuoted(project.githubRepoDefaultBranch!)
- const repoFullName = project.githubRepoFullName!
- const repoName = repoFullName.split('/').at(-1) || 'repo'
- const importDirName = repoName.replace(/[^a-zA-Z0-9._-]/g, '-')
- const uniqueImportDirName = `${importDirName}-${project.id}`
- const escapedImportDirName = shellEscapeSingleQuoted(uniqueImportDirName)
-
- const cloneCommand = [
- 'set -e',
- 'mkdir -p import',
- `target_dir='import/${escapedImportDirName}'`,
- 'tmp_dir=$(mktemp -d)',
- `GIT_TERMINAL_PROMPT=0 GIT_ASKPASS=/bin/echo git clone --depth 1 --branch '${escapedBranch}' '${escapedAuthUrl}' "$tmp_dir/repo"`,
- 'mkdir -p "$target_dir"',
- 'cp -a "$tmp_dir/repo"/. "$target_dir"/',
- 'rm -rf "$tmp_dir"',
- ].join(' && ')
-
- await execCommand(
- ttyd.baseUrl,
- ttyd.accessToken,
- cloneCommand,
- IMPORT_EXEC_TIMEOUT_MS,
- ttyd.authorization
- )
-}
-
-function shellEscapeSingleQuoted(input: string): string {
- return input.replace(/'/g, `'\\''`)
-}
-
-function sleep(ms: number): Promise<void> {
- return new Promise((resolve) => setTimeout(resolve, ms))
-}
diff --git a/lib/jobs/project-task/executors/clone-repository.ts b/lib/jobs/project-task/executors/clone-repository.ts
new file mode 100644
index 0000000..d8b69d1
--- /dev/null
+++ b/lib/jobs/project-task/executors/clone-repository.ts
@@ -0,0 +1,95 @@
+import type { Prisma } from '@prisma/client'
+
+import type { ProjectTaskWithRelations } from '@/lib/repo/project-task'
+import { getInstallationToken } from '@/lib/services/github-app'
+import { getSandboxTtydContext } from '@/lib/util/ttyd-context'
+import { execCommand } from '@/lib/util/ttyd-exec'
+
+export type ProjectTaskExecutorResult =
+ | { success: true; result?: Prisma.InputJsonValue }
+ | { success: false; error: string; retryable: boolean }
+
+const IMPORT_EXEC_TIMEOUT_MS = parseInt(process.env.PROJECT_IMPORT_EXEC_TIMEOUT_MS || '90000', 10)
+
+type CloneRepositoryPayload = {
+ installationId?: number
+ repoFullName?: string
+ defaultBranch?: string
+}
+
+export async function runCloneRepositoryTask(
+ task: ProjectTaskWithRelations
+): Promise<ProjectTaskExecutorResult> {
+ const payload = (task.payload ?? {}) as CloneRepositoryPayload
+ const installationId = payload.installationId
+ const repoFullName = payload.repoFullName
+ const defaultBranch = payload.defaultBranch
+
+ if (!installationId || !repoFullName || !defaultBranch) {
+ return {
+ success: false,
+ error: 'Missing clone repository payload',
+ retryable: false,
+ }
+ }
+
+ if (!task.sandbox?.id) {
+ return {
+ success: false,
+ error: 'Sandbox not found for clone task',
+ retryable: false,
+ }
+ }
+
+ try {
+ const installationToken = await getInstallationToken(installationId)
+ const { ttyd } = await getSandboxTtydContext(task.sandbox.id, task.project.user.id)
+
+ const authUrl = `https://x-access-token:${installationToken}@github.com/${repoFullName}.git`
+ const escapedAuthUrl = shellEscapeSingleQuoted(authUrl)
+ const escapedBranch = shellEscapeSingleQuoted(defaultBranch)
+ const repoName = repoFullName.split('/').at(-1) || 'repo'
+ const importDirName = repoName.replace(/[^a-zA-Z0-9._-]/g, '-')
+ const uniqueImportDirName = `${importDirName}-${task.projectId}`
+ const escapedImportDirName = shellEscapeSingleQuoted(uniqueImportDirName)
+
+ const cloneCommand = [
+ 'set -e',
+ 'mkdir -p import',
+ `target_dir='import/${escapedImportDirName}'`,
+ 'tmp_dir=$(mktemp -d)',
+ `rm -rf "$target_dir"`,
+ `GIT_TERMINAL_PROMPT=0 GIT_ASKPASS=/bin/echo git clone --depth 1 --branch '${escapedBranch}' '${escapedAuthUrl}' "$tmp_dir/repo"`,
+ 'mkdir -p "$target_dir"',
+ 'cp -a "$tmp_dir/repo"/. "$target_dir"/',
+ 'rm -rf "$tmp_dir"',
+ ].join(' && ')
+
+ await execCommand(
+ ttyd.baseUrl,
+ ttyd.accessToken,
+ cloneCommand,
+ IMPORT_EXEC_TIMEOUT_MS,
+ ttyd.authorization
+ )
+
+ return {
+ success: true,
+ result: {
+ repoFullName,
+ defaultBranch,
+ importPath: `import/${uniqueImportDirName}`,
+ },
+ }
+ } catch (error) {
+ return {
+ success: false,
+ error: error instanceof Error ? error.message : String(error),
+ retryable: true,
+ }
+ }
+}
+
+function shellEscapeSingleQuoted(input: string): string {
+ return input.replace(/'/g, `'\\''`)
+}
diff --git a/lib/jobs/project-task/executors/index.ts b/lib/jobs/project-task/executors/index.ts
new file mode 100644
index 0000000..b549a26
--- /dev/null
+++ b/lib/jobs/project-task/executors/index.ts
@@ -0,0 +1,22 @@
+import type { ProjectTaskType } from '@prisma/client'
+
+import type { ProjectTaskWithRelations } from '@/lib/repo/project-task'
+
+import { type ProjectTaskExecutorResult, runCloneRepositoryTask } from './clone-repository'
+
+export async function runProjectTaskExecutor(
+ task: ProjectTaskWithRelations
+): Promise<ProjectTaskExecutorResult> {
+ switch (task.type as ProjectTaskType) {
+ case 'CLONE_REPOSITORY':
+ return runCloneRepositoryTask(task)
+ default:
+ return {
+ success: false,
+ error: `No executor registered for task type ${task.type}`,
+ retryable: false,
+ }
+ }
+}
+
+export type { ProjectTaskExecutorResult }
diff --git a/lib/jobs/project-task/index.ts b/lib/jobs/project-task/index.ts
new file mode 100644
index 0000000..14a93da
--- /dev/null
+++ b/lib/jobs/project-task/index.ts
@@ -0,0 +1,2 @@
+export { startProjectTaskReconcileJob } from './projectTaskReconcile'
+export { triggerRunnableTasksForProject } from './projectTaskReconcile'
diff --git a/lib/jobs/project-task/projectTaskReconcile.ts b/lib/jobs/project-task/projectTaskReconcile.ts
new file mode 100644
index 0000000..acf50e2
--- /dev/null
+++ b/lib/jobs/project-task/projectTaskReconcile.ts
@@ -0,0 +1,159 @@
+import { Cron } from 'croner'
+
+import { logger as baseLogger } from '@/lib/logger'
+import {
+ acquireAndLockProjectTasks,
+ getProjectTaskById,
+ getRunnableTasksForProject,
+ incrementProjectTaskAttemptCount,
+ LOCK_DURATION_SECONDS,
+ type ProjectTaskWithRelations,
+ setProjectTaskState,
+ tryClaimProjectTaskExecutionLock,
+} from '@/lib/repo/project-task'
+
+import { runProjectTaskExecutor } from './executors'
+
+const logger = baseLogger.child({ module: 'lib/jobs/project-task/projectTaskReconcile' })
+
+const MAX_TASKS_PER_CYCLE = parseInt(process.env.MAX_PROJECT_TASKS_PER_RECONCILE || '10', 10)
+const RECONCILE_INTERVAL_SECONDS = parseInt(
+ process.env.PROJECT_TASK_RECONCILE_INTERVAL_SECONDS || '10',
+ 10
+)
+const EXECUTION_LOCK_DURATION_SECONDS = parseInt(
+ process.env.PROJECT_TASK_EXECUTION_LOCK_DURATION_SECONDS || '300',
+ 10
+)
+
+export function startProjectTaskReconcileJob() {
+ logger.info('Starting project task reconcile job')
+ logger.info(`Lock duration: ${LOCK_DURATION_SECONDS} seconds`)
+ logger.info(`Execution lock duration: ${EXECUTION_LOCK_DURATION_SECONDS} seconds`)
+ logger.info(`Max tasks per cycle: ${MAX_TASKS_PER_CYCLE}`)
+ logger.info(`Reconcile interval: ${RECONCILE_INTERVAL_SECONDS} seconds`)
+
+ const job = new Cron(`*/${RECONCILE_INTERVAL_SECONDS} * * * * *`, async () => {
+ try {
+ await reconcileProjectTasks()
+ } catch (error) {
+ logger.error(`Project task reconcile job error: ${error}`)
+ }
+ })
+
+ logger.info(`✅ Project task reconcile job started (every ${RECONCILE_INTERVAL_SECONDS} seconds)`)
+ return job
+}
+
+async function reconcileProjectTasks() {
+ const tasks = await acquireAndLockProjectTasks(MAX_TASKS_PER_CYCLE)
+
+ if (tasks.length === 0) {
+ return
+ }
+
+ logger.info(`Acquired ${tasks.length} project tasks for reconcile`)
+
+ for (const task of tasks) {
+ try {
+ await handleProjectTask(task)
+ } catch (error) {
+ logger.error(`Failed to process project task ${task.id}: ${error}`)
+ }
+ }
+}
+
+export async function triggerRunnableTasksForProject(projectId: string): Promise<void> {
+ const tasks = await getRunnableTasksForProject(projectId)
+ for (const task of tasks) {
+ try {
+ const freshTask = await getProjectTaskById(task.id)
+ if (freshTask) {
+ await handleProjectTask(freshTask)
+ }
+ } catch (error) {
+ logger.error(`Failed to trigger project task ${task.id}: ${error}`)
+ }
+ }
+}
+
+async function handleProjectTask(task: ProjectTaskWithRelations): Promise<void> {
+ const prerequisites = evaluateTaskPrerequisites(task)
+ if (!prerequisites.ready) {
+ await setProjectTaskState(task.id, {
+ status: 'WAITING_FOR_PREREQUISITES',
+ error: prerequisites.reason ?? null,
+ lockedUntil: null,
+ startedAt: null,
+ finishedAt: null,
+ })
+ return
+ }
+
+ const claimed = await tryClaimProjectTaskExecutionLock(task.id, EXECUTION_LOCK_DURATION_SECONDS)
+ if (!claimed) {
+ return
+ }
+
+ const executingTask = await getProjectTaskById(task.id)
+ if (!executingTask) {
+ return
+ }
+
+ const attemptCount = await incrementProjectTaskAttemptCount(task.id)
+ const result = await runProjectTaskExecutor(executingTask)
+
+ if (result.success) {
+ await setProjectTaskState(task.id, {
+ status: 'SUCCEEDED',
+ error: null,
+ result: result.result,
+ lockedUntil: null,
+ finishedAt: new Date(),
+ attemptCount,
+ })
+ return
+ }
+
+ if (result.retryable && attemptCount < executingTask.maxAttempts) {
+ await setProjectTaskState(task.id, {
+ status: 'PENDING',
+ error: result.error,
+ lockedUntil: null,
+ finishedAt: null,
+ attemptCount,
+ })
+ return
+ }
+
+ await setProjectTaskState(task.id, {
+ status: 'FAILED',
+ error: result.error,
+ lockedUntil: null,
+ finishedAt: new Date(),
+ attemptCount,
+ })
+}
+
+function evaluateTaskPrerequisites(
+ task: Pick<ProjectTaskWithRelations, 'type' | 'sandbox'>
+): { ready: boolean; reason?: string } {
+ switch (task.type) {
+ case 'CLONE_REPOSITORY':
+ case 'INSTALL_SKILL':
+ case 'UNINSTALL_SKILL':
+ case 'DEPLOY_PROJECT':
+ if (!task.sandbox) {
+ return { ready: false, reason: 'Sandbox not found' }
+ }
+ if (task.sandbox.status !== 'RUNNING') {
+ return {
+ ready: false,
+ reason: `Waiting for sandbox to become RUNNING (current: ${task.sandbox.status})`,
+ }
+ }
+ return { ready: true }
+ default:
+ return { ready: false, reason: `Unknown task type ${task.type}` }
+ }
+}
diff --git a/lib/repo/project-import.ts b/lib/repo/project-import.ts
deleted file mode 100644
index 6753c56..0000000
--- a/lib/repo/project-import.ts
+++ /dev/null
@@ -1,150 +0,0 @@
-import type { Project } from '@prisma/client'
-
-import { prisma } from '@/lib/db'
-import { logger as baseLogger } from '@/lib/logger'
-
-const logger = baseLogger.child({ module: 'lib/repo/project-import' })
-
-const LOCK_DURATION_SECONDS = parseInt(process.env.PROJECT_IMPORT_LOCK_DURATION_SECONDS || '5', 10)
-
-type ImportProjectWithRelations = Project & {
- user: {
- id: string
- }
- githubAppInstallation: {
- installationId: number
- } | null
- sandboxes: Array<{
- id: string
- status: string
- }>
-}
-
-const importProjectWithRelationsInclude = {
- user: {
- select: {
- id: true,
- },
- },
- githubAppInstallation: {
- select: {
- installationId: true,
- },
- },
- sandboxes: {
- select: {
- id: true,
- status: true,
- },
- orderBy: {
- createdAt: 'asc',
- },
- },
-} as const
-
-export async function acquireAndLockImportProjects(
- limit: number = 10,
- baseLockSeconds: number = LOCK_DURATION_SECONDS,
- randomOffsetSeconds: number = 2
-): Promise<ImportProjectWithRelations[]> {
- try {
- const now = new Date()
- const randomSeconds = Math.random() * randomOffsetSeconds
- const lockUntil = new Date(now.getTime() + (baseLockSeconds + randomSeconds) * 1000)
-
- const lockedProjects = await prisma.$transaction(async (tx) => {
- return await tx.$queryRaw<Project[]>`
- UPDATE "Project"
- SET
- "importLockedUntil" = ${lockUntil},
- "updatedAt" = NOW()
- WHERE "id" IN (
- SELECT "id"
- FROM "Project"
- WHERE "importStatus" IN ('PENDING', 'CLONING')
- AND ("importLockedUntil" IS NULL OR "importLockedUntil" <= ${now})
- ORDER BY "updatedAt" ASC
- LIMIT ${limit}
- FOR UPDATE SKIP LOCKED
- )
- RETURNING *
- `
- })
-
- if (lockedProjects.length === 0) {
- return []
- }
-
- const projectsWithRelations = await prisma.project.findMany({
- where: {
- id: {
- in: lockedProjects.map((project) => project.id),
- },
- },
- include: importProjectWithRelationsInclude,
- })
-
- return projectsWithRelations
- } catch (error) {
- logger.error(`Error acquiring and locking import projects: ${error}`)
- return []
- }
-}
-
-export async function getImportProjectById(projectId: string): Promise<ImportProjectWithRelations | null> {
- return await prisma.project.findUnique({
- where: {
- id: projectId,
- },
- include: importProjectWithRelationsInclude,
- })
-}
-
-export async function tryClaimImportExecutionLock(
- projectId: string,
- lockSeconds: number
-): Promise<boolean> {
- const now = new Date()
- const lockUntil = new Date(now.getTime() + lockSeconds * 1000)
-
- const result = await prisma.project.updateMany({
- where: {
- id: projectId,
- importStatus: {
- in: ['PENDING', 'CLONING'],
- },
- OR: [{ importLockedUntil: null }, { importLockedUntil: { lte: now } }],
- },
- data: {
- importStatus: 'CLONING',
- importError: null,
- importLockedUntil: lockUntil,
- updatedAt: new Date(),
- },
- })
-
- return result.count > 0
-}
-
-export async function setProjectImportState(
- projectId: string,
- data: {
- importStatus: 'PENDING' | 'CLONING' | 'READY' | 'FAILED'
- importError?: string | null
- importLockedUntil?: Date | null
- }
-): Promise<void> {
- await prisma.project.update({
- where: { id: projectId },
- data: {
- importStatus: data.importStatus,
- importError: data.importError ?? null,
- importLockedUntil:
- data.importLockedUntil === undefined ? undefined : data.importLockedUntil,
- updatedAt: new Date(),
- },
- })
-}
-
-export { LOCK_DURATION_SECONDS }
-export type { ImportProjectWithRelations }
diff --git a/lib/repo/project-task.ts b/lib/repo/project-task.ts
new file mode 100644
index 0000000..4859abb
--- /dev/null
+++ b/lib/repo/project-task.ts
@@ -0,0 +1,206 @@
+import type {
+ Prisma,
+ ProjectTaskStatus,
+ ProjectTaskTriggerSource,
+ ProjectTaskType,
+} from '@prisma/client'
+
+import { prisma } from '@/lib/db'
+import { logger as baseLogger } from '@/lib/logger'
+
+const logger = baseLogger.child({ module: 'lib/repo/project-task' })
+
+const LOCK_DURATION_SECONDS = parseInt(process.env.PROJECT_TASK_LOCK_DURATION_SECONDS || '5', 10)
+
+const projectTaskWithRelationsInclude = {
+ project: {
+ include: {
+ user: true,
+ },
+ },
+ sandbox: {
+ select: {
+ id: true,
+ status: true,
+ },
+ },
+} satisfies Prisma.ProjectTaskInclude
+
+type ProjectTaskWithRelations = Prisma.ProjectTaskGetPayload<{
+ include: typeof projectTaskWithRelationsInclude
+}>
+
+type CreateProjectTaskInput = {
+ projectId: string
+ sandboxId?: string | null
+ type: ProjectTaskType
+ status?: ProjectTaskStatus
+ triggerSource: ProjectTaskTriggerSource
+ payload?: Prisma.InputJsonValue
+ maxAttempts?: number
+}
+
+type ProjectTaskStateUpdate = {
+ status: ProjectTaskStatus
+ error?: string | null
+ result?: Prisma.InputJsonValue | Prisma.NullableJsonNullValueInput
+ lockedUntil?: Date | null
+ startedAt?: Date | null
+ finishedAt?: Date | null
+ attemptCount?: number
+}
+
+export async function createProjectTask(
+ tx: Prisma.TransactionClient,
+ input: CreateProjectTaskInput
+) {
+ return tx.projectTask.create({
+ data: {
+ projectId: input.projectId,
+ sandboxId: input.sandboxId ?? null,
+ type: input.type,
+ status: input.status ?? 'PENDING',
+ triggerSource: input.triggerSource,
+ payload: input.payload,
+ maxAttempts: input.maxAttempts ?? 3,
+ },
+ })
+}
+
+export async function acquireAndLockProjectTasks(
+ limit: number = 10,
+ baseLockSeconds: number = LOCK_DURATION_SECONDS,
+ randomOffsetSeconds: number = 2
+): Promise<ProjectTaskWithRelations[]> {
+ try {
+ const now = new Date()
+ const randomSeconds = Math.random() * randomOffsetSeconds
+ const lockUntil = new Date(now.getTime() + (baseLockSeconds + randomSeconds) * 1000)
+
+ const lockedTasks = await prisma.$transaction(async (tx) => {
+      return await tx.$queryRaw<Array<{ id: string }>>`
+ UPDATE "ProjectTask"
+ SET
+ "lockedUntil" = ${lockUntil},
+ "updatedAt" = NOW()
+ WHERE "id" IN (
+ SELECT "id"
+ FROM "ProjectTask"
+ WHERE "status" IN ('PENDING', 'WAITING_FOR_PREREQUISITES', 'RUNNING')
+ AND ("lockedUntil" IS NULL OR "lockedUntil" <= ${now})
+ ORDER BY "updatedAt" ASC
+ LIMIT ${limit}
+ FOR UPDATE SKIP LOCKED
+ )
+ RETURNING "id"
+ `
+ })
+
+ if (lockedTasks.length === 0) {
+ return []
+ }
+
+ return prisma.projectTask.findMany({
+ where: {
+ id: {
+ in: lockedTasks.map((task) => task.id),
+ },
+ },
+ include: projectTaskWithRelationsInclude,
+ orderBy: { createdAt: 'asc' },
+ })
+ } catch (error) {
+ logger.error(`Error acquiring and locking project tasks: ${error}`)
+ return []
+ }
+}
+
+export async function getProjectTaskById(taskId: string): Promise<ProjectTaskWithRelations | null> {
+ return prisma.projectTask.findUnique({
+ where: { id: taskId },
+ include: projectTaskWithRelationsInclude,
+ })
+}
+
+export async function getRunnableTasksForProject(
+ projectId: string,
+ taskType?: ProjectTaskType
+): Promise<ProjectTaskWithRelations[]> {
+ return prisma.projectTask.findMany({
+ where: {
+ projectId,
+ type: taskType,
+ status: {
+ in: ['PENDING', 'WAITING_FOR_PREREQUISITES', 'RUNNING'],
+ },
+ },
+ include: projectTaskWithRelationsInclude,
+ orderBy: { createdAt: 'asc' },
+ })
+}
+
+export async function tryClaimProjectTaskExecutionLock(
+ taskId: string,
+ lockSeconds: number
+): Promise<boolean> {
+ const now = new Date()
+ const lockUntil = new Date(now.getTime() + lockSeconds * 1000)
+
+ const result = await prisma.projectTask.updateMany({
+ where: {
+ id: taskId,
+ status: {
+ in: ['PENDING', 'WAITING_FOR_PREREQUISITES', 'RUNNING'],
+ },
+ OR: [{ lockedUntil: null }, { lockedUntil: { lte: now } }],
+ },
+ data: {
+ status: 'RUNNING',
+ error: null,
+ lockedUntil: lockUntil,
+ startedAt: now,
+ updatedAt: now,
+ },
+ })
+
+ return result.count > 0
+}
+
+export async function setProjectTaskState(
+ taskId: string,
+ data: ProjectTaskStateUpdate
+): Promise<void> {
+ await prisma.projectTask.update({
+ where: { id: taskId },
+ data: {
+ status: data.status,
+ error: data.error ?? null,
+ result: data.result,
+ lockedUntil: data.lockedUntil === undefined ? undefined : data.lockedUntil,
+ startedAt: data.startedAt === undefined ? undefined : data.startedAt,
+ finishedAt: data.finishedAt === undefined ? undefined : data.finishedAt,
+ attemptCount: data.attemptCount,
+ updatedAt: new Date(),
+ },
+ })
+}
+
+export async function incrementProjectTaskAttemptCount(taskId: string): Promise<number> {
+ const task = await prisma.projectTask.update({
+ where: { id: taskId },
+ data: {
+ attemptCount: {
+ increment: 1,
+ },
+ updatedAt: new Date(),
+ },
+ select: {
+ attemptCount: true,
+ },
+ })
+
+ return task.attemptCount
+}
+
+export { LOCK_DURATION_SECONDS, projectTaskWithRelationsInclude }
+export type { CreateProjectTaskInput, ProjectTaskWithRelations }
diff --git a/lib/services/project-task-dispatcher.ts b/lib/services/project-task-dispatcher.ts
new file mode 100644
index 0000000..0fa19f2
--- /dev/null
+++ b/lib/services/project-task-dispatcher.ts
@@ -0,0 +1,59 @@
+import type { Prisma, ProjectTaskStatus, ProjectTaskType } from '@prisma/client'
+import { ProjectTaskTriggerSource } from '@prisma/client'
+
+import { prisma } from '@/lib/db'
+import { logger as baseLogger } from '@/lib/logger'
+import { createProjectTask } from '@/lib/repo/project-task'
+
+const logger = baseLogger.child({ module: 'lib/services/project-task-dispatcher' })
+
+type DispatchProjectTaskInput = {
+ projectId: string
+ sandboxId?: string | null
+ type: ProjectTaskType
+ triggerSource: ProjectTaskTriggerSource
+ payload?: Prisma.InputJsonValue
+ status?: ProjectTaskStatus
+ maxAttempts?: number
+}
+
+export async function dispatchProjectTask(input: DispatchProjectTaskInput) {
+ const task = await prisma.$transaction(async (tx) =>
+ createProjectTask(tx, {
+ projectId: input.projectId,
+ sandboxId: input.sandboxId,
+ type: input.type,
+ status: input.status,
+ triggerSource: input.triggerSource,
+ payload: input.payload,
+ maxAttempts: input.maxAttempts,
+ })
+ )
+
+ logger.info(`Dispatched project task ${task.id} (${task.type}) for project ${task.projectId}`)
+ return task
+}
+
+export async function dispatchCloneRepositoryTask(input: {
+ projectId: string
+ sandboxId: string
+ installationId: number
+ repoId: number
+ repoFullName: string
+ defaultBranch: string
+}) {
+ return dispatchProjectTask({
+ projectId: input.projectId,
+ sandboxId: input.sandboxId,
+ type: 'CLONE_REPOSITORY',
+ status: 'WAITING_FOR_PREREQUISITES',
+ triggerSource: ProjectTaskTriggerSource.USER_ACTION,
+ payload: {
+ installationId: input.installationId,
+ repoId: input.repoId,
+ repoFullName: input.repoFullName,
+ defaultBranch: input.defaultBranch,
+ },
+ maxAttempts: 3,
+ })
+}
diff --git a/lib/startup/index.ts b/lib/startup/index.ts
index 3523368..4c9ee7d 100644
--- a/lib/startup/index.ts
+++ b/lib/startup/index.ts
@@ -77,17 +77,17 @@ async function startBackgroundJobs() {
const sandboxJob = startSandboxReconcileJob()
logger.info('✅ Sandbox reconcile job started')
- // Start project import reconciliation job
- // Runs every 3 seconds to process GitHub imports when sandboxes become RUNNING
- const { startProjectImportReconcileJob } = await import('@/lib/jobs/project-import')
- const projectImportJob = startProjectImportReconcileJob()
- logger.info('✅ Project import reconcile job started')
+ // Start project task reconciliation job
+ // Runs every 10 seconds to process project-level async tasks when prerequisites are met
+ const { startProjectTaskReconcileJob } = await import('@/lib/jobs/project-task')
+ const projectTaskJob = startProjectTaskReconcileJob()
+ logger.info('✅ Project task reconcile job started')
// Store job references for potential cleanup (optional)
// In most cases, these jobs will run for the lifetime of the server
globalThis.__databaseReconcileJob = databaseJob
globalThis.__sandboxReconcileJob = sandboxJob
- globalThis.__projectImportReconcileJob = projectImportJob
+ globalThis.__projectTaskReconcileJob = projectTaskJob
logger.info('All background jobs started successfully')
} catch (error) {
@@ -123,11 +123,11 @@ export function cleanup() {
logger.info('✅ Sandbox reconcile job stopped')
}
- // Stop project import reconcile job if it exists
- if (globalThis.__projectImportReconcileJob) {
- globalThis.__projectImportReconcileJob.stop()
- globalThis.__projectImportReconcileJob = undefined
- logger.info('✅ Project import reconcile job stopped')
+ // Stop project task reconcile job if it exists
+ if (globalThis.__projectTaskReconcileJob) {
+ globalThis.__projectTaskReconcileJob.stop()
+ globalThis.__projectTaskReconcileJob = undefined
+ logger.info('✅ Project task reconcile job stopped')
}
logger.info('✅ Cleanup completed')
@@ -142,5 +142,5 @@ declare global {
var __sandboxReconcileJob: Cron | undefined
- var __projectImportReconcileJob: Cron | undefined
+ var __projectTaskReconcileJob: Cron | undefined
}
diff --git a/lib/util/project-display-status.ts b/lib/util/project-display-status.ts
index a02e5e5..ac350e0 100644
--- a/lib/util/project-display-status.ts
+++ b/lib/util/project-display-status.ts
@@ -1,4 +1,4 @@
-import type { ProjectImportStatus, ProjectStatus } from '@prisma/client'
+import type { ProjectStatus, ProjectTask, ProjectTaskStatus, ProjectTaskType } from '@prisma/client'
export type ProjectDisplayStatus =
| 'CREATING'
@@ -14,9 +14,9 @@ export type ProjectDisplayStatus =
type ProjectDisplayStatusInput = {
status: ProjectStatus
- importStatus: ProjectImportStatus
githubRepoFullName?: string | null
githubAppInstallationId?: string | null
+  tasks?: Pick<ProjectTask, 'type' | 'status' | 'createdAt'>[]
}
export function isImportProject(project: {
@@ -30,8 +30,9 @@ export function getProjectDisplayStatus(
project: ProjectDisplayStatusInput
): ProjectDisplayStatus {
const importProject = isImportProject(project)
+ const cloneTask = getLatestTask(project.tasks, 'CLONE_REPOSITORY')
- if (project.importStatus === 'FAILED') {
+ if (cloneTask?.status === 'FAILED') {
return 'NEEDS_ATTENTION'
}
@@ -55,7 +56,7 @@ export function getProjectDisplayStatus(
return 'UPDATING'
}
- if (importProject && (project.importStatus === 'PENDING' || project.importStatus === 'CLONING')) {
+ if (importProject && cloneTask && isActiveTaskStatus(cloneTask.status)) {
return 'IMPORTING'
}
@@ -73,3 +74,16 @@ export function getProjectDisplayStatus(
return 'NEEDS_ATTENTION'
}
+
+function getLatestTask(
+ tasks: ProjectDisplayStatusInput['tasks'],
+ type: ProjectTaskType
+) {
+ return tasks
+ ?.filter((task) => task.type === type)
+ .sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime())[0]
+}
+
+function isActiveTaskStatus(status: ProjectTaskStatus): boolean {
+ return ['PENDING', 'WAITING_FOR_PREREQUISITES', 'RUNNING'].includes(status)
+}
diff --git a/prisma/migrations/20260311171810_project_task_platform/migration.sql b/prisma/migrations/20260311171810_project_task_platform/migration.sql
new file mode 100644
index 0000000..239c657
--- /dev/null
+++ b/prisma/migrations/20260311171810_project_task_platform/migration.sql
@@ -0,0 +1,53 @@
+-- CreateEnum
+CREATE TYPE "ProjectTaskType" AS ENUM ('CLONE_REPOSITORY', 'INSTALL_SKILL', 'UNINSTALL_SKILL', 'DEPLOY_PROJECT');
+
+-- CreateEnum
+CREATE TYPE "ProjectTaskStatus" AS ENUM ('PENDING', 'WAITING_FOR_PREREQUISITES', 'RUNNING', 'SUCCEEDED', 'FAILED', 'CANCELLED');
+
+-- CreateEnum
+CREATE TYPE "ProjectTaskTriggerSource" AS ENUM ('USER_ACTION', 'SYSTEM_EVENT', 'POLICY_ROLLOUT');
+
+-- AlterTable
+ALTER TABLE "Project" DROP COLUMN "importError",
+DROP COLUMN "importLockedUntil",
+DROP COLUMN "importStatus";
+
+-- DropEnum
+DROP TYPE "ProjectImportStatus";
+
+-- CreateTable
+CREATE TABLE "ProjectTask" (
+ "id" TEXT NOT NULL,
+ "projectId" TEXT NOT NULL,
+ "sandboxId" TEXT,
+ "type" "ProjectTaskType" NOT NULL,
+ "status" "ProjectTaskStatus" NOT NULL DEFAULT 'PENDING',
+ "triggerSource" "ProjectTaskTriggerSource" NOT NULL,
+ "payload" JSONB,
+ "result" JSONB,
+ "error" TEXT,
+ "attemptCount" INTEGER NOT NULL DEFAULT 0,
+ "maxAttempts" INTEGER NOT NULL DEFAULT 3,
+ "lockedUntil" TIMESTAMP(3),
+ "startedAt" TIMESTAMP(3),
+ "finishedAt" TIMESTAMP(3),
+ "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ "updatedAt" TIMESTAMP(3) NOT NULL,
+
+ CONSTRAINT "ProjectTask_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex
+CREATE INDEX "ProjectTask_projectId_status_idx" ON "ProjectTask"("projectId", "status");
+
+-- CreateIndex
+CREATE INDEX "ProjectTask_status_lockedUntil_idx" ON "ProjectTask"("status", "lockedUntil");
+
+-- CreateIndex
+CREATE INDEX "ProjectTask_type_status_idx" ON "ProjectTask"("type", "status");
+
+-- AddForeignKey
+ALTER TABLE "ProjectTask" ADD CONSTRAINT "ProjectTask_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "Project"("id") ON DELETE CASCADE ON UPDATE CASCADE;
+
+-- AddForeignKey
+ALTER TABLE "ProjectTask" ADD CONSTRAINT "ProjectTask_sandboxId_fkey" FOREIGN KEY ("sandboxId") REFERENCES "Sandbox"("id") ON DELETE SET NULL ON UPDATE CASCADE;
diff --git a/prisma/migrations/migration_lock.toml b/prisma/migrations/migration_lock.toml
new file mode 100644
index 0000000..2fe25d8
--- /dev/null
+++ b/prisma/migrations/migration_lock.toml
@@ -0,0 +1 @@
+provider = "postgresql"
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 445269f..cf5f8c0 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -15,9 +15,9 @@ model User {
updatedAt DateTime @updatedAt
// Relations
- identities UserIdentity[] // One-to-many: User has multiple authentication identities
- projects Project[] // One-to-many: User owns multiple projects
- configs UserConfig[] // One-to-many: User global configurations
+ identities UserIdentity[] // One-to-many: User has multiple authentication identities
+ projects Project[] // One-to-many: User owns multiple projects
+ configs UserConfig[] // One-to-many: User global configurations
githubInstallations GitHubAppInstallation[] // One-to-many: User's GitHub App installations
}
@@ -68,19 +68,19 @@ model UserConfig {
// GitHubAppInstallation: Tracks GitHub App installations
// Required for obtaining installation access tokens
model GitHubAppInstallation {
- id String @id @default(cuid())
- installationId Int @unique // GitHub's numeric installation ID
+ id String @id @default(cuid())
+ installationId Int @unique // GitHub's numeric installation ID
userId String
- accountId Int // GitHub account numeric ID
- accountLogin String // GitHub username or organization name
- accountType String // "User" or "Organization"
+ accountId Int // GitHub account numeric ID
+ accountLogin String // GitHub username or organization name
+ accountType String // "User" or "Organization"
accountAvatarUrl String?
repositorySelection String // "all" or "selected"
- permissions Json @default("{}") // App permissions
- events Json @default("[]") // Subscribed events
+ permissions Json @default("{}") // App permissions
+ events Json @default("[]") // Subscribed events
status GitHubInstallationStatus @default(ACTIVE)
suspendedAt DateTime?
@@ -113,11 +113,6 @@ model Project {
githubRepoFullName String? // "owner/repo-name", for display
githubRepoDefaultBranch String? // Repository default branch for import workflow
- // GitHub import lifecycle
- importStatus ProjectImportStatus @default(READY)
- importError String?
- importLockedUntil DateTime? // Lock for import reconcile job
-
// Aggregated status based on child resources
status ProjectStatus @default(CREATING)
@@ -125,10 +120,11 @@ model Project {
updatedAt DateTime @updatedAt
// Relations: Project is a collection of resources
- user User @relation(fields: [userId], references: [id], onDelete: Cascade) // Many-to-one: Project belongs to a User
- environments Environment[] // One-to-many: Configuration variables
- databases Database[] // One-to-many: Multiple database clusters
- sandboxes Sandbox[] // One-to-many: Multiple runtime containers
+ user User @relation(fields: [userId], references: [id], onDelete: Cascade) // Many-to-one: Project belongs to a User
+ environments Environment[] // One-to-many: Configuration variables
+ databases Database[] // One-to-many: Multiple database clusters
+ sandboxes Sandbox[] // One-to-many: Multiple runtime containers
+ tasks ProjectTask[] // One-to-many: Project-level asynchronous tasks
githubAppInstallation GitHubAppInstallation? @relation(fields: [githubAppInstallationId], references: [id], onDelete: SetNull)
@@index([githubAppInstallationId])
@@ -231,7 +227,8 @@ model Sandbox {
updatedAt DateTime @updatedAt
// Relation: Many-to-one to Project (foreign key: projectId)
- project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
+ project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
+ tasks ProjectTask[]
@@unique([projectId, name]) // Unique sandbox name per project
@@unique([k8sNamespace, sandboxName]) // Unique sandbox name per namespace
@@ -239,6 +236,32 @@ model Sandbox {
@@index([sandboxName])
}
+model ProjectTask {
+ id String @id @default(cuid())
+ projectId String
+ sandboxId String?
+ type ProjectTaskType
+ status ProjectTaskStatus @default(PENDING)
+ triggerSource ProjectTaskTriggerSource
+ payload Json?
+ result Json?
+ error String?
+ attemptCount Int @default(0)
+ maxAttempts Int @default(3)
+ lockedUntil DateTime?
+ startedAt DateTime?
+ finishedAt DateTime?
+ createdAt DateTime @default(now())
+ updatedAt DateTime @updatedAt
+
+ project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
+ sandbox Sandbox? @relation(fields: [sandboxId], references: [id], onDelete: SetNull)
+
+ @@index([projectId, status])
+ @@index([status, lockedUntil])
+ @@index([type, status])
+}
+
// Project aggregated status based on child resources
// Aggregation rules (priority order):
// 1. ERROR: At least one resource has ERROR status
@@ -269,12 +292,26 @@ enum ProjectStatus {
PARTIAL // Inconsistent mixed states - manual intervention needed
}
-// GitHub repository import lifecycle for a project
-enum ProjectImportStatus {
- PENDING // Waiting for sandbox to become RUNNING
- CLONING // Repository clone in progress
- READY // Import finished or not an imported project
- FAILED // Import failed; project remains usable as empty project
+enum ProjectTaskType {
+ CLONE_REPOSITORY
+ INSTALL_SKILL
+ UNINSTALL_SKILL
+ DEPLOY_PROJECT
+}
+
+enum ProjectTaskStatus {
+ PENDING
+ WAITING_FOR_PREREQUISITES
+ RUNNING
+ SUCCEEDED
+ FAILED
+ CANCELLED
+}
+
+enum ProjectTaskTriggerSource {
+ USER_ACTION
+ SYSTEM_EVENT
+ POLICY_ROLLOUT
}
// Individual resource status (Database and Sandbox)
From 22ca386e5970be644b160b3c5187235c48693f48 Mon Sep 17 00:00:00 2001
From: Che <30403707+Che-Zhu@users.noreply.github.com>
Date: Thu, 12 Mar 2026 10:50:20 +0800
Subject: [PATCH 2/3] docs: update architecture direction
---
docs/architecture-evolution.md | 377 ++++++++++++++++++
docs/architecture.md | 672 +++++++++++++++++++--------------
2 files changed, 763 insertions(+), 286 deletions(-)
create mode 100644 docs/architecture-evolution.md
diff --git a/docs/architecture-evolution.md b/docs/architecture-evolution.md
new file mode 100644
index 0000000..835b844
--- /dev/null
+++ b/docs/architecture-evolution.md
@@ -0,0 +1,377 @@
+# Architecture Evolution
+
+This document describes the intended next stage of Fulling's codebase evolution.
+
+It is not a description of the current repository layout. It is the target direction for making the code structure match the system architecture more directly.
+
+## Why Evolve
+
+Fulling is no longer just a conventional web application.
+
+It is a platform with:
+
+- a database-backed control plane
+- resource reconciliation for Kubernetes infrastructure
+- project-level asynchronous task execution inside sandboxes
+- multiple external integrations such as GitHub, ttyd, Kubernetes, and future deploy providers
+
+As the system grows, a framework-shaped code layout stops matching the real mental model of the platform.
+
+The goal of the next phase is to make the codebase express the system in the same terms we use to reason about it:
+
+```text
+Intent -> State -> Reconcile -> Effect
+```
+
+## Target Mental Model
+
+The ideal architecture should be understandable as five layers:
+
+1. Interaction Layer
+2. Control State Layer
+3. Orchestration Layer
+4. Execution Layer
+5. Integration Layer
+
+A reader should be able to answer these questions quickly:
+
+- Where does user intent enter the system?
+- Where is that intent persisted as state?
+- Which background mechanism advances that state?
+- Which code actually performs the external effect?
+- Which files encapsulate external system protocols?
+
+## Target Architecture
+
+### 1. Interaction Layer
+
+Purpose:
+
+- receive requests from users or clients
+- parse input
+- call control-plane commands or queries
+- return responses
+
+Examples:
+
+- Next.js route handlers
+- Server Actions
+- UI entry components
+
+This layer should not contain infrastructure lifecycle logic or long-running orchestration logic.
+
+### 2. Control State Layer
+
+Purpose:
+
+- convert user intent into persisted state transitions
+- define command and query use cases
+- decide which records must be created or updated
+
+Examples:
+
+- create project
+- import project from GitHub
+- create database
+- dispatch install-skill tasks
+- dispatch deploy task
+
+This layer is where the system decides what should happen, not where it actually performs it.
+
+### 3. Orchestration Layer
+
+Purpose:
+
+- scan persisted state
+- evaluate state machine transitions
+- determine whether prerequisites are met
+- claim work and advance it safely
+
+Examples:
+
+- sandbox reconcile
+- database reconcile
+- project task reconcile
+
+This layer should own "when work is ready to run" and "what transition comes next."
+
+### 4. Execution Layer
+
+Purpose:
+
+- perform the actual work after orchestration decides it should happen
+
+Examples:
+
+- create sandbox in Kubernetes
+- stop database cluster
+- clone repository inside sandbox
+- install skill in sandbox
+- deploy project
+
+This layer should be effectful, explicit, and easy to test in isolation.
+
+### 5. Integration Layer
+
+Purpose:
+
+- isolate external system protocol details from internal orchestration and domain logic
+
+Examples:
+
+- GitHub App token exchange and installation APIs
+- ttyd command execution
+- Kubernetes service and managers
+- deploy provider clients
+- AI proxy integration
+
+This layer should be the only place that knows provider-specific protocol details.
+
+## Ideal Repository Shape
+
+The repository should gradually move toward something like this:
+
+```text
+app/
+ Framework entrypoints only
+
+lib/
+ domain/
+ control/
+ commands/
+ queries/
+ persistence/
+ orchestrators/
+ resources/
+ tasks/
+ executors/
+ k8s/
+ sandbox/
+ deploy/
+ integrations/
+ github/
+ ttyd/
+ k8s/
+ aiproxy/
+ policies/
+ shared/
+```
+
+This is not a requirement to rename everything immediately. It is the target shape that best matches the architecture.
+
+## What Each Target Area Means
+
+### `app/`
+
+Should contain:
+
+- routes
+- pages
+- route-local UI composition
+- request parsing
+- response shaping
+
+Should not be the place where major platform logic lives.
+
+### `lib/domain/`
+
+Should contain:
+
+- state semantics
+- status aggregation rules
+- lifecycle definitions
+- prerequisite evaluation logic
+- domain types and invariants
+
+Typical examples:
+
+- project status aggregation
+- task prerequisite rules
+- lifecycle transition rules
+
+### `lib/control/`
+
+Should contain:
+
+- application commands
+- application queries
+- user-intent entrypoints that are independent from framework details
+
+Typical examples:
+
+- `create-project`
+- `import-project-from-github`
+- `dispatch-install-skill-for-user-projects`
+- `get-project-list`
+
+This is the layer that turns interaction into durable state changes.
+
+### `lib/persistence/`
+
+Should contain:
+
+- database access
+- row claiming
+- lock management
+- state transition persistence
+
+This corresponds closely to what `lib/repo/` does today, but with a name that better reflects its role in a control-plane system.
+
+### `lib/orchestrators/`
+
+Should contain:
+
+- reconcile loops
+- transition selection
+- scheduling logic
+- wake-up logic for resource or task processing
+
+This layer is currently spread across `jobs/` and `events/`.
+
+Long term, the code should make it easy to inspect one workflow in one place, instead of hopping across multiple implementation-mechanism directories.
+
+### `lib/executors/`
+
+Should contain:
+
+- effectful operations triggered by orchestrators
+
+Examples:
+
+- Kubernetes resource actions
+- sandbox command execution
+- deployment actions
+
+The key separation is:
+
+- orchestrators decide whether to run
+- executors perform the work
+
+### `lib/integrations/`
+
+Should contain:
+
+- provider-specific APIs
+- transport/protocol code
+- external authentication/token exchange
+
+This is especially important because `services/` and `util/` currently mix together:
+
+- true business services
+- external adapters
+- generic helpers
+
+That mixing increases cognitive load.
+
+## Structural Rules For The Next Phase
+
+### Rule 1: Keep resource plane and task plane distinct
+
+Resource lifecycle and project task execution are related but not the same.
+
+- `Sandbox` and `Database` belong to the resource plane
+- `ProjectTask` belongs to the task plane
+
+They should not collapse into a single generic abstraction too early.
+
+### Rule 2: Prefer architecture-shaped names over framework-shaped names
+
+Names should describe system role, not just invocation style.
+
+Examples:
+
+- better: `control`, `orchestrators`, `integrations`, `executors`
+- weaker: `services`, `util`, `helpers`
+
+### Rule 3: Polling remains the correctness mechanism
+
+Wake-up triggers are useful, but background reconcile loops remain the source of truth.
+
+The architecture should continue to optimize for recoverability, not only low latency.
+
+### Rule 4: State machines should remain explicit
+
+If work needs to resume later, it should be represented in persisted state.
+
+Avoid moving important lifecycle meaning into hidden in-memory logic.
+
+### Rule 5: Reorganization should follow system boundaries, not file count
+
+The purpose of the reorganization is not aesthetic.
+
+It is to make the codebase reflect the platform's actual conceptual boundaries:
+
+- intent handling
+- persisted state
+- orchestration
+- execution
+- external integration
+
+## Suggested Migration Strategy
+
+The next phase should be gradual.
+
+### Phase 1: Clarify naming and ownership
+
+Goals:
+
+- continue building `ProjectTask` as the task-plane abstraction
+- reduce ambiguous use of `services/` and `util/`
+- document system ownership by layer
+
+Low-risk changes:
+
+- move external adapters toward `integrations/`
+- move business rules toward `domain/` or `policies/`
+- introduce command/query naming for control-plane operations
+
+### Phase 2: Reshape orchestration boundaries
+
+Goals:
+
+- make workflows easier to inspect end-to-end
+- reduce mental jumps between job and listener implementations
+
+Likely work:
+
+- make resource orchestration more visibly grouped
+- make task orchestration more visibly grouped
+- separate transition logic from executor logic more explicitly
+
+### Phase 3: Unify platform patterns where appropriate
+
+Goals:
+
+- reduce duplicated scheduling and transition mechanics
+- strengthen task dependency and execution models
+
+Examples:
+
+- generalized task prerequisite evaluation
+- shared orchestration helpers for claiming and transition persistence
+- richer task types such as deploy, install skill, uninstall skill
+
+This phase should happen only after the architecture is clearly expressed in the codebase.
+
+## What "Success" Looks Like
+
+The reorganization is successful when the following are true:
+
+- a new engineer can trace a major flow without guessing layer ownership
+- resource lifecycles are easy to inspect in one conceptual area
+- task execution flows are easy to inspect in one conceptual area
+- external provider code is clearly separated from domain and orchestration code
+- adding a new task type does not require inventing a new architectural path
+
+## Current Direction
+
+The current `ProjectTask` introduction is the first concrete move in this direction.
+
+It shifts GitHub import from a project-specific status track into a reusable task-plane abstraction.
+
+That same direction should guide the next structural changes:
+
+- task types should grow through the task plane
+- external adapters should become more explicit
+- orchestration should become easier to inspect
+- the repository layout should increasingly mirror the architecture itself
diff --git a/docs/architecture.md b/docs/architecture.md
index c76a0fa..09458df 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -1,353 +1,453 @@
-# Architecture Design Document
+# Architecture
-This document describes the architecture of FullstackAgent, an AI-powered cloud development platform.
+This document describes the current runtime architecture of Fulling and the roles of the major subsystems in the repository.
-## Table of Contents
+## Overview
-- [Overview](#overview)
-- [Reconciliation Pattern](#reconciliation-pattern)
-- [System Layers](#system-layers)
-- [Event System](#event-system)
-- [State Management](#state-management)
-- [Resource Lifecycle](#resource-lifecycle)
-- [Key Design Decisions](#key-design-decisions)
+Fulling is a database-driven control plane for project sandboxes.
-## Overview
+The system is organized around three ideas:
-FullstackAgent creates isolated Kubernetes sandbox environments for full-stack development. Each project gets:
+1. User-facing code records intent in PostgreSQL.
+2. Background reconcile jobs read persisted state and advance it asynchronously.
+3. External effects happen through two execution layers:
+ - Kubernetes resource control for sandboxes and databases
+ - Project task execution inside ready sandboxes
-- **Sandbox Container**: Next.js + Claude Code CLI + ttyd terminal + FileBrowser
-- **PostgreSQL Database**: Dedicated KubeBlocks cluster
-- **Live Domains**: HTTPS subdomains for app, terminal, and file browser
+This is not a request/response system that blocks on infrastructure. It is an asynchronous state-convergence system.
-### Core Principle: Asynchronous Reconciliation
+## High-Level Model
-The platform uses an **asynchronous reconciliation pattern** where:
+There are three major domains:
-1. API endpoints return immediately (non-blocking)
-2. Background jobs sync desired state (database) with actual state (Kubernetes)
-3. Event listeners execute K8s operations
-4. Status updates happen asynchronously
+- Control plane
+ - Next.js pages, Server Actions, and API routes
+ - Authentication and authorization
+ - Prisma models as the source of truth
+- Resource plane
+ - `Sandbox` and `Database` lifecycle management
+ - Kubernetes operations through user-scoped services
+- Task plane
+ - `ProjectTask` records for project-level asynchronous work
+ - Current use case: clone a GitHub repository into a sandbox
+ - Future use cases: install skill, uninstall skill, deploy project
-```
-User Request → API updates DB (status=CREATING) → Returns immediately (< 50ms)
- ↓
- Reconciliation Job (every 3s)
- ↓
- Emit Events → Listeners execute K8s ops
- ↓
- Update status: CREATING → STARTING → RUNNING
- ↓
- Frontend polls for updates
+At a high level:
+
+```text
+User action
+-> App code validates and writes desired state to DB
+-> Reconcile jobs scan DB for records in transitional states
+-> Event listeners / task executors perform external work
+-> Resource/task state is updated in DB
+-> UI polls and reflects the latest state
```
-## Reconciliation Pattern
+## Repository Structure
-### Why Reconciliation?
+```text
+app/
+ UI routes, API routes, and route-local components
-Traditional approach (blocking API):
-- API waits for K8s operations (30+ seconds)
-- User sees loading spinner
-- Timeout errors common
-- Hard to recover from failures
+components/
+ Shared UI components
-Reconciliation approach (non-blocking):
-- API returns immediately (< 50ms)
-- Background jobs handle K8s operations
-- Automatic retry on failures
-- Easy to monitor and debug
+lib/actions/
+ Server Actions called from client components
-### Flow Example
+lib/data/
+ Server-side data access for React Server Components
-```
-1. User clicks "Create Project"
- POST /api/projects { name: "my-blog" }
-
-2. API creates database records immediately
- Project: status=CREATING
- Sandbox: status=CREATING
- Database: status=CREATING
-
-3. Reconciliation job runs (every 3s)
- - Query: SELECT * FROM Sandbox WHERE status='CREATING' AND lockedUntil IS NULL
- - Locks sandbox (optimistic locking)
- - Emits CreateSandbox event
-
-4. Event listener executes
- - Gets user-specific K8s service
- - Creates StatefulSet, Service, Ingresses
- - Updates status: CREATING → STARTING
-
-5. Next cycle checks K8s status
- - If RUNNING: Update status to RUNNING
- - Aggregate project status from child resources
-```
+lib/repo/
+ Persistence helpers, locking, and state transitions
+
+lib/jobs/
+ Background reconcile loops
+
+lib/events/
+ Resource event buses and listeners
-### Optimistic Locking
+lib/k8s/
+ User-scoped Kubernetes service and managers
-Prevents concurrent updates to the same resource:
+lib/services/
+ Cross-cutting services and task dispatch helpers
-```typescript
-// Repository layer automatically handles locking
-const lockedSandboxes = await acquireAndLockSandboxes(10)
-// Only returns sandboxes where lockedUntil IS NULL OR < NOW()
-// Sets lockedUntil = NOW() + 30 seconds atomically
+lib/util/
+ Aggregation, ttyd execution, formatting, and helpers
+
+prisma/
+ Prisma schema and migrations
```
-Lock duration: 30 seconds (configurable)
+## Core Runtime Layers
+
+### 1. User Interaction Layer
-## System Layers
+Primary locations:
-### Layer 1: Control Plane (Main App)
+- `app/`
+- `lib/actions/`
+- `lib/data/`
-- **Framework**: Next.js 16 (App Router) + React 19
-- **Authentication**: NextAuth v5 (GitHub, Password, Sealos OAuth)
-- **Database**: Prisma ORM → PostgreSQL
-- **Responsibilities**:
- - Manages projects, users, environment variables
- - Does NOT directly execute K8s operations
- - Only updates database
+Responsibilities:
-### Layer 2: Reconciliation System
+- Authenticate the user
+- Validate inputs
+- Decide whether an operation is allowed
+- Write the resulting state to the database
+- Return immediately without waiting on Kubernetes or long-running sandbox work
-- **Background Jobs**: `lib/jobs/sandbox/`, `lib/jobs/database/`
-- **Event System**: `lib/events/sandbox/`, `lib/events/database/`
-- **Repository Layer**: `lib/repo/` with optimistic locking
-- **Status Aggregation**: `lib/util/projectStatus.ts`
+Examples:
-### Layer 3: Kubernetes Managers
+- Create new project
+ - create `Project`
+ - create `Sandbox`
+ - create built-in environment variables
+- Import from GitHub
+ - create `Project`
+ - create `Sandbox`
+ - create a `ProjectTask` of type `CLONE_REPOSITORY`
+- Add database
+ - create `Database` with status `CREATING`
+- Update environment variables
+ - persist environment changes
+ - mark running sandboxes as `UPDATING`
-- **SandboxManager**: `lib/k8s/sandbox-manager.ts` - StatefulSet operations
-- **DatabaseManager**: `lib/k8s/database-manager.ts` - KubeBlocks operations
-- **K8sServiceHelper**: `lib/k8s/k8s-service-helper.ts` - User-specific service factory
-- **All operations are idempotent and non-blocking**
+### 2. State Persistence Layer
-### Layer 4: Kubernetes Orchestration
+Primary locations:
-- **Platform**: Sealos (usw.sealos.io)
-- **Namespaces**: Each user operates in their own namespace
-- **Resources per project**:
- - 1 StatefulSet (sandbox)
- - 1 Service
- - 3 Ingresses (app, terminal, filebrowser)
- - 1 PostgreSQL cluster (KubeBlocks)
+- `prisma/schema.prisma`
+- `lib/repo/`
-### Layer 5: Runtime Containers
+The database is the durable control plane.
-- **Image**: `limbo2342/fullstack-web-runtime:sha-ca2470e`
-- **Base**: Ubuntu 24.04 + Node.js 22.x
-- **Includes**:
- - Claude Code CLI
- - ttyd (web terminal with HTTP Basic Auth)
- - FileBrowser (web file manager)
- - Next.js, Prisma, PostgreSQL client
- - Buildah (rootless container builds)
+The key models are:
-## Event System
+- `Project`
+ - project metadata
+ - aggregated project status
+ - optional GitHub repository metadata
+- `Sandbox`
+ - sandbox lifecycle state
+ - URLs and runtime resource configuration
+- `Database`
+ - PostgreSQL lifecycle state
+ - connection credentials once ready
+- `Environment`
+ - project-scoped environment variables
+- `ProjectTask`
+ - project-level asynchronous work
+ - payload, result, retries, and locks
+- `GitHubAppInstallation`
+ - GitHub App installation ownership and permissions
-### Event Bus
+The repository layer is where row locking and state transitions are centralized.
-Each resource type has its own event bus:
+## Resource Plane
-```typescript
-// lib/events/sandbox/bus.ts
-export const enum Events {
- CreateSandbox = 'CreateSandbox',
- StartSandbox = 'StartSandbox',
- StopSandbox = 'StopSandbox',
- DeleteSandbox = 'DeleteSandbox',
- UpdateSandbox = 'UpdateSandbox',
-}
+The resource plane manages infrastructure resources that exist independently in Kubernetes.
-export const sandboxEventBus = new EventEmitter()
+### Sandbox Lifecycle
+
+Primary files:
+
+- `lib/jobs/sandbox/sandboxReconcile.ts`
+- `lib/events/sandbox/sandboxListener.ts`
+- `lib/repo/sandbox.ts`
+- `lib/k8s/sandbox-manager.ts`
+
+States:
+
+- `CREATING`
+- `STARTING`
+- `RUNNING`
+- `UPDATING`
+- `STOPPING`
+- `STOPPED`
+- `TERMINATING`
+- `TERMINATED`
+- `ERROR`
+
+Flow:
+
+```text
+Sandbox.status = CREATING
+-> sandbox reconcile job locks the row
+-> emits CreateSandbox
+-> listener creates K8s resources and writes ingress URLs
+-> Sandbox.status = STARTING
+-> later reconcile checks K8s status
+-> Sandbox.status = RUNNING
```
-### Event Listeners
+Environment updates reuse the same mechanism:
-Listeners are registered at application startup:
+```text
+Environment changed
+-> running sandboxes marked UPDATING
+-> update sandbox env vars in Kubernetes
+-> pod restarts if needed
+-> sandbox returns to STARTING or RUNNING
+```
-```typescript
-// lib/events/sandbox/sandboxListener.ts
-export function registerSandboxListeners(): void {
- on(Events.CreateSandbox, handleCreateSandbox)
- on(Events.StartSandbox, handleStartSandbox)
- on(Events.StopSandbox, handleStopSandbox)
- on(Events.DeleteSandbox, handleDeleteSandbox)
- on(Events.UpdateSandbox, handleUpdateSandbox)
-}
+### Database Lifecycle
-// Auto-register when module is imported
-registerSandboxListeners()
+Primary files:
+
+- `lib/jobs/database/databaseReconcile.ts`
+- `lib/events/database/databaseListener.ts`
+- `lib/repo/database.ts`
+- `lib/k8s/database-manager.ts`
+
+States:
+
+- `CREATING`
+- `STARTING`
+- `RUNNING`
+- `STOPPING`
+- `STOPPED`
+- `TERMINATING`
+- `TERMINATED`
+- `ERROR`
+
+Flow:
+
+```text
+Database.status = CREATING
+-> database reconcile job locks the row
+-> emits CreateDatabase
+-> listener creates KubeBlocks cluster
+-> Database.status = STARTING
+-> later reconcile checks cluster status
+-> credentials are fetched
+-> Database.status = RUNNING
```
-### Event Handler Pattern
+### Project Status Aggregation
-```typescript
-async function handleCreateSandbox(payload: SandboxEventPayload): Promise {
- const { user, project, sandbox } = payload
+Primary file:
- if (sandbox.status !== 'CREATING') return
+- `lib/util/projectStatus.ts`
- try {
- const k8sService = await getK8sServiceForUser(user.id)
- await k8sService.getSandboxManager().createSandbox({...})
- await updateSandboxStatus(sandbox.id, 'STARTING')
- await projectStatusReconcile(project.id)
- } catch (error) {
- await updateSandboxStatus(sandbox.id, 'ERROR')
- }
-}
-```
+`Project.status` is derived from child resource states. It is not the main driver of work.
-## State Management
+Priority order:
-### Resource Status
+1. `ERROR`
+2. `CREATING`
+3. `UPDATING`
+4. all resources equal the same stable state
+5. consistent mixed transitions:
+ - `{RUNNING, STARTING}` -> `STARTING`
+ - `{STOPPED, STOPPING}` -> `STOPPING`
+ - `{TERMINATED, TERMINATING}` -> `TERMINATING`
+6. otherwise `PARTIAL`
-Individual resources (Sandbox, Database) have these states:
+When a project has no remaining resources, the project and its environments are deleted.
-| Status | Description |
-|--------|-------------|
-| `CREATING` | K8s resource being initially created |
-| `STARTING` | Transitioning from STOPPED to RUNNING |
-| `RUNNING` | Active and operational |
-| `STOPPING` | Transitioning from RUNNING to STOPPED |
-| `STOPPED` | Paused (replicas=0) |
-| `UPDATING` | Environment variables being updated |
-| `TERMINATING` | Being deleted from K8s |
-| `TERMINATED` | Deleted from K8s (soft delete in DB) |
-| `ERROR` | Encountered an error |
+## Task Plane
-### Project Status Aggregation
+The task plane manages project-level work that happens after a sandbox is ready.
-Project status is **aggregated** from child resources:
-
-**Priority order**:
-1. **ERROR** - At least one resource has ERROR
-2. **CREATING** - At least one resource has CREATING
-3. **UPDATING** - At least one resource has UPDATING
-4. **Pure states** - All same status → use that status
-5. **Transition states**:
- - STARTING: All ∈ {RUNNING, STARTING}
- - STOPPING: All ∈ {STOPPED, STOPPING}
- - TERMINATING: All ∈ {TERMINATED, TERMINATING}
-6. **PARTIAL** - Inconsistent mixed states (manual intervention needed)
-
-```typescript
-// lib/util/projectStatus.ts
-export function aggregateProjectStatus(
- sandboxes: Sandbox[],
- databases: Database[]
-): ProjectStatus {
- // Implementation follows priority rules above
-}
-```
+Primary files:
-## Resource Lifecycle
+- `lib/jobs/project-task/projectTaskReconcile.ts`
+- `lib/jobs/project-task/executors/`
+- `lib/repo/project-task.ts`
+- `lib/services/project-task-dispatcher.ts`
-### Sandbox Lifecycle
+### Current Task Types
-```
-CREATING → STARTING → RUNNING ⇄ STOPPING → STOPPED
- ↓ ↓ ↓ ↓
- └──────────┴─────────┴─────────→ ERROR
- ↓
- TERMINATING → TERMINATED
+- `CLONE_REPOSITORY`
+- `INSTALL_SKILL`
+- `UNINSTALL_SKILL`
+- `DEPLOY_PROJECT`
+
+Only `CLONE_REPOSITORY` is implemented today.
+
+### Task States
+
+- `PENDING`
+- `WAITING_FOR_PREREQUISITES`
+- `RUNNING`
+- `SUCCEEDED`
+- `FAILED`
+- `CANCELLED`
+
+### Task Flow
+
+```text
+User imports a GitHub repository
+-> app creates Project + Sandbox
+-> app creates ProjectTask(type=CLONE_REPOSITORY, status=WAITING_FOR_PREREQUISITES)
+-> sandbox reaches RUNNING
+-> task reconcile sees prerequisites are now satisfied
+-> task executor runs git clone inside the sandbox through ttyd
+-> task becomes SUCCEEDED or FAILED
```
-**Transitions**:
-- `CREATING → STARTING`: K8s resources created
-- `STARTING → RUNNING`: Pod ready
-- `RUNNING → STOPPING`: Stop requested
-- `STOPPING → STOPPED`: Pod terminated
-- `STOPPED → STARTING`: Start requested
-- `Any → ERROR`: Operation failed
-- `Any → TERMINATING`: Delete requested
-- `TERMINATING → TERMINATED`: K8s resources deleted
+Task execution data lives in:
-### Database Lifecycle
+- `payload`
+ - executor input, such as repo metadata or skill id
+- `result`
+ - executor output, such as imported path
+- `error`
+ - terminal error message for failed tasks
+
+## Polling and Triggering
+
+The system uses both polling and direct wake-up triggers.
+
+### Polling
+
+Background jobs continuously scan the database:
+
+- sandbox reconcile job
+- database reconcile job
+- project task reconcile job
+
+This is the correctness mechanism.
+
+### Direct wake-up triggers
+
+Some transitions accelerate work without replacing polling.
+
+Example:
+
+- when a sandbox becomes `RUNNING`, sandbox listeners call `triggerRunnableTasksForProject(projectId)`
+
+This reduces latency, but correctness still depends on periodic reconcile loops.
+
+## Locking Model
-Same as Sandbox, but for KubeBlocks PostgreSQL clusters.
+The system uses database-based optimistic coordination, not an external queue.
-### Environment Variable Updates
+Patterns:
-When environment variables change:
+- resource rows (`Sandbox`, `Database`) have `lockedUntil`
+- task rows (`ProjectTask`) also have `lockedUntil`
+- reconcile queries atomically select and lock eligible rows
+- row-level transitions are updated in repo helpers
+This avoids duplicate processing across concurrent app instances.
+
+## Kubernetes Integration
+
+Primary file:
+
+- `lib/k8s/k8s-service-helper.ts`
+
+Rule:
+
+- always obtain Kubernetes access through `getK8sServiceForUser(userId)`
+
+Why:
+
+- each user has a user-scoped kubeconfig
+- each user operates in a separate namespace
+- the app should never perform cluster operations without user scoping
+
+Kubernetes resources currently managed per project:
+
+- one sandbox StatefulSet
+- one sandbox Service
+- three sandbox Ingresses
+- optional PostgreSQL cluster through KubeBlocks
+
+## GitHub Integration
+
+Primary files:
+
+- `lib/actions/github.ts`
+- `lib/services/github-app.ts`
+- `app/api/github/app/callback/route.ts`
+
+The system uses GitHub App installations, not anonymous repository access.
+
+Import flow:
+
+1. user installs GitHub App
+2. installation is recorded in `GitHubAppInstallation`
+3. user chooses a repository in the import dialog
+4. import action verifies repository access against the installation
+5. project creation creates a clone task
+6. task executor clones the repo into the sandbox using an installation token
+
+## Design Rules
+
+### Non-blocking control plane
+
+User-facing endpoints should write desired state and return. They should not block on Kubernetes creation or long sandbox operations.
+
+### State machines over ad hoc branching
+
+If the system needs to resume work later, represent that as persisted state instead of in-memory flags.
+
+### Resource plane and task plane stay separate
+
+Use resource states for infrastructure lifecycle.
+Use project tasks for asynchronous work that runs on top of ready infrastructure.
+
+### Polling is the source of truth
+
+Event-triggered wake-ups are an optimization. Reconcile jobs remain the primary correctness mechanism.
+
+## Current End-to-End Flows
+
+### New project
+
+```text
+Create project
+-> Project.status = CREATING
+-> Sandbox.status = CREATING
+-> sandbox reconcile creates and starts sandbox
+-> Sandbox.status = RUNNING
+-> Project.status aggregates to RUNNING
```
-RUNNING → UPDATING → STARTING → RUNNING
+
+### Import from GitHub
+
+```text
+Import from GitHub
+-> Project + Sandbox created
+-> ProjectTask(CLONE_REPOSITORY) created
+-> sandbox reconcile drives sandbox to RUNNING
+-> project task reconcile runs clone executor
+-> task becomes SUCCEEDED or FAILED
+-> project remains usable regardless of task outcome
```
-1. Status changes to UPDATING
-2. StatefulSet spec updated (triggers pod restart)
-3. Status changes to STARTING
-4. Pod restarts with new env vars
-5. Status changes to RUNNING
-
-## Key Design Decisions
-
-### Why StatefulSet instead of Deployment?
-
-**StatefulSet benefits**:
-- **Persistent storage**: Each pod gets its own PVC
-- **Stable network identities**: Predictable pod names
-- **Ordered deployment**: Graceful startup/shutdown
-- **Stateful apps**: Better for databases, caches
-
-**Trade-offs**:
-- Slightly slower scaling
-- More complex updates
-
-### Why Reconciliation Pattern?
-
-**Benefits**:
-- Non-blocking API responses
-- Automatic recovery from failures
-- Consistent state management
-- Easy to monitor and debug
-- Idempotent operations
-
-**Trade-offs**:
-- Eventual consistency (up to 3s delay)
-- More complex architecture
-
-### Why HTTP Basic Auth for ttyd?
-
-**Previous approach**: Custom authentication script
-- Required maintaining shell script
-- Complex session management
-- Login popup in browser
-
-**Current approach**: HTTP Basic Auth (ttyd native)
-- No custom scripts needed
-- URL-based authentication: `?authorization=base64(user:password)`
-- Seamless browser integration
-- No login popup
-
-### Why FileBrowser Integration?
-
-**Benefits**:
-- Web-based file management
-- Drag & drop file upload
-- TUS protocol for large files
-- Session tracking for cwd detection
-- No additional authentication (uses own credentials)
-
-### Why User-Specific Namespaces?
-
-**Benefits**:
-- Multi-tenancy isolation
-- Resource quotas per user
-- Separate kubeconfig per user
-- No cross-user access
-
-**Implementation**:
-- Kubeconfig stored in `UserConfig` table
-- Loaded via `getK8sServiceForUser(userId)`
-- Each user operates in their own namespace
-
-## Related Documentation
-
-- [API Reference](./api.md) - API endpoints and request/response formats
-- [Database Schema](./database.md) - Prisma models and relationships
-- [Development Guide](./development.md) - Local development and code patterns
-- [Operations Manual](./operations.md) - Deployment and K8s operations
-- [Troubleshooting](./troubleshooting.md) - Common issues and debugging
+### Add database
+
+```text
+Create database
+-> Database.status = CREATING
+-> database reconcile creates cluster
+-> Database.status = STARTING
+-> credentials become available
+-> Database.status = RUNNING
+```
+
+### Deploy project
+
+Planned path:
+
+```text
+User requests deploy
+-> create ProjectTask(type=DEPLOY_PROJECT)
+-> task reconcile waits for prerequisites
+-> deploy executor performs deployment work
+```
+
+## When To Update This Document
+
+Update this document whenever one of the following changes:
+
+- a new persisted state machine is introduced
+- resource lifecycle semantics change
+- a new task type is added
+- ownership of a subsystem moves to a different directory
+- project-level work stops being sandbox-based
From f219ce616670a340be38be04ce21609c87a9d4aa Mon Sep 17 00:00:00 2001
From: Che <30403707+Che-Zhu@users.noreply.github.com>
Date: Thu, 12 Mar 2026 10:54:37 +0800
Subject: [PATCH 3/3] chore: update repo guidance
---
AGENTS.md | 196 ++++++++++++++++++++++++++++++++++++++++
docs/troubleshooting.md | 6 ++
2 files changed, 202 insertions(+)
create mode 100644 AGENTS.md
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..66c82cd
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,196 @@
+# AGENTS.md
+
+This file provides guidance to AI coding agents (such as Codex) when working with code in this repository.
+
+## Project Overview
+
+**Fulling v2** is an AI-powered development platform that integrates AI Agent ecosystem to provide full-stack development capabilities. Users can import existing projects from GitHub or create new projects directly on the platform.
+
+**Core Value**: Free users' mental bandwidth through AI Agents. Users focus on development while Agents silently handle complex operations (deployment, infrastructure, etc.) without interruption.
+
+**Key Features**:
+- **Flexible Project Creation**: Import from GitHub repositories or create new projects from scratch
+- **Optional Database**: Add PostgreSQL database on-demand when needed
+- **AI Agent Ecosystem**: AI agents handle development, testing, deployment, and infrastructure management
+- **Automated Operations**: Deployment, scaling, and infrastructure management happen automatically in the background
+- **Full-Stack Development**: Complete environment with optional database, terminal, and file management
+- **Zero Infrastructure Knowledge Required**: Users don't need to understand Kubernetes, networking, or DevOps
+
+**Architecture**: The platform uses an **asynchronous reconciliation pattern** where API endpoints return immediately and background jobs sync desired state (database) with actual state (Kubernetes) every 3 seconds.
+
+## Tech Stack
+
+### Frontend
+- Framework: Next.js 16 (App Router) + React 19
+- Language: TypeScript
+- Styling: Tailwind CSS v4
+- UI Components: Shadcn/UI
+
+### Backend
+- Runtime: Node.js 22
+- API: Next.js API Routes
+- Database ORM: Prisma
+- Authentication: NextAuth v5 (GitHub, Password, Sealos OAuth)
+
+### Infrastructure
+- Container Orchestration: Kubernetes
+- Database: PostgreSQL (via KubeBlocks)
+- Web Terminal: ttyd (HTTP Basic Auth)
+- File Manager: FileBrowser
+
+## Key Conventions
+
+### Code Style
+- Use TypeScript strict mode
+- Always follow best practices
+- Write self-documenting code: for complex functions, describe purpose, expected inputs, and expected outputs above the function
+- Use functional components with hooks
+
+### Naming Conventions
+- K8s resources: `{k8s-project-name}-{resource-type}-{6chars}`
+- Environment variables: `UPPER_SNAKE_CASE`
+- Database tables: PascalCase (Prisma models)
+- API routes: kebab-case
+- Files: kebab-case
+
+### Component Organization
+- **Route-specific components**: Place in `_components/` directory under the route folder
+ - Use `_` prefix to prevent Next.js from treating it as a route
+ - Example: `app/(dashboard)/settings/_components/github-status-card.tsx`
+- **Shared components**: Place in top-level `components/` directory
+ - Only for components used across multiple routes
+ - Example: `components/ui/button.tsx`, `components/sidebar.tsx`
+
+### Important Patterns
+
+1. **Always use user-specific K8s service**:
+ ```typescript
+ const k8sService = await getK8sServiceForUser(userId)
+ ```
+
+2. **API endpoints are non-blocking**:
+ - Only update database
+ - Return immediately
+ - Reconciliation jobs handle K8s operations
+
+3. **Use optimistic locking**:
+ - Repository layer handles locking automatically
+ - Prevents concurrent updates
+
+4. **Follow reconciliation pattern**:
+ - API → Database → Reconciliation Job → Event → K8s Operation
+ - Status updates happen asynchronously
+
+## Key Design Decisions
+
+### Why StatefulSet?
+- Persistent storage for each pod
+- Stable network identities
+- Ordered pod deployment
+
+### Why Reconciliation Pattern?
+- Non-blocking API responses
+- Automatic recovery from failures
+- Consistent state management
+- Easy to monitor and debug
+
+### Why User-Specific Namespaces?
+- Multi-tenancy isolation
+- Resource quotas per user
+- Separate kubeconfig per user
+- No cross-user access
+
+## Project Structure
+
+```
+Fulling/
+├── app/ # Next.js App Router
+│ ├── api/ # API Routes
+│ ├── (auth)/ # Auth pages
+│ └── (dashboard)/projects/[id]/
+│ └── _components/ # Route-specific components
+│
+├── components/ # Shared components
+├── hooks/ # Client-side hooks
+│
+├── lib/
+│ ├── data/ # Server-side data access (for Server Components)
+│ ├── actions/ # Server Actions (invoked from Client Components)
+│ ├── repo/ # Repository layer with optimistic locking
+│ ├── services/ # Business logic services
+│ ├── events/ # Event bus and listeners
+│ ├── jobs/ # Reconciliation background jobs
+│ ├── startup/ # Application initialization
+│ ├── k8s/ # Kubernetes operations
+│ └── util/ # Utility functions
+│
+├── prisma/ # Prisma schema
+├── provider/ # React Context providers
+├── runtime/ # Sandbox Docker image
+└── yaml/ # Kubernetes templates
+```
+
+**Key Directories**:
+- `lib/data/` - Server-side data access, used by Server Components
+- `lib/actions/` - Server Actions, used by Client Components
+- `lib/repo/` - Repository with optimistic locking, used by Jobs/Events
+- `lib/events/` + `lib/jobs/` - Core of reconciliation pattern
+- `lib/startup/` - Initializes event listeners and reconciliation jobs
+
+## Documentation Index
+
+- [Architecture](./docs/architecture.md) - Reconciliation pattern, event system, state management
+- [Development Guide](./docs/development.md) - Local development, code patterns, testing
+- [Operations Manual](./docs/operations.md) - Deployment, monitoring, K8s operations
+- [Troubleshooting](./docs/troubleshooting.md) - Common issues, debugging commands
+
+## Quick Reference
+
+### Development Commands
+```bash
+pnpm dev # Start dev server
+pnpm build # Build for production
+pnpm lint # Run ESLint
+npx prisma generate # Generate Prisma client
+npx prisma db push # Push schema to database
+```
+
+### Key Files
+- `lib/k8s/k8s-service-helper.ts` - User-specific K8s service
+- `lib/events/sandbox/sandboxListener.ts` - Sandbox lifecycle handlers
+- `lib/jobs/sandbox/sandboxReconcile.ts` - Sandbox reconciliation job
+- `lib/events/database/databaseListener.ts` - Database lifecycle handlers
+- `lib/jobs/database/databaseReconcile.ts` - Database reconciliation job
+- `lib/actions/project.ts` - Project creation (creates Sandbox only)
+- `lib/actions/database.ts` - Database creation/deletion (on-demand)
+- `prisma/schema.prisma` - Database schema
+- `instrumentation.ts` - Application startup
+
+### Environment Variables
+- `DATABASE_URL` - Main app database connection
+- `NEXTAUTH_SECRET` - NextAuth secret
+- `GITHUB_CLIENT_ID` / `GITHUB_CLIENT_SECRET` - GitHub OAuth
+- `SEALOS_JWT_SECRET` - Sealos OAuth validation
+- `RUNTIME_IMAGE` - Container image version
+
+### Resource Status
+- `CREATING` → `STARTING` → `RUNNING` ⇄ `STOPPING` → `STOPPED`
+- `UPDATING` - Environment variables being updated
+- `TERMINATING` → `TERMINATED`
+- `ERROR` - Operation failed
+
+### Port Exposure
+- **3000**: Next.js application
+- **7681**: ttyd web terminal
+- **8080**: FileBrowser (file manager)
+
+## Important Notes
+
+- **Project Resources**: Each project includes a Sandbox (required) and can optionally have a Database (PostgreSQL). Database can be added on-demand after project creation.
+- **Reconciliation Delay**: Status updates may take up to 3 seconds
+- **User-Specific Namespaces**: Each user operates in their own K8s namespace
+- **Frontend Polling**: Client components poll every 3 seconds for status updates
+- **Database Wait Time**: PostgreSQL cluster takes 2-3 minutes to reach "Running" (when added)
+- **Idempotent Operations**: All K8s methods can be called multiple times safely
+- **Lock Duration**: Optimistic locks held for 30 seconds
+- **Deployment Domain**: Main app listens on `0.0.0.0:3000` (not localhost) for Sealos
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index b7ac6c9..0eb338a 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -8,6 +8,7 @@ This document provides solutions for common issues and debugging techniques.
- [Debugging Commands](#debugging-commands)
- [Error Messages](#error-messages)
- [Performance Issues](#performance-issues)
+- [Incident References](#incident-references)
## Common Issues
@@ -258,6 +259,11 @@ await prisma.project.findMany({
})
// Check query performance
+
+## Incident References
+
+- GitHub import delay postmortem:
+ - [project-import-delay-postmortem.md](./project-import-delay-postmortem.md)
const prisma = new PrismaClient({
log: ['query', 'info', 'warn', 'error'],
})