Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 123 additions & 3 deletions cmd/deploy_azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/DevExpGBB/gh-devlake/internal/azure"
"github.com/DevExpGBB/gh-devlake/internal/devlake"
dockerpkg "github.com/DevExpGBB/gh-devlake/internal/docker"
"github.com/DevExpGBB/gh-devlake/internal/gitclone"
"github.com/DevExpGBB/gh-devlake/internal/prompt"
Expand Down Expand Up @@ -73,6 +74,23 @@ func runDeployAzure(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to create directory %s: %w", deployAzureDir, err)
}

// ── Check for existing Azure deployment ──
if existingState, resumeAction := detectExistingAzureDeployment(deployAzureDir); existingState != nil {
switch resumeAction {
case "abort":
return nil
case "restart":
fmt.Println("\n🧹 To restart, you need to clean up the existing deployment first")
fmt.Println(" Note: This will delete all Azure resources in the resource group")
fmt.Println(" Please run: gh devlake cleanup --azure")
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The restart guidance assumes gh devlake cleanup --azure will find the Azure state file in the current working directory, but this command writes .devlake-azure.json into --dir. If the user deployed with --dir and is not in that directory, cleanup will fail unless they cd there, pass --state-file, or provide --resource-group. Update the instructions to include the correct directory/state-file path.

This issue also appears on line 328 of the same file.

Suggested change
fmt.Println(" Please run: gh devlake cleanup --azure")
fmt.Printf(" Please run: cd %s && gh devlake cleanup --azure\n", deployAzureDir)

Copilot uses AI. Check for mistakes.
fmt.Println(" Then re-run: gh devlake deploy azure")
return nil
case "resume":
// Continue with the deployment - may update existing resources
fmt.Println("\n Continuing with deployment (will update existing resources)...")
}
}

// ── Interactive image-source prompt (when no explicit flag set) ──
if !cmd.Flags().Changed("official") && !cmd.Flags().Changed("repo-url") {
imageChoices := []string{
Expand Down Expand Up @@ -146,10 +164,21 @@ func runDeployAzure(cmd *cobra.Command, args []string) error {
if err != nil {
fmt.Println(" Not logged in. Running az login...")
if loginErr := azure.Login(); loginErr != nil {
fmt.Println("\n💡 Azure CLI login failed")
fmt.Println(" Recovery steps:")
fmt.Println(" 1. Install Azure CLI: https://docs.microsoft.com/cli/azure/install-azure-cli")
fmt.Println(" 2. Run: az login")
fmt.Println(" 3. Follow the browser authentication flow")
fmt.Println(" 4. Re-run: gh devlake deploy azure")
return fmt.Errorf("az login failed: %w", loginErr)
}
acct, err = azure.CheckLogin()
if err != nil {
fmt.Println("\n💡 Still not authenticated after login")
fmt.Println(" Try:")
fmt.Println(" • Run 'az account list' to see your subscriptions")
fmt.Println(" • Run 'az account set --subscription <id>' if needed")
fmt.Println(" • Check Azure CLI version: az --version")
return fmt.Errorf("still not logged in after az login: %w", err)
}
}
Expand All @@ -163,7 +192,7 @@ func runDeployAzure(cmd *cobra.Command, args []string) error {
fmt.Println(" ✅ Resource Group created")

// ── Write early checkpoint — ensures cleanup works even if deployment fails ──
savePartialAzureState(azureRG, azureLocation)
savePartialAzureState(deployAzureDir, azureRG, azureLocation)

// ── Generate secrets ──
fmt.Println("\n🔐 Generating secrets...")
Expand Down Expand Up @@ -289,6 +318,16 @@ func runDeployAzure(cmd *cobra.Command, args []string) error {

deployment, err := azure.DeployBicep(azureRG, templatePath, params)
if err != nil {
fmt.Println("\n❌ Bicep deployment failed")
fmt.Println("\n💡 Troubleshooting steps:")
fmt.Println(" 1. Check Azure portal for deployment details:")
fmt.Printf(" https://portal.azure.com/#blade/HubsExtension/DeploymentDetailsBlade/resourceGroup/%s\n", azureRG)
fmt.Println(" 2. Check if quota limits were exceeded in your subscription")
fmt.Println(" 3. Verify the resource group location supports all required services")
fmt.Println(" 4. Check for service principal or permission issues")
fmt.Println("\n To retry:")
fmt.Println(" • If partial deployment exists, re-run will attempt to continue")
fmt.Println(" • To start fresh: gh devlake cleanup --azure, then deploy again")
return fmt.Errorf("Bicep deployment failed: %w", err)
}

Expand Down Expand Up @@ -425,8 +464,9 @@ func conditionalACR() any {
// Resource Group is created so that cleanup --azure always has a breadcrumb,
// even when the deployment fails mid-flight (e.g. Docker build errors).
// The full state write at the end of a successful deployment overwrites this.
func savePartialAzureState(rg, region string) {
stateFile := ".devlake-azure.json"
func savePartialAzureState(dir, rg, region string) {
absDir, _ := filepath.Abs(dir)
stateFile := filepath.Join(absDir, ".devlake-azure.json")
partial := map[string]any{
"deployedAt": time.Now().Format(time.RFC3339),
"resourceGroup": rg,
Expand All @@ -438,3 +478,83 @@ func savePartialAzureState(rg, region string) {
fmt.Fprintf(os.Stderr, "⚠️ Could not save early state checkpoint: %v\n", err)
}
}

// detectExistingAzureDeployment checks for existing Azure deployment state and prompts for action.
// Returns any existing state data and the user's choice: "resume", "restart", or "abort".
func detectExistingAzureDeployment(dir string) (map[string]any, string) {
if deployAzureQuiet {
// When called from init wizard, don't prompt
return nil, ""
}

Comment on lines +482 to +489
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New interactive resume/restart logic (detectExistingAzureDeployment) affects control flow and user guidance, but there are no tests for Azure deploy behaviors in this package. Consider adding unit tests around state-file parsing and action selection (including partial state) to catch regressions, especially since this code now influences whether deployments proceed or exit.

Copilot uses AI. Check for mistakes.
absDir, _ := filepath.Abs(dir)
stateFile := filepath.Join(absDir, ".devlake-azure.json")

// Check for state file
data, err := os.ReadFile(stateFile)
if err != nil {
if !os.IsNotExist(err) {
fmt.Printf("\n⚠️ Could not read Azure state file %s: %v\n", stateFile, err)
}
// No state file found or unreadable - proceed without state
return nil, ""
}

var state map[string]any
if err := json.Unmarshal(data, &state); err != nil {
// State file is corrupted - warn and proceed
fmt.Printf("\n⚠️ Found .devlake-azure.json but could not parse it: %v\n", err)
return nil, ""
}

// Display existing deployment info
fmt.Println("\n📋 Found existing Azure deployment:")
if deployedAt, ok := state["deployedAt"].(string); ok {
fmt.Printf(" Deployed: %s\n", deployedAt)
}
if rg, ok := state["resourceGroup"].(string); ok {
fmt.Printf(" Resource Group: %s\n", rg)
}
if region, ok := state["region"].(string); ok {
fmt.Printf(" Region: %s\n", region)
}

// Check if this is a partial deployment (failed mid-way)
isPartial := false
if partial, ok := state["partial"].(bool); ok && partial {
fmt.Println(" Status: ⚠️ Partial deployment (may have failed)")
isPartial = true
}

// Check if endpoints are available and reachable
if endpoints, ok := state["endpoints"].(map[string]any); ok {
if backend, ok := endpoints["backend"].(string); ok && backend != "" {
fmt.Printf(" Backend: %s\n", backend)
if err := devlake.PingURL(backend); err == nil {
fmt.Println(" Status: ✅ Running")
} else {
fmt.Println(" Status: ⚠️ Not responding (may be stopped)")
}
}
}

fmt.Println()
choices := []string{
"resume - Continue/update existing deployment",
"restart - Clean up and start fresh (requires manual cleanup)",
"abort - Exit without making changes",
}

if isPartial {
// For partial deployments, recommend resume
choices[0] = "resume - Continue deployment from where it failed (recommended)"
}

choice := prompt.Select("What would you like to do?", choices)
if choice == "" {
return state, "abort"
}

action := strings.SplitN(choice, " ", 2)[0]
return state, action
}
167 changes: 161 additions & 6 deletions cmd/deploy_local.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,24 @@ func runDeployLocal(cmd *cobra.Command, args []string) error {
}
}

// ── Check for existing deployment ──
_, resumeAction := detectExistingLocalDeployment(deployLocalDir)
if resumeAction != "" {
switch resumeAction {
case "abort":
return nil
case "restart":
fmt.Println("\n🧹 Cleaning up existing deployment...")
if err := cleanupLocalQuiet(deployLocalDir); err != nil {
fmt.Printf(" ⚠️ Cleanup encountered issues: %v\n", err)
fmt.Println(" Continuing with deployment...")
}
case "resume":
// Continue with the deployment - existing artifacts will be reused
fmt.Println("\n Continuing with existing deployment artifacts...")
}
}
Comment on lines +74 to +90
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing-deployment prompt can be shown even when no state file is loaded (state==nil), but runDeployLocal only enters the switch when existingState != nil. In the common “artifacts exist but .devlake-local.json is missing/couldn’t load” case, the user’s resume/restart/abort choice is ignored and the deploy proceeds regardless. Consider changing the condition to key off a separate “foundArtifacts” flag or the returned action (e.g., resumeAction != "") so restart/abort are honored even without a state file.

This issue also appears on line 585 of the same file.

Copilot uses AI. Check for mistakes.

// ── Interactive image-source prompt (when no explicit flag set) ──
if deployLocalSource == "" {
imageChoices := []string{
Expand Down Expand Up @@ -177,9 +195,12 @@ func runDeployLocal(cmd *cobra.Command, args []string) error {
fmt.Println("\n🐳 Checking Docker...")
if err := dockerpkg.CheckAvailable(); err != nil {
fmt.Println(" ❌ Docker not found or not running")
fmt.Println(" Install Docker Desktop: https://docs.docker.com/get-docker")
fmt.Println(" Start Docker Desktop, then re-run: gh devlake deploy local")
return fmt.Errorf("Docker is not available — start Docker Desktop and retry")
fmt.Println("\n💡 Recovery steps:")
fmt.Println(" 1. Install Docker Desktop: https://docs.docker.com/get-docker")
fmt.Println(" 2. Start Docker Desktop and wait for it to fully initialize")
fmt.Println(" 3. Verify Docker is running: docker ps")
fmt.Println(" 4. Re-run this command: gh devlake deploy local")
return fmt.Errorf("Docker is not available — follow recovery steps above: %w", err)
}
fmt.Println(" ✅ Docker found")

Expand Down Expand Up @@ -518,10 +539,10 @@ func startLocalContainers(dir string, build bool, services ...string) (string, e
fmt.Println(" docker ps --format \"table {{.Names}}\\t{{.Ports}}\"")
}
fmt.Println("\n Then re-run:")
fmt.Println(" gh devlake init")
fmt.Println(" gh devlake deploy local")
fmt.Println("\n💡 To clean up partial artifacts:")
fmt.Println(" gh devlake cleanup --local --force")
return "", fmt.Errorf("port conflict — stop the conflicting container and retry")
return "", fmt.Errorf("port conflict — stop the conflicting container and retry: %w", err)
}
fmt.Println("\n💡 To clean up partial artifacts:")
fmt.Println(" gh devlake cleanup --local --force")
Expand All @@ -536,7 +557,141 @@ func startLocalContainers(dir string, build bool, services ...string) (string, e

backendURL, err := waitForReadyAny(backendURLCandidates, 36, 10*time.Second)
if err != nil {
return "", fmt.Errorf("DevLake not ready after 6 minutes — check: docker compose logs devlake: %w", err)
fmt.Println("\n❌ DevLake not ready after 6 minutes")
fmt.Println("\n💡 Troubleshooting steps:")

// Detect which compose file exists
composeFile := "docker-compose.yml"
if _, statErr := os.Stat(filepath.Join(absDir, "docker-compose.yml")); os.IsNotExist(statErr) {
if _, statErr := os.Stat(filepath.Join(absDir, "docker-compose-dev.yml")); statErr == nil {
composeFile = "docker-compose-dev.yml"
}
}
composePath := filepath.Join(absDir, composeFile)

fmt.Printf(" 1. Check container logs: docker compose -f \"%s\" logs devlake\n", composePath)
fmt.Printf(" 2. Verify all containers are running: docker compose -f \"%s\" ps\n", composePath)
fmt.Printf(" 3. Check MySQL initialization: docker compose -f \"%s\" logs mysql\n", composePath)
fmt.Printf(" 4. If containers keep restarting, check: docker compose -f \"%s\" logs\n", composePath)
fmt.Println("\n Common issues:")
fmt.Println(" • MySQL takes longer on first run (database initialization)")
fmt.Println(" • Insufficient Docker resources (increase memory in Docker Desktop settings)")
fmt.Println(" • Port conflicts (check docker compose logs for 'address already in use')")
return "", fmt.Errorf("DevLake not ready — check logs for details: %w", err)
}
return backendURL, nil
}

// detectExistingLocalDeployment checks for existing deployment artifacts and prompts for action.
// Returns the existing state (if found) and the user's choice: "resume", "restart", or "abort".
func detectExistingLocalDeployment(dir string) (*devlake.State, string) {
if deployLocalQuiet {
Comment on lines +585 to +588
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new existing-deployment detection/prompting logic isn’t covered by tests. Since cmd/deploy_local.go already has a test file (cmd/deploy_local_test.go), consider adding unit coverage for the artifact/state detection logic by splitting prompting from detection (e.g., return a structured detection result and inject the selection function) so it can be tested without interactive stdin.

This issue also appears on line 681 of the same file.

Copilot uses AI. Check for mistakes.
// When called from init wizard, don't prompt
return nil, ""
Comment on lines +585 to +590
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New interactive recovery logic (detectExistingLocalDeployment / cleanupLocalQuiet) changes deploy behavior significantly, but cmd/deploy_local_test.go currently only covers the poetry-line rewrite helper. Add unit tests for the detection paths (state file present, artifacts-only, quiet mode) and ensure the chosen action is handled correctly.

Copilot uses AI. Check for mistakes.
}

absDir, _ := filepath.Abs(dir)
stateFile := filepath.Join(absDir, ".devlake-local.json")

// Check for state file
state, err := devlake.LoadState(stateFile)
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

devlake.LoadState can fail due to JSON parse errors or permission issues; currently those cases are treated the same as “no state file” and no warning is shown. It’d be helpful to distinguish os.IsNotExist from other errors and surface a warning (or return an error for unreadable state) so users understand why recovery can’t use the state file.

Suggested change
state, err := devlake.LoadState(stateFile)
state, err := devlake.LoadState(stateFile)
if err != nil && !os.IsNotExist(err) {
fmt.Printf("\n⚠️ Unable to read local deployment state from %s: %v\n", stateFile, err)
}

Copilot uses AI. Check for mistakes.
if err != nil && !os.IsNotExist(err) {
fmt.Printf("\n⚠️ Unable to read local deployment state from %s: %v\n", stateFile, err)
}
if err != nil || state == nil {
// No state file or failed to load - check for docker-compose.yml + .env
composePath := filepath.Join(absDir, "docker-compose.yml")
devComposePath := filepath.Join(absDir, "docker-compose-dev.yml")
envPath := filepath.Join(absDir, ".env")

hasCompose := false
composeFileName := ""
if _, err := os.Stat(composePath); err == nil {
hasCompose = true
composeFileName = "docker-compose.yml"
} else if _, err := os.Stat(devComposePath); err == nil {
hasCompose = true
composeFileName = "docker-compose-dev.yml"
}

hasEnv := false
if _, err := os.Stat(envPath); err == nil {
hasEnv = true
}

// If we have artifacts but no state file, it might be a partial deployment
if hasCompose || hasEnv {
fmt.Println("\n📋 Found existing deployment artifacts:")
if hasCompose {
fmt.Printf(" • %s\n", composeFileName)
}
if hasEnv {
fmt.Println(" • .env file")
}
Comment on lines +623 to +630
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the compose file found is docker-compose-dev.yml, the output still lists it as “docker-compose.yml”. This can mislead users about what file is actually present and which file to edit/run commands against. Consider tracking which compose filename was detected and printing the correct one.

Copilot uses AI. Check for mistakes.
} else {
// No artifacts found - proceed normally
return nil, ""
}
} else {
// State file exists - check if deployment is running
fmt.Println("\n📋 Found existing deployment:")
fmt.Printf(" Deployed: %s\n", state.DeployedAt)
if state.Endpoints.Backend != "" {
fmt.Printf(" Backend: %s\n", state.Endpoints.Backend)

// Check if backend is still running
if err := devlake.PingURL(state.Endpoints.Backend); err == nil {
fmt.Println(" Status: ✅ Running")
} else {
fmt.Println(" Status: ⚠️ Not responding (may be stopped)")
}
}
}

fmt.Println()
choices := []string{
"resume - Continue with existing artifacts (recommended for recovery)",
"restart - Clean up and start fresh",
"abort - Exit without making changes",
}
choice := prompt.Select("What would you like to do?", choices)
if choice == "" {
return state, "abort"
}

action := strings.SplitN(choice, " ", 2)[0]
return state, action
}

// cleanupLocalQuiet performs cleanup of local deployment without prompts (used for restart).
func cleanupLocalQuiet(dir string) error {
absDir, _ := filepath.Abs(dir)

// Stop containers if compose file exists
composePath := filepath.Join(absDir, "docker-compose.yml")
devComposePath := filepath.Join(absDir, "docker-compose-dev.yml")

if _, err := os.Stat(composePath); err == nil {
if err := dockerpkg.ComposeDown(absDir); err != nil {
return fmt.Errorf("docker compose down failed: %w", err)
}
} else if _, err := os.Stat(devComposePath); err == nil {
// For docker-compose-dev.yml, we need to run docker compose explicitly
// since ComposeDown expects docker-compose.yml by default
cmd := exec.Command("docker", "compose", "-f", devComposePath, "down", "--rmi", "local")
cmd.Dir = absDir
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("docker compose down failed: %w\n%s", err, string(out))
}
}
Comment on lines +674 to +686
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cleanupLocalQuiet checks for docker-compose-dev.yml, but then calls dockerpkg.ComposeDown(absDir) without specifying the compose file. docker compose down won’t use docker-compose-dev.yml by default, so the restart cleanup path will fail when only docker-compose-dev.yml exists. Consider either (1) running docker compose -f docker-compose-dev.yml down in this branch, or (2) standardizing by renaming/copying docker-compose-dev.yml to docker-compose.yml before calling ComposeDown.

Copilot uses AI. Check for mistakes.

// Remove state file
stateFile := filepath.Join(absDir, ".devlake-local.json")
if _, err := os.Stat(stateFile); err == nil {
if err := os.Remove(stateFile); err != nil {
fmt.Printf("\n⚠️ Failed to remove local state file %s: %v\n", stateFile, err)
}
}

return nil
}
Comment on lines +585 to +697
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New existing-deployment detection/cleanup behavior (detectExistingLocalDeployment + cleanupLocalQuiet) isn’t covered by tests. Since this package already has cmd/deploy_local_test.go, adding unit tests for the decision logic (artifact detection, state-file handling, action parsing) would help prevent regressions.

Copilot uses AI. Check for mistakes.
6 changes: 6 additions & 0 deletions internal/devlake/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ func inferLocalCompanionURLs(backendURL string) (grafanaURL, configUIURL string)
return "", ""
}

// PingURL checks if a DevLake backend is reachable at the given URL.
func PingURL(baseURL string) error {
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PingURL forwards baseURL directly to pingURL, but pingURL always appends "/ping". If callers pass a baseURL with a trailing slash (common when URLs are copied/pasted), the request becomes "//ping" which can lead to redirects or 404s depending on the server/proxy. Consider normalizing with strings.TrimRight(baseURL, "/") inside PingURL (or pingURL) to match the normalization used in Discover().

Suggested change
func PingURL(baseURL string) error {
func PingURL(baseURL string) error {
baseURL = strings.TrimRight(baseURL, "/")

Copilot uses AI. Check for mistakes.
baseURL = strings.TrimRight(baseURL, "/")
return pingURL(baseURL)
}
Comment on lines +109 to +113
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PingURL is a new exported helper, but there are no direct tests covering its trimming/behavior. Since this is now used by deploy flows to check backend reachability, add a small unit test (e.g., with an httptest server and a trailing-slash URL) to prevent regressions.

Copilot uses AI. Check for mistakes.

func pingURL(baseURL string) error {
client := &http.Client{Timeout: 5 * time.Second}
resp, err := client.Get(baseURL + "/ping")
Expand Down
Loading