From 7ac5b74610f22cd0430bc1a77d880e783ef8f165 Mon Sep 17 00:00:00 2001 From: Christer Edvartsen Date: Sat, 16 Aug 2025 11:31:11 +0200 Subject: [PATCH 1/9] feat: remove `nais debug tidy` command Resolves #580, resolves #581. BREAKING CHANGE: `nais debug tidy` has been removed, as well as the `--by-pod` flag for the `nais debug` command. --- internal/debug/command.go | 18 +- internal/debug/command/debug.go | 47 ++--- internal/debug/command/flag/flag.go | 23 +-- internal/debug/debug.go | 283 ++++++++++++++-------------- internal/debug/tidy.go | 65 ------- internal/debug/tidy/command.go | 22 --- 6 files changed, 173 insertions(+), 285 deletions(-) delete mode 100644 internal/debug/tidy.go delete mode 100644 internal/debug/tidy/command.go diff --git a/internal/debug/command.go b/internal/debug/command.go index 9bd412e6..49bc1a54 100644 --- a/internal/debug/command.go +++ b/internal/debug/command.go @@ -1,6 +1,7 @@ package debug import ( + "context" "fmt" "github.com/nais/cli/internal/debug/command/flag" @@ -8,15 +9,18 @@ import ( "k8s.io/client-go/kubernetes" ) -const debugImageDefault = "europe-north1-docker.pkg.dev/nais-io/nais/images/debug:latest" - -func Run(workloadName string, flags *flag.Debug) error { - clientSet, err := SetupClient(flags.DebugSticky, flags.Context) +func Run(ctx context.Context, workloadName string, flags *flag.Debug) error { + clientSet, err := SetupClient(flags, flags.Context) if err != nil { return err } - dg := Setup(clientSet, flags.DebugSticky, workloadName, debugImageDefault, flags.ByPod) + dg := &Debug{ + ctx: ctx, + podsClient: clientSet.CoreV1().Pods(flags.Namespace), + flags: flags, + workloadName: workloadName, + } if err := dg.Debug(); err != nil { return fmt.Errorf("debugging instance: %w", err) } @@ -24,7 +28,7 @@ func Run(workloadName string, flags *flag.Debug) error { return nil } -func SetupClient(flags *flag.DebugSticky, cluster flag.Context) (kubernetes.Interface, error) { +func SetupClient(flags *flag.Debug, cluster flag.Context) (kubernetes.Interface, error) { client := k8s.SetupControllerRuntimeClient(k8s.WithKubeContext(string(cluster))) if flags.Namespace == "" { @@ -32,7 +36,7 @@ func SetupClient(flags *flag.DebugSticky, cluster flag.Context) (kubernetes.Inte } if cluster != "" { - flags.Context = flag.Context(cluster) + flags.Context = cluster } clientSet, err := k8s.SetupClientGo(string(cluster)) diff --git a/internal/debug/command/debug.go b/internal/debug/command/debug.go index c33d01d2..56fb8805 100644 --- a/internal/debug/command/debug.go +++ b/internal/debug/command/debug.go @@ -2,11 +2,12 @@ package command import ( "context" + "fmt" + "time" "github.com/MakeNowJust/heredoc/v2" "github.com/nais/cli/internal/debug" "github.com/nais/cli/internal/debug/command/flag" - "github.com/nais/cli/internal/debug/tidy" "github.com/nais/cli/internal/k8s" "github.com/nais/cli/internal/root" "github.com/nais/naistrix" @@ -14,58 +15,34 @@ import ( func Debug(parentFlags *root.Flags) *naistrix.Command { defaultContext, defaultNamespace := k8s.GetDefaultContextAndNamespace() - stickyFlags := &flag.DebugSticky{ + flags := &flag.Debug{ Flags: parentFlags, Context: flag.Context(defaultContext), Namespace: defaultNamespace, - } - - debugFlags := &flag.Debug{ - DebugSticky: stickyFlags, + Ttl: time.Minute, } return &naistrix.Command{ Name: "debug", Title: "Create and attach to a debug container.", Description: heredoc.Doc(` - When flag "--copy" is set, the command can be used to debug a copy of the original pod, allowing you to troubleshoot without affecting the live pod. + When "--copy" is used the command can be used to debug a copy of the original pod, allowing you to troubleshoot without affecting the live pod. To debug a live pod, run the command without the "--copy" flag. - - You can only reconnect to the debug session if the pod is running. `), - Args: []naistrix.Argument{ - {Name: "app_name"}, - }, - Flags: debugFlags, - StickyFlags: stickyFlags, - RunFunc: func(ctx context.Context, out naistrix.Output, args []string) error { - return debug.Run(args[0], debugFlags) - }, - SubCommands: []*naistrix.Command{ - tidyCommand(stickyFlags), - }, - } -} - -func tidyCommand(parentFlags *flag.DebugSticky) *naistrix.Command { - flags := &flag.DebugTidy{ - DebugSticky: parentFlags, - } - return &naistrix.Command{ - Name: "tidy", - Title: "Clean up debug containers and debug pods.", - Description: heredoc.Doc(` - Remove debug containers created by the "nais debug" command. + ValidateFunc: func(ctx context.Context, args []string) error { + if flags.Ttl > time.Hour { + return fmt.Errorf("the --ttl duration can not exceed 1 hour") + } - Set the "--copy" flag to delete copy pods. - `), + return nil + }, Args: []naistrix.Argument{ {Name: "app_name"}, }, Flags: flags, RunFunc: func(ctx context.Context, out naistrix.Output, args []string) error { - return tidy.Run(args[0], flags) + return debug.Run(ctx, args[0], flags) }, } } diff --git a/internal/debug/command/flag/flag.go b/internal/debug/command/flag/flag.go index a3ac355e..00e402ab 100644 --- a/internal/debug/command/flag/flag.go +++ b/internal/debug/command/flag/flag.go @@ -1,24 +1,17 @@ package flag import ( + "time" + "github.com/nais/cli/internal/root" ) -type ( - Context string - DebugSticky struct { - *root.Flags - Context Context `name:"context" short:"c" usage:"The kubeconfig |CONTEXT| to use. Defaults to current context."` - Namespace string `name:"namespace" short:"n" usage:"The kubernetes |NAMESPACE| to use. Defaults to current namespace."` - Copy bool `name:"copy" usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` - } -) +type Context string type Debug struct { - *DebugSticky - ByPod bool `name:"by-pod" short:"b" usage:"Attach to a specific |BY-POD| in a workload."` -} - -type DebugTidy struct { - *DebugSticky + *root.Flags + Context Context `name:"context" short:"c" usage:"The kubeconfig |context| to use. Defaults to current context."` + Namespace string `name:"namespace" short:"n" usage:"The kubernetes |namespace| to use. Defaults to current namespace."` + Copy bool `name:"copy" usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` + Ttl time.Duration `name:"ttl" usage:"|Duration| the debug pod remains after exit. Only has effect when --copy is specified."` } diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 0b4ab1e3..64a3f3f2 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -5,167 +5,145 @@ import ( "fmt" "os" "os/exec" - "strings" "time" "github.com/nais/cli/internal/debug/command/flag" "github.com/pterm/pterm" - core_v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" + v1 "k8s.io/client-go/kubernetes/typed/core/v1" ) const ( - debuggerSuffix = "nais-debugger" - debuggerContainerDefaultName = "debugger" + // debugImage is the image used for the debug container. + debugImage = "europe-north1-docker.pkg.dev/nais-io/nais/images/debug:latest" + + // debugPodSuffix will be appended to the pod name when creating a debug pod. + debugPodSuffix = "nais-debugger" + + // debugPodContainerName is the name of the container that will be created in the debug pod. This name is not used + // when creating ephemeral debug containers. + debugPodContainerName = "debugger" ) type Debug struct { ctx context.Context - client kubernetes.Interface - flags *flag.DebugSticky + podsClient v1.PodInterface + flags *flag.Debug workloadName string - debugImage string - byPod bool } -func Setup(client kubernetes.Interface, flags *flag.DebugSticky, workloadName, debugImage string, byPod bool) *Debug { - return &Debug{ - ctx: context.Background(), - client: client, - flags: flags, - workloadName: workloadName, - debugImage: debugImage, - byPod: byPod, +func (d *Debug) Debug() error { + pods, err := d.getPodsForWorkload() + if err != nil { + return err + } + + if len(pods.Items) == 0 { + pterm.Info.Println("No pods found.") + return nil + } + + podNames := make([]string, len(pods.Items)) + for i, pod := range pods.Items { + podNames[i] = pod.Name + } + + podName := podNames[0] + if len(podNames) > 1 { + result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() + if err != nil { + pterm.Error.Printf("Prompt failed: %v\n", err) + return err + } + podName = result } + + if err := d.debugPod(podName); err != nil { + pterm.Error.Printf("Failed to debug pod %s: %v\n", podName, err) + } + + return nil } -func (d *Debug) getPodsForWorkload() (*core_v1.PodList, error) { - pterm.Info.Println("Fetching workload...") - var podList *core_v1.PodList - var err error - podList, err = d.client.CoreV1().Pods(d.flags.Namespace).List(d.ctx, metav1.ListOptions{ - LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", d.workloadName), +func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { + pterm.Info.Println("Fetching pods for workload...") + podList, err := d.podsClient.List(d.ctx, metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=" + d.workloadName, }) - if len(podList.Items) == 0 { - podList, err = d.client.CoreV1().Pods(d.flags.Namespace).List(d.ctx, metav1.ListOptions{ - LabelSelector: fmt.Sprintf("app=%s", d.workloadName), - }) + if err != nil { + return nil, fmt.Errorf("failed to get pods: %w", err) + } + + if len(podList.Items) > 0 { + return podList, nil } + + podList, err = d.podsClient.List(d.ctx, metav1.ListOptions{ + LabelSelector: "app=" + d.workloadName, + }) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } - return podList, nil -} -func debuggerContainerName(podName string) string { - return fmt.Sprintf("%s-%s", podName, debuggerSuffix) + return podList, nil } func (d *Debug) debugPod(podName string) error { - const maxRetries = 6 - const pollInterval = 5 - if d.flags.Copy { - pN := debuggerContainerName(podName) - _, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, pN, metav1.GetOptions{}) - if err == nil { - pterm.Info.Printf("%s already exists, trying to attach...\n", pN) - - // Polling loop to check if the debugger container is running - for i := 0; i < maxRetries; i++ { - pterm.Info.Printf("Attempt %d/%d: Time remaining: %d seconds\n", i+1, maxRetries, (maxRetries-i)*pollInterval) - pod, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, pN, metav1.GetOptions{}) + podCopyName := debugPodName(podName) + if _, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}); err == nil { + maxRetries := 5 + pollInterval := 2 + + pterm.Info.Println("Found existing debug pod, trying to attach...") + for i := range maxRetries { + pterm.Info.Printf("Attempt %d/%d\n", i+1, maxRetries) + + pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) if err != nil { - return fmt.Errorf("failed to get debug pod copy %s: %v", pN, err) + return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) } for _, c := range pod.Status.ContainerStatuses { - if c.Name == debuggerContainerDefaultName && c.State.Running != nil { - pterm.Success.Println("Container is running. Attaching...") - return d.attachToExistingDebugContainer(pN) + if c.Name == debugPodContainerName && c.State.Running != nil { + return d.attachToExistingDebugContainer(podCopyName) } } + time.Sleep(time.Duration(pollInterval) * time.Second) } - // If the loop finishes without finding the running container - return fmt.Errorf("container did not start within the expected time") + return fmt.Errorf("unable to attach to the existing debug pod") } else if !k8serrors.IsNotFound(err) { - return fmt.Errorf("failed to check for existing debug pod copy %s: %v", pN, err) - } - } else { - pod, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, podName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get pod %s: %v", podName, err) - } - - if len(pod.Spec.EphemeralContainers) > 0 { - pterm.Warning.Printf("The container %s already has %d terminated debug containers.\n", podName, len(pod.Spec.EphemeralContainers)) - pterm.Info.Printf("Please consider using 'nais debug tidy %s' to clean up\n", d.workloadName) + return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) } } return d.createDebugPod(podName) } -func (d *Debug) attachToExistingDebugContainer(podName string) error { - cmd := exec.Command( - "kubectl", - "attach", - "-n", d.flags.Namespace, - fmt.Sprintf("pod/%s", podName), - "-c", debuggerContainerDefaultName, - "-i", - "-t", - ) - - if d.flags.Context != "" { - cmd.Args = append(cmd.Args, "--context", string(d.flags.Context)) - } - - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - if err := cmd.Start(); err != nil { - return fmt.Errorf("failed to start attach command: %v", err) - } - pterm.Success.Printf("Attached to pod %s\n", podName) - - if err := cmd.Wait(); err != nil { - return fmt.Errorf("attach command failed: %v", err) - } - - return nil -} - func (d *Debug) createDebugPod(podName string) error { args := []string{ "debug", - "-n", d.flags.Namespace, - fmt.Sprintf("pod/%s", podName), - "-it", + "pod/" + podName, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), "--stdin", "--tty", "--profile=restricted", - "-q", - "--image", d.debugImage, - } - - if d.flags.Context != "" { - args = append(args, "--context", string(d.flags.Context)) + "--image", debugImage, + "--quiet", } if d.flags.Copy { args = append(args, - "--copy-to", debuggerContainerName(podName), - "-c", "debugger", + "--copy-to", debugPodName(podName), + "--container", debugPodContainerName, ) } else { - args = append(args, - "--target", d.workloadName) + args = append(args, "--target", d.workloadName) } cmd := exec.Command("kubectl", args...) @@ -173,61 +151,84 @@ func (d *Debug) createDebugPod(podName string) error { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - if err := cmd.Start(); err != nil { + pterm.Info.Println("Starting debug container...") + + if err := cmd.Run(); err != nil { return fmt.Errorf("failed to start debug command: %v", err) } if d.flags.Copy { - pterm.Info.Printf("Debugging pod copy created, enable process namespace sharing in %s\n", debuggerContainerName(podName)) - } else { - pterm.Info.Println("Debugging container created...") - } - pterm.Info.Printf("Using debugger image %s\n", d.debugImage) - - if err := cmd.Wait(); err != nil { - if strings.Contains(err.Error(), "exit status 1") { - pterm.Info.Println("Debugging container exited") - return nil + if err := d.annotateAndLabelDebugPod(debugPodName(podName)); err != nil { + return fmt.Errorf("failed to annotate and label debug pod: %w", err) } - return fmt.Errorf("debug command failed: %v", err) - } - if d.flags.Copy { - pterm.Info.Printf("Run 'nais debug -cp %s' command to attach to the debug pod\n", d.workloadName) + pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.Ttl) + } else { + pterm.Info.Println("Remember to restart the pod to remove the debug container") } return nil } -func (d *Debug) Debug() error { - pods, err := d.getPodsForWorkload() - if err != nil { - return err - } +func (d *Debug) annotateAndLabelDebugPod(debugPodName string) error { + labelCommand := exec.CommandContext( + d.ctx, + "kubectl", + "label", + "pod/"+debugPodName, + "euthanaisa.nais.io/enabled=true", + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + ) - var podNames []string - for _, pod := range pods.Items { - podNames = append(podNames, pod.Name) + if err := labelCommand.Run(); err != nil { + return fmt.Errorf("unable to label debug pod: %w", err) } - if len(podNames) == 0 { - pterm.Info.Println("No pods found.") - return nil - } + killAfter := time.Now().Add(d.flags.Ttl).Format(time.RFC3339) + annotateCommand := exec.CommandContext( + d.ctx, + "kubectl", + "annotate", + "pod/"+debugPodName, + "euthanaisa.nais.io/kill-after="+killAfter, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + ) - podName := podNames[0] - if d.byPod { - result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() - if err != nil { - pterm.Error.Printf("Prompt failed: %v\n", err) - return err - } - podName = result + if err := annotateCommand.Run(); err != nil { + return fmt.Errorf("unable to annotate debug pod: %w", err) } - if err := d.debugPod(podName); err != nil { - pterm.Error.Printf("Failed to debug pod %s: %v\n", podName, err) + return nil +} + +func (d *Debug) attachToExistingDebugContainer(podName string) error { + cmd := exec.Command( + "kubectl", + "attach", + "pod/"+podName, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + "--container", debugPodContainerName, + "--stdin", + "--tty", + "--quiet", + ) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to attach to the debug container: %w", err) } + pterm.Info.Println("Exited from Debug container") + return nil } + +// debugPodName generates a name for the debug pod copy given a pod name. +func debugPodName(podName string) string { + return podName + "-" + debugPodSuffix +} diff --git a/internal/debug/tidy.go b/internal/debug/tidy.go deleted file mode 100644 index ba1bbc78..00000000 --- a/internal/debug/tidy.go +++ /dev/null @@ -1,65 +0,0 @@ -package debug - -import ( - "fmt" - - "github.com/pterm/pterm" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func (d *Debug) Tidy() error { - pods, err := d.getPodsForWorkload() - if err != nil { - return err - } - - var podNames []string - for _, pod := range pods.Items { - podNames = append(podNames, pod.Name) - } - - if len(podNames) == 0 { - pterm.Info.Println("No pods found") - return nil - } - - for _, pod := range pods.Items { - podName := pod.Name - if d.flags.Copy { - podName = debuggerContainerName(pod.Name) - } - - if !d.flags.Copy && len(pod.Spec.EphemeralContainers) == 0 { - pterm.Info.Printf("No debug container found for: %s\n", pod.Name) - continue - } - - _, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - pterm.Info.Printf("No debug pod found for: %s\n", pod.Name) - continue - } - pterm.Error.Printf("Failed to get pod %s: %v\n", podName, err) - return err - } - - confirm, _ := pterm.DefaultInteractiveConfirm. - WithDefaultText(fmt.Sprintf("Pod '%s' with debug container, do you want to clean up?", podName)). - Show() - - if !confirm { - pterm.Info.Printf("Skipping deletion for pod: %s\n", podName) - continue - } - - // Delete pod if user confirms - if err := d.client.CoreV1().Pods(d.flags.Namespace).Delete(d.ctx, podName, metav1.DeleteOptions{}); err != nil { - pterm.Error.Printf("Failed to delete pod %s: %v\n", podName, err) - } else { - pterm.Success.Printf("Deleted pod: %s\n", podName) - } - } - return nil -} diff --git a/internal/debug/tidy/command.go b/internal/debug/tidy/command.go deleted file mode 100644 index 8c4e48fe..00000000 --- a/internal/debug/tidy/command.go +++ /dev/null @@ -1,22 +0,0 @@ -package tidy - -import ( - "fmt" - - "github.com/nais/cli/internal/debug" - "github.com/nais/cli/internal/debug/command/flag" -) - -func Run(workloadName string, flags *flag.DebugTidy) error { - clientSet, err := debug.SetupClient(flags.DebugSticky, flags.Context) - if err != nil { - return err - } - - dg := debug.Setup(clientSet, flags.DebugSticky, workloadName, "", false) - if err := dg.Tidy(); err != nil { - return fmt.Errorf("debugging instance: %w", err) - } - - return nil -} From a77f6e029f87c2e4fc2aaf4c8ba9eb904ec715c2 Mon Sep 17 00:00:00 2001 From: Christer Edvartsen Date: Sat, 16 Aug 2025 19:14:10 +0200 Subject: [PATCH 2/9] build(deps): bump nais/naistrix --- go.mod | 3 ++- go.sum | 6 ++++-- internal/application/application.go | 4 ++-- internal/application/application_test.go | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 445feaf4..e65912ff 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/mitchellh/go-ps v1.0.0 github.com/nais/device v0.0.0-20250703090236-08bd8b276591 github.com/nais/liberator v0.0.0-20250703075635-7da81032e1ae - github.com/nais/naistrix v0.3.1 + github.com/nais/naistrix v0.4.0 github.com/pterm/pterm v0.12.81 github.com/savioxavier/termlink v1.4.3 github.com/sethvargo/go-retry v0.3.0 @@ -70,6 +70,7 @@ require ( dario.cat/mergo v1.0.2 // indirect github.com/AlekSi/pointer v1.2.0 // indirect github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect + github.com/MakeNowJust/heredoc v1.0.0 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.3.1 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect diff --git a/go.sum b/go.sum index 83904782..251ecb69 100644 --- a/go.sum +++ b/go.sum @@ -40,6 +40,8 @@ github.com/GoogleCloudPlatform/cloudsql-proxy v1.37.7 h1:+ugXZyYXIb2NFWaXleLbouy github.com/GoogleCloudPlatform/cloudsql-proxy v1.37.7/go.mod h1:9iAgV6bpVWBq2BpG4J691fNCdKYd4JqR5ey9m4odfHE= github.com/Khan/genqlient v0.8.1 h1:wtOCc8N9rNynRLXN3k3CnfzheCUNKBcvXmVv5zt6WCs= github.com/Khan/genqlient v0.8.1/go.mod h1:R2G6DzjBvCbhjsEajfRjbWdVglSH/73kSivC9TLWVjU= +github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= +github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/MakeNowJust/heredoc/v2 v2.0.1 h1:rlCHh70XXXv7toz95ajQWOWQnN4WNLt0TdpZYIR/J6A= github.com/MakeNowJust/heredoc/v2 v2.0.1/go.mod h1:6/2Abh5s+hc3g9nbWLe9ObDIOhaRrqsyY9MWy+4JdRM= github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs= @@ -414,8 +416,8 @@ github.com/nais/device v0.0.0-20250703090236-08bd8b276591 h1:MgFQR6KwI6nB8Dl8c6I github.com/nais/device v0.0.0-20250703090236-08bd8b276591/go.mod h1:5EJDz4LiIliyvsUZb8gRDwpqihR7r4J+T4pJxUjBc9I= github.com/nais/liberator v0.0.0-20250703075635-7da81032e1ae h1:aZqDazMXxDdL1qSG9SpTMm+WZYY0RJp0yTrs8TSzJ+0= github.com/nais/liberator v0.0.0-20250703075635-7da81032e1ae/go.mod h1:Z0ycpT5Ug9Edacd5173bShlh9vCtogCwoLCbjv/pz18= -github.com/nais/naistrix v0.3.1 h1:9/S297620cAtbFRN51105kJNKKVBXasa2ALmWbrITlM= -github.com/nais/naistrix v0.3.1/go.mod h1:lLHnRUy/wzrjr79pH1PqdzUDc3N7hp9Ex+H76RytWAw= +github.com/nais/naistrix v0.4.0 h1:Pf3bdnhWSnVDqwoJ496ZyupXvv5Gf4uu6BKXhFmj0CA= +github.com/nais/naistrix v0.4.0/go.mod h1:KQj7//htD3ZW/93v+6eyHjL/x6w93sc5dUBFCC/QdKY= github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= diff --git a/internal/application/application.go b/internal/application/application.go index 47c92ef6..43f1f2e0 100644 --- a/internal/application/application.go +++ b/internal/application/application.go @@ -44,7 +44,7 @@ func newApplication(flags *root.Flags) *naistrix.Application { func Run(ctx context.Context, w io.Writer) error { flags := &root.Flags{} app := newApplication(flags) - executedCommand, err := app.Run(ctx, naistrix.NewWriter(w), os.Args[1:]) + err := app.Run(naistrix.RunWithContext(ctx), naistrix.RunWithOutput(naistrix.NewWriter(w))) autoComplete := slices.Contains(os.Args[1:], "__complete") if !autoComplete { @@ -57,7 +57,7 @@ func Run(ctx context.Context, w io.Writer) error { }() } - if !autoComplete && executedCommand != nil { + if executedCommand := app.ExecutedCommand(); !autoComplete && executedCommand != nil { collectCommandHistogram(ctx, executedCommand, err) } diff --git a/internal/application/application_test.go b/internal/application/application_test.go index 3b20847a..baaaebed 100644 --- a/internal/application/application_test.go +++ b/internal/application/application_test.go @@ -30,7 +30,7 @@ func runCommand(t *testing.T, ctx context.Context, cmd *naistrix.Command, parent t.Fatalf("failed to run command %q: %v", strings.Join(helpCmd, " "), err) } }() - _, err := newApplication(&root.Flags{}).Run(ctx, naistrix.Discard(), helpCmd) + err := newApplication(&root.Flags{}).Run(naistrix.RunWithContext(ctx), naistrix.RunWithOutput(naistrix.Discard()), naistrix.RunWithArgs(helpCmd)) if err != nil { t.Fatalf("failed to run command %s: %v", strings.Join(helpCmd, " "), err) } From 0d561036aa1f7f97bbe19155a3bd6383c8957f34 Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Tue, 19 Aug 2025 15:32:19 +0200 Subject: [PATCH 3/9] feat: set default ttl to 24 hours, no validate Co-authored-by: Christer Edvartsen --- internal/debug/command/debug.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/internal/debug/command/debug.go b/internal/debug/command/debug.go index 56fb8805..2333fc4e 100644 --- a/internal/debug/command/debug.go +++ b/internal/debug/command/debug.go @@ -2,7 +2,6 @@ package command import ( "context" - "fmt" "time" "github.com/MakeNowJust/heredoc/v2" @@ -19,7 +18,7 @@ func Debug(parentFlags *root.Flags) *naistrix.Command { Flags: parentFlags, Context: flag.Context(defaultContext), Namespace: defaultNamespace, - Ttl: time.Minute, + Ttl: 24 * time.Hour, } return &naistrix.Command{ @@ -30,13 +29,6 @@ func Debug(parentFlags *root.Flags) *naistrix.Command { To debug a live pod, run the command without the "--copy" flag. `), - ValidateFunc: func(ctx context.Context, args []string) error { - if flags.Ttl > time.Hour { - return fmt.Errorf("the --ttl duration can not exceed 1 hour") - } - - return nil - }, Args: []naistrix.Argument{ {Name: "app_name"}, }, From 23b5d55f8b2061f9711fd61a8e1b78c0923a3a8b Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Tue, 19 Aug 2025 15:33:42 +0200 Subject: [PATCH 4/9] feat: propagate metadata/probes to copied pod also experimenting with more interactive "waiting for container" prompt Co-authored-by: Christer Edvartsen --- internal/debug/debug.go | 149 ++++++++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 43 deletions(-) diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 64a3f3f2..8fb3fa3f 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -3,8 +3,11 @@ package debug import ( "context" "fmt" + "maps" "os" "os/exec" + "slices" + "strings" "time" "github.com/nais/cli/internal/debug/command/flag" @@ -45,13 +48,14 @@ func (d *Debug) Debug() error { return nil } - podNames := make([]string, len(pods.Items)) - for i, pod := range pods.Items { - podNames[i] = pod.Name + podMap := make(map[string]corev1.Pod) + for _, pod := range pods.Items { + podMap[pod.Name] = pod } + podNames := slices.Collect(maps.Keys(podMap)) podName := podNames[0] - if len(podNames) > 1 { + if len(podMap) > 1 { result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() if err != nil { pterm.Error.Printf("Prompt failed: %v\n", err) @@ -60,18 +64,23 @@ func (d *Debug) Debug() error { podName = result } - if err := d.debugPod(podName); err != nil { + if err := d.debugPod(podName, podMap[podName].Labels); err != nil { pterm.Error.Printf("Failed to debug pod %s: %v\n", podName, err) } return nil } +func labelSelector(key, value string) metav1.ListOptions { + excludeDebugPods := "cli.nais.io/debug!=true" + return metav1.ListOptions{ + LabelSelector: strings.Join([]string{excludeDebugPods, key + "=" + value}, ","), + } +} + func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { pterm.Info.Println("Fetching pods for workload...") - podList, err := d.podsClient.List(d.ctx, metav1.ListOptions{ - LabelSelector: "app.kubernetes.io/name=" + d.workloadName, - }) + podList, err := d.podsClient.List(d.ctx, labelSelector("app.kubernetes.io/name", d.workloadName)) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } @@ -80,9 +89,7 @@ func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { return podList, nil } - podList, err = d.podsClient.List(d.ctx, metav1.ListOptions{ - LabelSelector: "app=" + d.workloadName, - }) + podList, err = d.podsClient.List(d.ctx, labelSelector("app=", d.workloadName)) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } @@ -90,41 +97,72 @@ func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { return podList, nil } -func (d *Debug) debugPod(podName string) error { - if d.flags.Copy { - podCopyName := debugPodName(podName) - if _, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}); err == nil { - maxRetries := 5 - pollInterval := 2 +func (d *Debug) whenDebugContainerReady(podCopyName string, callback func(podCopyName string) error) error { + timeout := 30 * time.Second + graceDuration := 300 * time.Millisecond - pterm.Info.Println("Found existing debug pod, trying to attach...") - for i := range maxRetries { - pterm.Info.Printf("Attempt %d/%d\n", i+1, maxRetries) + ctx, cancel := context.WithTimeout(d.ctx, timeout) + defer cancel() - pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) - } + statusLine := func(status any) { + pterm.Printo(fmt.Sprintf("Waiting for debug container to start [%v]", status)) + } - for _, c := range pod.Status.ContainerStatuses { - if c.Name == debugPodContainerName && c.State.Running != nil { - return d.attachToExistingDebugContainer(podCopyName) - } - } + for ctx.Err() == nil { + select { + case <-ctx.Done(): + return fmt.Errorf("timed out waiting for debug container to start") + default: + deadline, _ := ctx.Deadline() + statusLine(time.Until(deadline).Round(time.Second)) + + pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) + } - time.Sleep(time.Duration(pollInterval) * time.Second) + for _, c := range pod.Status.ContainerStatuses { + if c.Name == debugPodContainerName && c.State.Running != nil { + statusLine(pterm.Green("done")) + pterm.Println() + return callback(podCopyName) + } } - return fmt.Errorf("unable to attach to the existing debug pod") - } else if !k8serrors.IsNotFound(err) { + // No running container found, wait a bit before checking again + time.Sleep(graceDuration) + } + } + + return fmt.Errorf("debug pod %q did not start within the expected time", podCopyName) +} + +func (d *Debug) podExists(name string) (bool, error) { + if _, err := d.podsClient.Get(d.ctx, name, metav1.GetOptions{}); err == nil { + return true, nil + } else if k8serrors.IsNotFound(err) { + return false, nil + } else { + return false, err + } +} + +func (d *Debug) debugPod(podName string, labels map[string]string) error { + if d.flags.Copy { + podCopyName := debugPodName(podName) + // If debug pod already exists, attach instead of creating a new one + if exists, err := d.podExists(podCopyName); exists { + pterm.Info.Printf("Debug pod %q already exists, attaching...\n", podCopyName) + return d.whenDebugContainerReady(podCopyName, d.attach) + } else if err != nil { return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) } } - return d.createDebugPod(podName) + return d.createDebugPod(podName, labels) } -func (d *Debug) createDebugPod(podName string) error { +func (d *Debug) createDebugPod(podName string, labels map[string]string) error { args := []string{ "debug", "pod/" + podName, @@ -141,6 +179,11 @@ func (d *Debug) createDebugPod(podName string) error { args = append(args, "--copy-to", debugPodName(podName), "--container", debugPodContainerName, + "--keep-annotations", + "--keep-liveness", + "--keep-readiness", + "--keep-startup", + "--attach=false", ) } else { args = append(args, "--target", d.workloadName) @@ -151,34 +194,54 @@ func (d *Debug) createDebugPod(podName string) error { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - pterm.Info.Println("Starting debug container...") + if d.flags.IsDebug() { + pterm.Info.Println("Starting debug container: ", cmd.String()) + } else { + pterm.Info.Println("Starting debug container...") + } if err := cmd.Run(); err != nil { return fmt.Errorf("failed to start debug command: %v", err) } if d.flags.Copy { - if err := d.annotateAndLabelDebugPod(debugPodName(podName)); err != nil { + podCopyName := debugPodName(podName) + if err := d.annotateAndLabelDebugPod(podCopyName, labels); err != nil { return fmt.Errorf("failed to annotate and label debug pod: %w", err) } + if err := d.whenDebugContainerReady(podCopyName, d.attach); err != nil { + return fmt.Errorf("failed to attach to debug pod %q: %w", podCopyName, err) + } + + // TODO ask if the user wants to delete the debug pod after attaching pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.Ttl) } else { + // TODO ask if the user wants to delete the debug pod after attaching pterm.Info.Println("Remember to restart the pod to remove the debug container") } return nil } -func (d *Debug) annotateAndLabelDebugPod(debugPodName string) error { +func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map[string]string) error { + args := []string{ + "--context", string(d.flags.Context), + "--namespace", d.flags.Namespace, + "label", + "pod/" + debugPodName, + "cli.nais.io/debug=true", + "euthanaisa.nais.io/enabled=true", + } + delete(existingLabels, "pod-template-hash") + for label, value := range existingLabels { + args = append(args, fmt.Sprintf("%s=%s", label, value)) + } + labelCommand := exec.CommandContext( d.ctx, "kubectl", - "label", - "pod/"+debugPodName, - "euthanaisa.nais.io/enabled=true", - "--namespace", d.flags.Namespace, - "--context", string(d.flags.Context), + args..., ) if err := labelCommand.Run(); err != nil { @@ -203,7 +266,7 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string) error { return nil } -func (d *Debug) attachToExistingDebugContainer(podName string) error { +func (d *Debug) attach(podName string) error { cmd := exec.Command( "kubectl", "attach", From 0ddb488ad9cb7b419b97cbebfff2e9b3807cab9a Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Wed, 20 Aug 2025 11:17:43 +0200 Subject: [PATCH 5/9] fix: cleanup debug code flow Co-authored-by: Christer Edvartsen --- internal/debug/debug.go | 305 ++++++++++++++++++++++------------------ 1 file changed, 165 insertions(+), 140 deletions(-) diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 8fb3fa3f..60b87faf 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -3,10 +3,8 @@ package debug import ( "context" "fmt" - "maps" "os" "os/exec" - "slices" "strings" "time" @@ -48,36 +46,19 @@ func (d *Debug) Debug() error { return nil } - podMap := make(map[string]corev1.Pod) - for _, pod := range pods.Items { - podMap[pod.Name] = pod - } - - podNames := slices.Collect(maps.Keys(podMap)) - podName := podNames[0] - if len(podMap) > 1 { - result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() - if err != nil { - pterm.Error.Printf("Prompt failed: %v\n", err) - return err - } - podName = result + pod, err := interactiveSelectPod(pods.Items) + if err != nil { + pterm.Error.Printf("Failed to select pod: %v\n", err) + return err } - if err := d.debugPod(podName, podMap[podName].Labels); err != nil { - pterm.Error.Printf("Failed to debug pod %s: %v\n", podName, err) + if err := d.debugPod(*pod); err != nil { + pterm.Error.Printf("Failed to debug pod %s: %v\n", pod.Name, err) } return nil } -func labelSelector(key, value string) metav1.ListOptions { - excludeDebugPods := "cli.nais.io/debug!=true" - return metav1.ListOptions{ - LabelSelector: strings.Join([]string{excludeDebugPods, key + "=" + value}, ","), - } -} - func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { pterm.Info.Println("Fetching pods for workload...") podList, err := d.podsClient.List(d.ctx, labelSelector("app.kubernetes.io/name", d.workloadName)) @@ -97,46 +78,6 @@ func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { return podList, nil } -func (d *Debug) whenDebugContainerReady(podCopyName string, callback func(podCopyName string) error) error { - timeout := 30 * time.Second - graceDuration := 300 * time.Millisecond - - ctx, cancel := context.WithTimeout(d.ctx, timeout) - defer cancel() - - statusLine := func(status any) { - pterm.Printo(fmt.Sprintf("Waiting for debug container to start [%v]", status)) - } - - for ctx.Err() == nil { - select { - case <-ctx.Done(): - return fmt.Errorf("timed out waiting for debug container to start") - default: - deadline, _ := ctx.Deadline() - statusLine(time.Until(deadline).Round(time.Second)) - - pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) - } - - for _, c := range pod.Status.ContainerStatuses { - if c.Name == debugPodContainerName && c.State.Running != nil { - statusLine(pterm.Green("done")) - pterm.Println() - return callback(podCopyName) - } - } - - // No running container found, wait a bit before checking again - time.Sleep(graceDuration) - } - } - - return fmt.Errorf("debug pod %q did not start within the expected time", podCopyName) -} - func (d *Debug) podExists(name string) (bool, error) { if _, err := d.podsClient.Get(d.ctx, name, metav1.GetOptions{}); err == nil { return true, nil @@ -147,25 +88,10 @@ func (d *Debug) podExists(name string) (bool, error) { } } -func (d *Debug) debugPod(podName string, labels map[string]string) error { - if d.flags.Copy { - podCopyName := debugPodName(podName) - // If debug pod already exists, attach instead of creating a new one - if exists, err := d.podExists(podCopyName); exists { - pterm.Info.Printf("Debug pod %q already exists, attaching...\n", podCopyName) - return d.whenDebugContainerReady(podCopyName, d.attach) - } else if err != nil { - return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) - } - } - - return d.createDebugPod(podName, labels) -} - -func (d *Debug) createDebugPod(podName string, labels map[string]string) error { +func (d *Debug) debugPod(pod corev1.Pod) error { args := []string{ "debug", - "pod/" + podName, + "pod/" + pod.Name, "--namespace", d.flags.Namespace, "--context", string(d.flags.Context), "--stdin", @@ -176,90 +102,92 @@ func (d *Debug) createDebugPod(podName string, labels map[string]string) error { } if d.flags.Copy { - args = append(args, - "--copy-to", debugPodName(podName), - "--container", debugPodContainerName, - "--keep-annotations", - "--keep-liveness", - "--keep-readiness", - "--keep-startup", - "--attach=false", - ) - } else { - args = append(args, "--target", d.workloadName) + return d.createDebugPod(args, pod) } - cmd := exec.Command("kubectl", args...) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + return d.createDebugContainer(args, &pod) +} - if d.flags.IsDebug() { - pterm.Info.Println("Starting debug container: ", cmd.String()) - } else { - pterm.Info.Println("Starting debug container...") - } +func (d *Debug) createDebugContainer(commonArgs []string, pod *corev1.Pod) error { + pterm.Info.Printf("Creating ephemeral debug container in pod %s...\n", pod.Name) - if err := cmd.Run(); err != nil { + args := append(commonArgs, "--target", d.workloadName) // workloadName is the same as container name for nais apps + if err := d.kubectl(d.ctx, true, args...); err != nil { return fmt.Errorf("failed to start debug command: %v", err) } - if d.flags.Copy { - podCopyName := debugPodName(podName) - if err := d.annotateAndLabelDebugPod(podCopyName, labels); err != nil { - return fmt.Errorf("failed to annotate and label debug pod: %w", err) - } + pterm.Info.Println("Remember to restart the pod to remove the debug container") + return nil +} - if err := d.whenDebugContainerReady(podCopyName, d.attach); err != nil { - return fmt.Errorf("failed to attach to debug pod %q: %w", podCopyName, err) - } +func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { + pterm.Info.Printf("Creating a copy of pod %s with a debug container...\n", pod.Name) - // TODO ask if the user wants to delete the debug pod after attaching - pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.Ttl) - } else { - // TODO ask if the user wants to delete the debug pod after attaching - pterm.Info.Println("Remember to restart the pod to remove the debug container") + podCopyName := debugPodName(pod.Name) + // If debug pod already exists, attach instead of creating a new one + if exists, err := d.podExists(podCopyName); exists { + pterm.Info.Printf("Debug pod %q already exists, attaching...\n", podCopyName) + return d.whenDebugContainerReady(podCopyName, d.attach) + } else if err != nil { + return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) + } + + args := append(commonArgs, + "--copy-to", debugPodName(pod.Name), + "--container", debugPodContainerName, + "--keep-annotations", + "--keep-liveness", + "--keep-readiness", + "--keep-startup", + "--attach=false", + ) + if err := d.kubectl(d.ctx, false, args...); err != nil { + return fmt.Errorf("failed to start debug command: %v", err) } + if err := d.annotateAndLabelDebugPod(podCopyName, pod.Labels); err != nil { + return fmt.Errorf("failed to annotate and label debug pod: %w", err) + } + + if err := d.whenDebugContainerReady(podCopyName, d.attach); err != nil { + return fmt.Errorf("failed to attach to debug pod %q: %w", podCopyName, err) + } + + // TODO ask if the user wants to delete the debug pod after attaching + pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.Ttl) return nil } func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map[string]string) error { args := []string{ - "--context", string(d.flags.Context), - "--namespace", d.flags.Namespace, "label", "pod/" + debugPodName, "cli.nais.io/debug=true", "euthanaisa.nais.io/enabled=true", } + delete(existingLabels, "pod-template-hash") for label, value := range existingLabels { args = append(args, fmt.Sprintf("%s=%s", label, value)) } - labelCommand := exec.CommandContext( + if err := d.kubectl( d.ctx, - "kubectl", + false, args..., - ) - - if err := labelCommand.Run(); err != nil { + ); err != nil { return fmt.Errorf("unable to label debug pod: %w", err) } killAfter := time.Now().Add(d.flags.Ttl).Format(time.RFC3339) - annotateCommand := exec.CommandContext( + + if err := d.kubectl( d.ctx, - "kubectl", + false, "annotate", "pod/"+debugPodName, "euthanaisa.nais.io/kill-after="+killAfter, - "--namespace", d.flags.Namespace, - "--context", string(d.flags.Context), - ) - - if err := annotateCommand.Run(); err != nil { + ); err != nil { return fmt.Errorf("unable to annotate debug pod: %w", err) } @@ -267,22 +195,15 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map } func (d *Debug) attach(podName string) error { - cmd := exec.Command( - "kubectl", + if err := d.kubectl(d.ctx, + true, "attach", "pod/"+podName, - "--namespace", d.flags.Namespace, - "--context", string(d.flags.Context), "--container", debugPodContainerName, "--stdin", "--tty", "--quiet", - ) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - if err := cmd.Run(); err != nil { + ); err != nil { return fmt.Errorf("failed to attach to the debug container: %w", err) } @@ -291,7 +212,111 @@ func (d *Debug) attach(podName string) error { return nil } +func interactiveSelectPod(pods []corev1.Pod) (*corev1.Pod, error) { + if len(pods) > 1 { + var podNames []string + for _, p := range pods { + podNames = append(podNames, p.Name) + } + + result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() + if err != nil { + pterm.Error.Printf("Prompt failed: %v\n", err) + return nil, err + } + + for _, p := range pods { + if p.Name == result { + return &p, nil + } + } + } else if len(pods) == 1 { + return &pods[0], nil + } + + return nil, fmt.Errorf("no pod selected or found") +} + +func labelSelector(key, value string) metav1.ListOptions { + excludeDebugPods := "cli.nais.io/debug!=true" + return metav1.ListOptions{ + LabelSelector: strings.Join([]string{excludeDebugPods, key + "=" + value}, ","), + } +} + // debugPodName generates a name for the debug pod copy given a pod name. func debugPodName(podName string) string { return podName + "-" + debugPodSuffix } + +func (d *Debug) whenDebugContainerReady(podCopyName string, callback func(podCopyName string) error) error { + timeout := 30 * time.Second + graceDuration := 300 * time.Millisecond + + ctx, cancel := context.WithTimeout(d.ctx, timeout) + defer cancel() + + statusLine := func(status any) { + pterm.Printo(fmt.Sprintf("Waiting for debug container to start [%v]", status)) + } + + for ctx.Err() == nil { + select { + case <-ctx.Done(): + return fmt.Errorf("timed out waiting for debug container to start") + default: + deadline, _ := ctx.Deadline() + statusLine(time.Until(deadline).Round(time.Second)) + + pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) + } + + for _, c := range pod.Status.ContainerStatuses { + if c.Name == debugPodContainerName && c.State.Running != nil { + statusLine(pterm.Green("done")) + pterm.Println() + return callback(podCopyName) + } + } + + // No running container found, wait a bit before checking again + time.Sleep(graceDuration) + } + } + + return fmt.Errorf("debug pod %q did not start within the expected time", podCopyName) +} + +func (d *Debug) kubectl(ctx context.Context, attach bool, args ...string) error { + cmd := exec.CommandContext(ctx, + "kubectl", + append(args, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + )..., + ) + + if d.flags.IsDebug() { + pterm.Info.Println("Running command:", strings.Join(cmd.Args, " ")) + } + + if attach { + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() + } + + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("kubectl command failed: %w\nOutput: %s", err, string(out)) + } + + if d.flags.IsVerbose() { + pterm.Info.Println("Command output:", string(out)) + } + + return nil +} From ead5ebaf744ecd7826f6f6a6838bda40c42f582e Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Wed, 20 Aug 2025 11:44:39 +0200 Subject: [PATCH 6/9] fix: only print metric upload error if trace --- internal/metric/otel.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/metric/otel.go b/internal/metric/otel.go index 661f0578..f79fa59f 100644 --- a/internal/metric/otel.go +++ b/internal/metric/otel.go @@ -43,7 +43,7 @@ func Initialize() func(verbose bool) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() err := provider.Shutdown(ctx) - if err != nil { + if err != nil && verbose { fmt.Printf("Failed up upload metrics: %v\n", err) } } From 77777a3db02459336fbe56b7a3179b703530de91 Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Wed, 20 Aug 2025 14:15:37 +0200 Subject: [PATCH 7/9] fix: make more generic waitFor func for later use This is useful for running tasks with a timeout Co-authored-by: Christer Edvartsen --- internal/debug/command/debug.go | 3 +- internal/debug/command/flag/flag.go | 9 +-- internal/debug/debug.go | 102 ++++++++++++++++------------ 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/internal/debug/command/debug.go b/internal/debug/command/debug.go index 2333fc4e..64d7bdc0 100644 --- a/internal/debug/command/debug.go +++ b/internal/debug/command/debug.go @@ -18,7 +18,8 @@ func Debug(parentFlags *root.Flags) *naistrix.Command { Flags: parentFlags, Context: flag.Context(defaultContext), Namespace: defaultNamespace, - Ttl: 24 * time.Hour, + TTL: 24 * time.Hour, + Timeout: 30 * time.Second, } return &naistrix.Command{ diff --git a/internal/debug/command/flag/flag.go b/internal/debug/command/flag/flag.go index 00e402ab..ab9257d0 100644 --- a/internal/debug/command/flag/flag.go +++ b/internal/debug/command/flag/flag.go @@ -10,8 +10,9 @@ type Context string type Debug struct { *root.Flags - Context Context `name:"context" short:"c" usage:"The kubeconfig |context| to use. Defaults to current context."` - Namespace string `name:"namespace" short:"n" usage:"The kubernetes |namespace| to use. Defaults to current namespace."` - Copy bool `name:"copy" usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` - Ttl time.Duration `name:"ttl" usage:"|Duration| the debug pod remains after exit. Only has effect when --copy is specified."` + Context Context `short:"c" usage:"The kubeconfig |context| to use. Defaults to current context."` + Namespace string `short:"n" usage:"The kubernetes |namespace| to use. Defaults to current namespace."` + Copy bool `usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` + TTL time.Duration `usage:"|Duration| the debug pod remains after exit. Only has effect when --copy is specified."` + Timeout time.Duration `usage:"|Duration| to wait for the debug pod to be ready. Defaults to 30s."` } diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 60b87faf..1f7abf62 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -127,7 +127,7 @@ func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { // If debug pod already exists, attach instead of creating a new one if exists, err := d.podExists(podCopyName); exists { pterm.Info.Printf("Debug pod %q already exists, attaching...\n", podCopyName) - return d.whenDebugContainerReady(podCopyName, d.attach) + return d.attach(podCopyName) } else if err != nil { return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) } @@ -149,12 +149,12 @@ func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { return fmt.Errorf("failed to annotate and label debug pod: %w", err) } - if err := d.whenDebugContainerReady(podCopyName, d.attach); err != nil { + if err := d.attach(podCopyName); err != nil { return fmt.Errorf("failed to attach to debug pod %q: %w", podCopyName, err) } // TODO ask if the user wants to delete the debug pod after attaching - pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.Ttl) + pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.TTL) return nil } @@ -179,7 +179,7 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map return fmt.Errorf("unable to label debug pod: %w", err) } - killAfter := time.Now().Add(d.flags.Ttl).Format(time.RFC3339) + killAfter := time.Now().Add(d.flags.TTL).Format(time.RFC3339) if err := d.kubectl( d.ctx, @@ -194,24 +194,6 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map return nil } -func (d *Debug) attach(podName string) error { - if err := d.kubectl(d.ctx, - true, - "attach", - "pod/"+podName, - "--container", debugPodContainerName, - "--stdin", - "--tty", - "--quiet", - ); err != nil { - return fmt.Errorf("failed to attach to the debug container: %w", err) - } - - pterm.Info.Println("Exited from Debug container") - - return nil -} - func interactiveSelectPod(pods []corev1.Pod) (*corev1.Pod, error) { if len(pods) > 1 { var podNames []string @@ -249,44 +231,78 @@ func debugPodName(podName string) string { return podName + "-" + debugPodSuffix } -func (d *Debug) whenDebugContainerReady(podCopyName string, callback func(podCopyName string) error) error { - timeout := 30 * time.Second - graceDuration := 300 * time.Millisecond +func waitFor[T any](ctx context.Context, timeout time.Duration, msg string, f func(context.Context) *T) (*T, error) { + graceDuration := 300 * time.Millisecond // time to wait between checks - ctx, cancel := context.WithTimeout(d.ctx, timeout) + ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - statusLine := func(status any) { - pterm.Printo(fmt.Sprintf("Waiting for debug container to start [%v]", status)) + status, err := (&pterm.AreaPrinter{}).Start() + if err != nil { + return nil, fmt.Errorf("failed to create status area: %w", err) } + defer status.Stop() for ctx.Err() == nil { select { case <-ctx.Done(): - return fmt.Errorf("timed out waiting for debug container to start") + pterm.Println() + return nil, fmt.Errorf("timed out after %v", timeout) default: - deadline, _ := ctx.Deadline() - statusLine(time.Until(deadline).Round(time.Second)) - - pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get debug copy %q: %v", podCopyName, err) + if deadline, ok := ctx.Deadline(); ok { + status.Update(pterm.LightGreen(pterm.Sprintf("%s [%v]", msg, time.Until(deadline).Round(time.Second)))) + } else { + status.Update(pterm.LightGreen(pterm.Sprintf("%s [%v]", msg, "?"))) } - for _, c := range pod.Status.ContainerStatuses { - if c.Name == debugPodContainerName && c.State.Running != nil { - statusLine(pterm.Green("done")) - pterm.Println() - return callback(podCopyName) - } + if result := f(ctx); result != nil { + pterm.Println() + return result, nil } - // No running container found, wait a bit before checking again time.Sleep(graceDuration) } } - return fmt.Errorf("debug pod %q did not start within the expected time", podCopyName) + return nil, fmt.Errorf("wait for err: %w", ctx.Err()) +} + +func (d *Debug) attach(podCopyName string) error { + isReady := func(ctx context.Context) *string { + pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) + if err != nil { + return nil + } + + for _, c := range pod.Status.ContainerStatuses { + if c.Name == debugPodContainerName && c.State.Running != nil { + return &podCopyName + } + } + + return nil + } + + podName, err := waitFor(d.ctx, d.flags.Timeout, "Waiting for debug container to start", isReady) + if err != nil { + return fmt.Errorf("debug container did not start: %w", err) + } + + if err := d.kubectl(d.ctx, + true, + "attach", + "pod/"+*podName, + "--container", debugPodContainerName, + "--stdin", + "--tty", + "--quiet", + ); err != nil { + return fmt.Errorf("failed to attach to the debug container: %w", err) + } + + pterm.Info.Println("Exited from Debug container") + + return err } func (d *Debug) kubectl(ctx context.Context, attach bool, args ...string) error { From 85ba66ec91c8f98625c7fc247d054afe18bb41b6 Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Thu, 21 Aug 2025 14:36:06 +0200 Subject: [PATCH 8/9] feat: task concept, in own package --- internal/debug/command/flag/flag.go | 2 +- internal/debug/debug.go | 169 ++++++++++++---------------- internal/task/task.go | 83 ++++++++++++++ 3 files changed, 159 insertions(+), 95 deletions(-) create mode 100644 internal/task/task.go diff --git a/internal/debug/command/flag/flag.go b/internal/debug/command/flag/flag.go index ab9257d0..86a03179 100644 --- a/internal/debug/command/flag/flag.go +++ b/internal/debug/command/flag/flag.go @@ -14,5 +14,5 @@ type Debug struct { Namespace string `short:"n" usage:"The kubernetes |namespace| to use. Defaults to current namespace."` Copy bool `usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` TTL time.Duration `usage:"|Duration| the debug pod remains after exit. Only has effect when --copy is specified."` - Timeout time.Duration `usage:"|Duration| to wait for the debug pod to be ready. Defaults to 30s."` + Timeout time.Duration `usage:"|Duration| to wait for each remote interaction this command does. Usually the default is sufficient."` } diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 1f7abf62..690bef92 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -9,6 +9,7 @@ import ( "time" "github.com/nais/cli/internal/debug/command/flag" + "github.com/nais/cli/internal/task" "github.com/pterm/pterm" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -36,8 +37,11 @@ type Debug struct { } func (d *Debug) Debug() error { - pods, err := d.getPodsForWorkload() + pods, err := task.Timed(d.ctx, d.flags.Timeout, "Fetching pods for workload", func(ctx context.Context) (*corev1.PodList, error) { + return d.getPodsForWorkload(ctx) + }) if err != nil { + pterm.Error.Println("Failed to get pods for workload") return err } @@ -48,20 +52,20 @@ func (d *Debug) Debug() error { pod, err := interactiveSelectPod(pods.Items) if err != nil { - pterm.Error.Printf("Failed to select pod: %v\n", err) + pterm.Error.Println("Failed to select pod") return err } if err := d.debugPod(*pod); err != nil { - pterm.Error.Printf("Failed to debug pod %s: %v\n", pod.Name, err) + pterm.Error.Println("Failed to debug pod") + return err } return nil } -func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { - pterm.Info.Println("Fetching pods for workload...") - podList, err := d.podsClient.List(d.ctx, labelSelector("app.kubernetes.io/name", d.workloadName)) +func (d *Debug) getPodsForWorkload(ctx context.Context) (*corev1.PodList, error) { + podList, err := d.podsClient.List(ctx, labelSelector("app.kubernetes.io/name", d.workloadName)) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } @@ -70,7 +74,7 @@ func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { return podList, nil } - podList, err = d.podsClient.List(d.ctx, labelSelector("app=", d.workloadName)) + podList, err = d.podsClient.List(ctx, labelSelector("app=", d.workloadName)) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } @@ -78,8 +82,8 @@ func (d *Debug) getPodsForWorkload() (*corev1.PodList, error) { return podList, nil } -func (d *Debug) podExists(name string) (bool, error) { - if _, err := d.podsClient.Get(d.ctx, name, metav1.GetOptions{}); err == nil { +func (d *Debug) podExists(ctx context.Context, name string) (bool, error) { + if _, err := d.podsClient.Get(ctx, name, metav1.GetOptions{}); err == nil { return true, nil } else if k8serrors.IsNotFound(err) { return false, nil @@ -105,15 +109,18 @@ func (d *Debug) debugPod(pod corev1.Pod) error { return d.createDebugPod(args, pod) } - return d.createDebugContainer(args, &pod) + return d.createDebugContainer(args) } -func (d *Debug) createDebugContainer(commonArgs []string, pod *corev1.Pod) error { - pterm.Info.Printf("Creating ephemeral debug container in pod %s...\n", pod.Name) - +func (d *Debug) createDebugContainer(commonArgs []string) error { args := append(commonArgs, "--target", d.workloadName) // workloadName is the same as container name for nais apps - if err := d.kubectl(d.ctx, true, args...); err != nil { - return fmt.Errorf("failed to start debug command: %v", err) + + _, err := task.Timed(d.ctx, d.flags.Timeout, "Creating ephemeral debug container", func(ctx context.Context) (*any, error) { + return nil, d.kubectl(ctx, true, args...) + }) + if err != nil { + pterm.Error.Println("Failed to create ephemeral debug container") + return err } pterm.Info.Println("Remember to restart the pod to remove the debug container") @@ -121,19 +128,19 @@ func (d *Debug) createDebugContainer(commonArgs []string, pod *corev1.Pod) error } func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { - pterm.Info.Printf("Creating a copy of pod %s with a debug container...\n", pod.Name) - - podCopyName := debugPodName(pod.Name) - // If debug pod already exists, attach instead of creating a new one - if exists, err := d.podExists(podCopyName); exists { - pterm.Info.Printf("Debug pod %q already exists, attaching...\n", podCopyName) - return d.attach(podCopyName) - } else if err != nil { - return fmt.Errorf("failed to check for existing debug pod %q: %v", podCopyName, err) + debugPodName := createDebugPodName(pod.Name) + + exists, err := task.Timed(d.ctx, d.flags.Timeout, "Check for existing debug pod", func(ctx context.Context) (bool, error) { + return d.podExists(ctx, debugPodName) + }) + if err != nil { + return fmt.Errorf("failed to check for existing debug pod: %w", err) + } else if exists { + return d.attach(d.ctx, debugPodName) } args := append(commonArgs, - "--copy-to", debugPodName(pod.Name), + "--copy-to", debugPodName, "--container", debugPodContainerName, "--keep-annotations", "--keep-liveness", @@ -141,16 +148,22 @@ func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { "--keep-startup", "--attach=false", ) - if err := d.kubectl(d.ctx, false, args...); err != nil { - return fmt.Errorf("failed to start debug command: %v", err) + _, err = task.Timed(d.ctx, d.flags.Timeout, "Create debug pod", func(ctx context.Context) (*any, error) { + return nil, d.kubectl(ctx, false, args...) + }) + if err != nil { + return fmt.Errorf("failed to create debug pod: %v", err) } - if err := d.annotateAndLabelDebugPod(podCopyName, pod.Labels); err != nil { + _, err = task.Timed(d.ctx, d.flags.Timeout, "Annotate debug pod", func(ctx context.Context) (*any, error) { + return nil, d.annotateAndLabelDebugPod(ctx, debugPodName, pod.Labels) + }) + if err != nil { return fmt.Errorf("failed to annotate and label debug pod: %w", err) } - if err := d.attach(podCopyName); err != nil { - return fmt.Errorf("failed to attach to debug pod %q: %w", podCopyName, err) + if err := d.attach(d.ctx, debugPodName); err != nil { + return fmt.Errorf("failed to attach to debug pod %q: %w", debugPodName, err) } // TODO ask if the user wants to delete the debug pod after attaching @@ -158,7 +171,7 @@ func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { return nil } -func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map[string]string) error { +func (d *Debug) annotateAndLabelDebugPod(ctx context.Context, debugPodName string, existingLabels map[string]string) error { args := []string{ "label", "pod/" + debugPodName, @@ -172,7 +185,7 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map } if err := d.kubectl( - d.ctx, + ctx, false, args..., ); err != nil { @@ -182,7 +195,7 @@ func (d *Debug) annotateAndLabelDebugPod(debugPodName string, existingLabels map killAfter := time.Now().Add(d.flags.TTL).Format(time.RFC3339) if err := d.kubectl( - d.ctx, + ctx, false, "annotate", "pod/"+debugPodName, @@ -201,9 +214,9 @@ func interactiveSelectPod(pods []corev1.Pod) (*corev1.Pod, error) { podNames = append(podNames, p.Name) } - result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() + result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).WithDefaultText(pterm.Normal("Please select a pod")).Show() if err != nil { - pterm.Error.Printf("Prompt failed: %v\n", err) + pterm.Error.Println("Prompt failed") return nil, err } @@ -227,82 +240,38 @@ func labelSelector(key, value string) metav1.ListOptions { } // debugPodName generates a name for the debug pod copy given a pod name. -func debugPodName(podName string) string { +func createDebugPodName(podName string) string { return podName + "-" + debugPodSuffix } -func waitFor[T any](ctx context.Context, timeout time.Duration, msg string, f func(context.Context) *T) (*T, error) { - graceDuration := 300 * time.Millisecond // time to wait between checks - - ctx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - status, err := (&pterm.AreaPrinter{}).Start() - if err != nil { - return nil, fmt.Errorf("failed to create status area: %w", err) - } - defer status.Stop() - - for ctx.Err() == nil { - select { - case <-ctx.Done(): - pterm.Println() - return nil, fmt.Errorf("timed out after %v", timeout) - default: - if deadline, ok := ctx.Deadline(); ok { - status.Update(pterm.LightGreen(pterm.Sprintf("%s [%v]", msg, time.Until(deadline).Round(time.Second)))) - } else { - status.Update(pterm.LightGreen(pterm.Sprintf("%s [%v]", msg, "?"))) - } - - if result := f(ctx); result != nil { - pterm.Println() - return result, nil - } - - time.Sleep(graceDuration) - } - } - - return nil, fmt.Errorf("wait for err: %w", ctx.Err()) -} - -func (d *Debug) attach(podCopyName string) error { - isReady := func(ctx context.Context) *string { - pod, err := d.podsClient.Get(d.ctx, podCopyName, metav1.GetOptions{}) +func (d *Debug) debugContainerIsReady(podName string) func(ctx context.Context) (*corev1.Pod, error) { + return func(ctx context.Context) (*corev1.Pod, error) { + pod, err := d.podsClient.Get(ctx, podName, metav1.GetOptions{}) if err != nil { - return nil + return nil, err } for _, c := range pod.Status.ContainerStatuses { if c.Name == debugPodContainerName && c.State.Running != nil { - return &podCopyName + return pod, nil } } - return nil + return nil, fmt.Errorf("no ready debug container with name %q found in pod %q", debugPodContainerName, podName) } +} - podName, err := waitFor(d.ctx, d.flags.Timeout, "Waiting for debug container to start", isReady) +func (d *Debug) attach(ctx context.Context, podName string) error { + _, err := task.Timed(ctx, d.flags.Timeout, "Attaching to container", func(ctx context.Context) (*any, error) { + _, err := withRetryOnErr(d.debugContainerIsReady(podName))(ctx) + return nil, err + }) if err != nil { return fmt.Errorf("debug container did not start: %w", err) } - if err := d.kubectl(d.ctx, - true, - "attach", - "pod/"+*podName, - "--container", debugPodContainerName, - "--stdin", - "--tty", - "--quiet", - ); err != nil { - return fmt.Errorf("failed to attach to the debug container: %w", err) - } - - pterm.Info.Println("Exited from Debug container") - - return err + pterm.Info.Printf("You are now typing in the debug container in %q. Type exit to exit.\n", podName) + return d.kubectl(ctx, true, "attach", "pod/"+podName, "--container", debugPodContainerName, "--stdin", "--tty", "--quiet") } func (d *Debug) kubectl(ctx context.Context, attach bool, args ...string) error { @@ -336,3 +305,15 @@ func (d *Debug) kubectl(ctx context.Context, attach bool, args ...string) error return nil } + +// withRetryOnErr retries the function until it returns nil error, or context is done. +func withRetryOnErr[T any](f func(context.Context) (*T, error)) func(context.Context) (*T, error) { + return func(ctx context.Context) (*T, error) { + ret, err := f(ctx) + for err != nil { + ret, err = f(ctx) + } + + return ret, err + } +} diff --git a/internal/task/task.go b/internal/task/task.go new file mode 100644 index 00000000..d02109a6 --- /dev/null +++ b/internal/task/task.go @@ -0,0 +1,83 @@ +package task + +// template +// _, _ = runTimedTask(d.ctx, d.flags.Timeout, "desc", func(ctx context.Context) (*any, error) { +// return nil, nil +// }) + +import ( + "context" + "sync" + "time" + + "github.com/pterm/pterm" +) + +func Timed[T any](parentCtx context.Context, timeout time.Duration, description string, f func(ctx context.Context) (T, error)) (T, error) { + ctx, cancel := context.WithTimeout(parentCtx, timeout) + defer cancel() + + done := make(chan string) + lock := &sync.Mutex{} + + lock.Lock() + go start(ctx, description, done, lock) + ret, err := f(ctx) + result := pterm.Green("done") + if err != nil { + result = pterm.Red("err") + } + stop(done, result, lock) + return ret, err +} + +func stop(done chan string, status string, lock *sync.Mutex) { + if done != nil { + select { + case done <- status: + default: + } + lock.Lock() // Ensure we wait for the status area to finish before returning + } else { + close(done) + done = nil + } +} + +func start(ctx context.Context, description string, done chan string, lock *sync.Mutex) { + defer lock.Unlock() + statusArea, err := (&pterm.AreaPrinter{}).Start() + if err != nil { + pterm.Error.Printf("failed to create status area: %v\n", err) + return + } + defer statusArea.Stop() + + setStatus := func(status string) { + statusArea.Update(pterm.Sprintf("%s [%v]", description, status)) + } + + for { + select { + case status := <-done: + setStatus(status) + pterm.Println() + return + case <-ctx.Done(): + switch err := ctx.Err(); err { + case context.DeadlineExceeded: + setStatus(pterm.Yellow("timeout")) + case context.Canceled: + setStatus(pterm.Red("cancelled")) + } + pterm.Println() + return + case <-time.After(50 * time.Millisecond): + if deadline, ok := ctx.Deadline(); ok { + setStatus(time.Until(deadline).Round(time.Second).String()) + } else { + setStatus("?") + } + } + } +} From e2ac1fb6fbe5f0c5802d6139fd4746d9089775e9 Mon Sep 17 00:00:00 2001 From: Vegar Sechmann Molvig Date: Fri, 22 Aug 2025 11:42:36 +0200 Subject: [PATCH 9/9] fix: fix deadlock, and make correct ctx easier to use --- internal/debug/command.go | 3 +-- internal/debug/debug.go | 55 +++++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/internal/debug/command.go b/internal/debug/command.go index 49bc1a54..4fc8d913 100644 --- a/internal/debug/command.go +++ b/internal/debug/command.go @@ -16,12 +16,11 @@ func Run(ctx context.Context, workloadName string, flags *flag.Debug) error { } dg := &Debug{ - ctx: ctx, podsClient: clientSet.CoreV1().Pods(flags.Namespace), flags: flags, workloadName: workloadName, } - if err := dg.Debug(); err != nil { + if err := dg.Debug(ctx); err != nil { return fmt.Errorf("debugging instance: %w", err) } diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 690bef92..759e9513 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -30,14 +30,13 @@ const ( ) type Debug struct { - ctx context.Context podsClient v1.PodInterface flags *flag.Debug workloadName string } -func (d *Debug) Debug() error { - pods, err := task.Timed(d.ctx, d.flags.Timeout, "Fetching pods for workload", func(ctx context.Context) (*corev1.PodList, error) { +func (d *Debug) Debug(ctx context.Context) error { + pods, err := task.Timed(ctx, d.flags.Timeout, "Fetching pods for workload", func(ctx context.Context) (*corev1.PodList, error) { return d.getPodsForWorkload(ctx) }) if err != nil { @@ -56,7 +55,7 @@ func (d *Debug) Debug() error { return err } - if err := d.debugPod(*pod); err != nil { + if err := d.debugPod(ctx, *pod); err != nil { pterm.Error.Println("Failed to debug pod") return err } @@ -82,17 +81,19 @@ func (d *Debug) getPodsForWorkload(ctx context.Context) (*corev1.PodList, error) return podList, nil } -func (d *Debug) podExists(ctx context.Context, name string) (bool, error) { - if _, err := d.podsClient.Get(ctx, name, metav1.GetOptions{}); err == nil { - return true, nil - } else if k8serrors.IsNotFound(err) { - return false, nil - } else { - return false, err +func (d *Debug) podExists(name string) func(context.Context) (bool, error) { + return func(ctx context.Context) (bool, error) { + if _, err := d.podsClient.Get(ctx, name, metav1.GetOptions{}); err == nil { + return true, nil + } else if k8serrors.IsNotFound(err) { + return false, nil + } else { + return false, err + } } } -func (d *Debug) debugPod(pod corev1.Pod) error { +func (d *Debug) debugPod(ctx context.Context, pod corev1.Pod) error { args := []string{ "debug", "pod/" + pod.Name, @@ -106,16 +107,16 @@ func (d *Debug) debugPod(pod corev1.Pod) error { } if d.flags.Copy { - return d.createDebugPod(args, pod) + return d.createDebugPod(ctx, args, pod) } - return d.createDebugContainer(args) + return d.createDebugContainer(ctx, args) } -func (d *Debug) createDebugContainer(commonArgs []string) error { +func (d *Debug) createDebugContainer(ctx context.Context, commonArgs []string) error { args := append(commonArgs, "--target", d.workloadName) // workloadName is the same as container name for nais apps - _, err := task.Timed(d.ctx, d.flags.Timeout, "Creating ephemeral debug container", func(ctx context.Context) (*any, error) { + _, err := task.Timed(ctx, d.flags.Timeout, "Creating ephemeral debug container", func(ctx context.Context) (*any, error) { return nil, d.kubectl(ctx, true, args...) }) if err != nil { @@ -127,16 +128,14 @@ func (d *Debug) createDebugContainer(commonArgs []string) error { return nil } -func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { +func (d *Debug) createDebugPod(ctx context.Context, commonArgs []string, pod corev1.Pod) error { debugPodName := createDebugPodName(pod.Name) - exists, err := task.Timed(d.ctx, d.flags.Timeout, "Check for existing debug pod", func(ctx context.Context) (bool, error) { - return d.podExists(ctx, debugPodName) - }) + exists, err := task.Timed(ctx, d.flags.Timeout, "Check for existing debug pod", d.podExists(debugPodName)) if err != nil { return fmt.Errorf("failed to check for existing debug pod: %w", err) } else if exists { - return d.attach(d.ctx, debugPodName) + return d.attach(ctx, debugPodName) } args := append(commonArgs, @@ -148,21 +147,21 @@ func (d *Debug) createDebugPod(commonArgs []string, pod corev1.Pod) error { "--keep-startup", "--attach=false", ) - _, err = task.Timed(d.ctx, d.flags.Timeout, "Create debug pod", func(ctx context.Context) (*any, error) { + _, err = task.Timed(ctx, d.flags.Timeout, "Create debug pod", func(ctx context.Context) (*any, error) { return nil, d.kubectl(ctx, false, args...) }) if err != nil { return fmt.Errorf("failed to create debug pod: %v", err) } - _, err = task.Timed(d.ctx, d.flags.Timeout, "Annotate debug pod", func(ctx context.Context) (*any, error) { + _, err = task.Timed(ctx, d.flags.Timeout, "Annotate debug pod", func(ctx context.Context) (*any, error) { return nil, d.annotateAndLabelDebugPod(ctx, debugPodName, pod.Labels) }) if err != nil { return fmt.Errorf("failed to annotate and label debug pod: %w", err) } - if err := d.attach(d.ctx, debugPodName); err != nil { + if err := d.attach(ctx, debugPodName); err != nil { return fmt.Errorf("failed to attach to debug pod %q: %w", debugPodName, err) } @@ -311,7 +310,13 @@ func withRetryOnErr[T any](f func(context.Context) (*T, error)) func(context.Con return func(ctx context.Context) (*T, error) { ret, err := f(ctx) for err != nil { - ret, err = f(ctx) + select { + case <-ctx.Done(): + return nil, err + + default: + ret, err = f(ctx) + } } return ret, err