From 901bea4fc19f332a93229964f79a8d9be4986b98 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 03:28:55 +0000 Subject: [PATCH] Implement retry logic for all Lambda Labs API calls - Add retry logic to AddSSHKey, LaunchInstance, GetInstance, TerminateInstance, ListInstances, and RestartInstance API calls - Use existing retry infrastructure: collections.RetryWithDataAndAttemptCount and getBackoff() - Apply consistent error handling with handleAPIError for all retry-wrapped functions - Follow same pattern as existing GetInstanceTypes retry implementation - Add proper lint annotations for deferred response body cleanup This ensures all Lambda Labs API calls have the same resilience and retry behavior, improving reliability when dealing with transient network issues or API rate limits. Co-Authored-By: Alec Fong --- go.mod | 6 +- go.sum | 10 +-- internal/lambdalabs/v1/instance.go | 133 +++++++++++++++++++++++------ 3 files changed, 114 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index cfaadd6d..7d5e0ceb 100644 --- a/go.mod +++ b/go.mod @@ -7,26 +7,26 @@ toolchain go1.23.2 require ( github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b github.com/bojanz/currency v1.3.1 + github.com/cenkalti/backoff/v4 v4.3.0 + github.com/gliderlabs/ssh v0.3.8 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 github.com/jarcoal/httpmock v1.4.0 github.com/nebius/gosdk v0.0.0-20250731090238-d96c0d4a5930 github.com/stretchr/testify v1.9.0 + golang.org/x/crypto v0.41.0 ) require ( buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.31.0-20231030212536-12f9cba37c9d.2 // indirect github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cockroachdb/apd/v3 v3.2.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/gliderlabs/ssh v0.3.8 // indirect github.com/gofrs/flock v0.12.1 // indirect github.com/golang-jwt/jwt/v4 v4.5.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect github.com/kr/text v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - golang.org/x/crypto v0.41.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/net v0.42.0 // indirect golang.org/x/sync v0.16.0 // indirect diff --git a/go.sum b/go.sum index 949962d3..4717a57c 100644 --- a/go.sum +++ b/go.sum @@ -58,20 +58,14 @@ golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= -golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= -golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/lambdalabs/v1/instance.go b/internal/lambdalabs/v1/instance.go index 1e3efe81..c2c5439c 100644 --- a/internal/lambdalabs/v1/instance.go +++ b/internal/lambdalabs/v1/instance.go @@ -8,6 +8,7 @@ import ( "time" "github.com/alecthomas/units" + "github.com/brevdev/cloud/internal/collections" openapi "github.com/brevdev/cloud/internal/lambdalabs/gen/lambdalabs" v1 "github.com/brevdev/cloud/pkg/v1" ) @@ -27,10 +28,7 @@ func (c *LambdaLabsClient) CreateInstance(ctx context.Context, attrs v1.CreateIn Name: keyPairName, PublicKey: &attrs.PublicKey, } - keyPairResp, resp, err := c.client.DefaultAPI.AddSSHKey(c.makeAuthContext(ctx)).AddSSHKeyRequest(request).Execute() - if resp != nil { - defer func() { _ = resp.Body.Close() }() - } + keyPairResp, err := c.addSSHKey(ctx, request) if err != nil && !strings.Contains(err.Error(), "name must be unique") { return nil, fmt.Errorf("failed to add SSH key: %w", err) } @@ -61,10 +59,7 @@ func (c *LambdaLabsClient) CreateInstance(ctx context.Context, attrs v1.CreateIn request.Name = *openapi.NewNullableString(&name) - resp, httpResp, err := c.client.DefaultAPI.LaunchInstance(c.makeAuthContext(ctx)).LaunchInstanceRequest(request).Execute() - if httpResp != nil { - defer func() { _ = httpResp.Body.Close() }() - } + resp, err := c.launchInstance(ctx, request) if err != nil { return nil, fmt.Errorf("failed to launch instance: %w", handleErrToCloudErr(err)) } @@ -80,10 +75,7 @@ func (c *LambdaLabsClient) CreateInstance(ctx context.Context, attrs v1.CreateIn // GetInstance retrieves an instance by ID // Supported via: GET /api/v1/instances/{id} func (c *LambdaLabsClient) GetInstance(ctx context.Context, instanceID v1.CloudProviderInstanceID) (*v1.Instance, error) { - resp, httpResp, err := c.client.DefaultAPI.GetInstance(c.makeAuthContext(ctx), string(instanceID)).Execute() - if httpResp != nil { - defer func() { _ = httpResp.Body.Close() }() - } + resp, err := c.getInstance(ctx, string(instanceID)) if err != nil { return nil, fmt.Errorf("failed to get instance: %w", err) } @@ -98,10 +90,7 @@ func (c *LambdaLabsClient) TerminateInstance(ctx context.Context, instanceID v1. InstanceIds: []string{string(instanceID)}, } - _, httpResp, err := c.client.DefaultAPI.TerminateInstance(c.makeAuthContext(ctx)).TerminateInstanceRequest(request).Execute() - if httpResp != nil { - defer func() { _ = httpResp.Body.Close() }() - } + _, err := c.terminateInstance(ctx, request) if err != nil { return fmt.Errorf("failed to terminate instance: %w", err) } @@ -112,10 +101,7 @@ func (c *LambdaLabsClient) TerminateInstance(ctx context.Context, instanceID v1. // ListInstances lists all instances // Supported via: GET /api/v1/instances func (c *LambdaLabsClient) ListInstances(ctx context.Context, _ v1.ListInstancesArgs) ([]v1.Instance, error) { - resp, httpResp, err := c.client.DefaultAPI.ListInstances(c.makeAuthContext(ctx)).Execute() - if httpResp != nil { - defer func() { _ = httpResp.Body.Close() }() - } + resp, err := c.listInstances(ctx) if err != nil { return nil, fmt.Errorf("failed to list instances: %w", err) } @@ -136,10 +122,7 @@ func (c *LambdaLabsClient) RebootInstance(ctx context.Context, instanceID v1.Clo InstanceIds: []string{string(instanceID)}, } - _, httpResp, err := c.client.DefaultAPI.RestartInstance(c.makeAuthContext(ctx)).RestartInstanceRequest(request).Execute() - if httpResp != nil { - defer func() { _ = httpResp.Body.Close() }() - } + _, err := c.restartInstance(ctx, request) if err != nil { return fmt.Errorf("failed to reboot instance: %w", err) } @@ -244,3 +227,105 @@ func (c *LambdaLabsClient) MergeInstanceForUpdate(_ v1.Instance, newInst v1.Inst func (c *LambdaLabsClient) MergeInstanceTypeForUpdate(_ v1.InstanceType, newIt v1.InstanceType) v1.InstanceType { return newIt } + +func (c *LambdaLabsClient) addSSHKey(ctx context.Context, request openapi.AddSSHKeyRequest) (*openapi.AddSSHKey200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.AddSSHKey200Response, error) { + res, resp, err := c.client.DefaultAPI.AddSSHKey(c.makeAuthContext(ctx)).AddSSHKeyRequest(request).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.AddSSHKey200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +} + +func (c *LambdaLabsClient) launchInstance(ctx context.Context, request openapi.LaunchInstanceRequest) (*openapi.LaunchInstance200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.LaunchInstance200Response, error) { + res, resp, err := c.client.DefaultAPI.LaunchInstance(c.makeAuthContext(ctx)).LaunchInstanceRequest(request).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.LaunchInstance200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +} + +func (c *LambdaLabsClient) getInstance(ctx context.Context, instanceID string) (*openapi.GetInstance200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.GetInstance200Response, error) { + res, resp, err := c.client.DefaultAPI.GetInstance(c.makeAuthContext(ctx), instanceID).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.GetInstance200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +} + +func (c *LambdaLabsClient) terminateInstance(ctx context.Context, request openapi.TerminateInstanceRequest) (*openapi.TerminateInstance200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.TerminateInstance200Response, error) { + res, resp, err := c.client.DefaultAPI.TerminateInstance(c.makeAuthContext(ctx)).TerminateInstanceRequest(request).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.TerminateInstance200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +} + +func (c *LambdaLabsClient) listInstances(ctx context.Context) (*openapi.ListInstances200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.ListInstances200Response, error) { + res, resp, err := c.client.DefaultAPI.ListInstances(c.makeAuthContext(ctx)).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.ListInstances200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +} + +func (c *LambdaLabsClient) restartInstance(ctx context.Context, request openapi.RestartInstanceRequest) (*openapi.RestartInstance200Response, error) { + result, err := collections.RetryWithDataAndAttemptCount(func() (*openapi.RestartInstance200Response, error) { + res, resp, err := c.client.DefaultAPI.RestartInstance(c.makeAuthContext(ctx)).RestartInstanceRequest(request).Execute() + if resp != nil { + defer resp.Body.Close() //nolint:errcheck // ignore because using defer (for some reason HandleErrDefer) + } + if err != nil { + return &openapi.RestartInstance200Response{}, handleAPIError(ctx, resp, err) + } + return res, nil + }, getBackoff()) + if err != nil { + return nil, err + } + return result, nil +}