From bc2028a2a3599966db8312d5f359989ae12996a6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 00:10:03 +0000 Subject: [PATCH] docs: add 'How to add a provider' guide and link it in README Co-Authored-By: Alec Fong --- README.md | 1 + docs/how-to-add-a-provider.md | 326 ++++++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 docs/how-to-add-a-provider.md diff --git a/README.md b/README.md index cdf63981..21a73bf5 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ See [SECURITY.md](docs/SECURITY.md) for complete security specifications and imp - **[V1 Design Notes](pkg/v1/V1_DESIGN_NOTES.md)**: Design decisions, known quirks, and AWS-inspired patterns in the v1 API - **[Architecture Overview](docs/ARCHITECTURE.md)**: How the Cloud SDK fits into Brev's overall architecture - **[Security Requirements](docs/SECURITY.md)**: Security specifications and implementation requirements +- **[How to Add a Provider](docs/how-to-add-a-provider.md)**: Step-by-step guide to implement a new cloud provider using the Lambda Labs example --- diff --git a/docs/how-to-add-a-provider.md b/docs/how-to-add-a-provider.md new file mode 100644 index 00000000..ed9cfd45 --- /dev/null +++ b/docs/how-to-add-a-provider.md @@ -0,0 +1,326 @@ +# How to Add a Cloud Provider + +This guide explains how to add a new cloud provider to the Brev Cloud SDK (v1). The Lambda Labs provider is the best working, well-tested example—use it as your canonical reference. + +Goals: +- Implement a provider-specific CloudCredential (factory) and CloudClient (implementation) that satisfy pkg/v1 interfaces. +- Accurately declare Capabilities based on the provider’s API surface. +- Implement at least instance lifecycle and instance types, adhering to security requirements. +- Add validation tests and (optionally) a GitHub Actions workflow to run them with real credentials. + +Helpful background: +- Architecture overview: ../docs/ARCHITECTURE.md +- Security requirements: ../docs/SECURITY.md +- Validation testing framework: ../docs/VALIDATION_TESTING.md +- v1 design notes: ../pkg/v1/V1_DESIGN_NOTES.md + +Provider examples: +- Lambda Labs (canonical): ../internal/lambdalabs/v1/README.md +- Nebius (in progress): ../internal/nebius/v1/README.md +- Fluidstack (in progress): ../internal/fluidstack/v1/README.md + +--- + +## Core v1 Interfaces You Must Target + +CloudClient is a composed interface of provider capabilities. You don’t need to implement everything—only what your provider supports—but you must advertise Capabilities correctly. + +- CloudClient composition: ../pkg/v1/client.go + - Key aggregation: CloudBase, CloudQuota, CloudRebootInstance, CloudStopStartInstance, CloudResizeInstanceVolume, CloudMachineImage, CloudChangeInstanceType, CloudModifyFirewall, CloudInstanceTags, UpdateHandler +- Capabilities system: ../pkg/v1/capabilities.go +- Instance lifecycle, validation helpers, and types: ../pkg/v1/instance.go +- Instance types and validation helpers: ../pkg/v1/instancetype.go + +Patterns to follow: +- Embed v1.NotImplCloudClient in your client so unsupported methods gracefully return ErrNotImplemented (see ../pkg/v1/notimplemented.go). +- Accurately return capability flags that match your provider’s real API. +- Prefer stable, provider-native identifiers; otherwise use MakeGenericInstanceTypeID/MakeGenericInstanceTypeIDFromInstance. + +--- + +## Directory Layout + +Create a new provider folder: + +- internal/{provider}/ + - SECURITY.md (provider-specific notes; link to top-level security expectations) + - CONTRIBUTE.md (optional provider integration notes) + - v1/ + - client.go (credentials and client) + - instance.go (instance lifecycle + helpers) + - instancetype.go (instance types) + - capabilities.go (capability declarations) + - networking.go, image.go, storage.go, tags.go, quota.go, location.go (as applicable) + - validation_test.go (validation suite entry point) + +Use Lambda Labs as the pattern: +- ../internal/lambdalabs/v1/client.go +- ../internal/lambdalabs/v1/instance.go +- ../internal/lambdalabs/v1/capabilities.go + +--- + +## Minimal Scaffold (Copy/Paste Template) + +Place in internal/{provider}/v1/client.go. Adjust names, imports, and fields for your provider. + +```go +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/pkg/v1" +) + +type {Provider}Credential struct { + RefID string + // Add auth fields (e.g., APIKey, ClientID, Secret, Tenant, etc.) +} + +var _ v1.CloudCredential = &{Provider}Credential{} + +func New{Provider}Credential(refID string /* auth fields */) *{Provider}Credential { + return &{Provider}Credential{ + RefID: refID, + // ... + } +} + +func (c *{Provider}Credential) GetReferenceID() string { return c.RefID } +func (c *{Provider}Credential) GetAPIType() v1.APIType { return v1.APITypeLocational /* or v1.APITypeGlobal */ } +func (c *{Provider}Credential) GetCloudProviderID() v1.CloudProviderID { + return "{provider-id}" // e.g., "lambdalabs" +} +func (c *{Provider}Credential) GetTenantID() (string, error) { + // Derive stable tenant ID for quota/account scoping if possible + return "", nil +} + +func (c *{Provider}Credential) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return get{Provider}Capabilities(), nil +} + +func (c *{Provider}Credential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { + // Create a client configured for a given location if locational API + return New{Provider}Client(c.RefID /* auth fields */).MakeClient(ctx, location) +} + +// ---------------- Client ---------------- + +type {Provider}Client struct { + v1.NotImplCloudClient + refID string + location string + // add http/sdk client fields, base URLs, etc. +} + +var _ v1.CloudClient = &{Provider}Client{} + +func New{Provider}Client(refID string /* auth fields */) *{Provider}Client { + return &{Provider}Client{ + refID: refID, + // init http/sdk clients here + } +} + +func (c *{Provider}Client) GetAPIType() v1.APIType { return v1.APITypeLocational /* or Global */ } +func (c *{Provider}Client) GetCloudProviderID() v1.CloudProviderID { return "{provider-id}" } +func (c *{Provider}Client) GetReferenceID() string { return c.refID } +func (c *{Provider}Client) GetTenantID() (string, error) { return "", nil } + +func (c *{Provider}Client) MakeClient(_ context.Context, location string) (v1.CloudClient, error) { + c.location = location + return c, nil +} +``` + +Declare capabilities in internal/{provider}/v1/capabilities.go: + +```go +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/pkg/v1" +) + +func get{Provider}Capabilities() v1.Capabilities { + return v1.Capabilities{ + v1.CapabilityCreateInstance, + v1.CapabilityTerminateInstance, + v1.CapabilityCreateTerminateInstance, + // add others supported by your provider: reboot, stop/start, machine-image, tags, resize-volume, modify-firewall, etc. + } +} + +func (c *{Provider}Client) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return get{Provider}Capabilities(), nil +} + +func (c *{Provider}Credential) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return get{Provider}Capabilities(), nil +} +``` + +Implement instance lifecycle in internal/{provider}/v1/instance.go (map to provider API): + +```go +package v1 + +import ( + "context" + "fmt" + + v1 "github.com/brevdev/cloud/pkg/v1" +) + +func (c *{Provider}Client) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { + // 1) ensure SSH key present (or inject via API) per ../docs/SECURITY.md + // 2) map attrs to provider request (location, instance type, image, tags, firewall rules if supported) + // 3) launch and return instance converted to v1.Instance + return nil, fmt.Errorf("not implemented") +} + +func (c *{Provider}Client) GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { + return nil, fmt.Errorf("not implemented") +} + +func (c *{Provider}Client) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, error) { + return nil, fmt.Errorf("not implemented") +} + +func (c *{Provider}Client) TerminateInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} + +// Optional if supported: +func (c *{Provider}Client) RebootInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { return fmt.Errorf("not implemented") } +func (c *{Provider}Client) StopInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { return fmt.Errorf("not implemented") } +func (c *{Provider}Client) StartInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { return fmt.Errorf("not implemented") } + +// Merge strategies (pass-through is acceptable baseline). +func (c *{Provider}Client) MergeInstanceForUpdate(_ v1.Instance, newInst v1.Instance) v1.Instance { return newInst } +func (c *{Provider}Client) MergeInstanceTypeForUpdate(_ v1.InstanceType, newIt v1.InstanceType) v1.Type { return newIt } +``` + +See the canonical mapping and conversion logic in Lambda Labs: +- Create/terminate/list/reboot: ../internal/lambdalabs/v1/instance.go +- Capabilities: ../internal/lambdalabs/v1/capabilities.go +- Client/credential + NotImpl: ../internal/lambdalabs/v1/client.go + +Implement instance types in internal/{provider}/v1/instancetype.go: + +- Implement: + - GetInstanceTypes(ctx, args GetInstanceTypeArgs) ([]InstanceType, error) + - GetInstanceTypePollTime() time.Duration +- Use stable IDs if provider offers them. If not, use MakeGenericInstanceTypeID. +- Validate with helpers: + - ValidateGetInstanceTypes: ../pkg/v1/instancetype.go + - ValidateLocationalInstanceTypes: ../pkg/v1/instancetype.go + - ValidateStableInstanceTypeIDs (if you maintain a stable ID list) + +--- + +## Capabilities: Be Precise + +Capability flags live in ../pkg/v1/capabilities.go. Only include capabilities your API actually supports. For example, Lambda Labs supports: +- Create/terminate/reboot instance +- Does not (currently) support stop/start, resize volume, machine image, tags + +Reference: +- Lambda capabilities: ../internal/lambdalabs/v1/capabilities.go + +--- + +## Security Requirements + +All providers must conform to ../docs/SECURITY.md: +- Default deny all inbound, allow all outbound +- SSH server must be available with key-based auth +- Firewall rules should be explicitly configured via FirewallRule when supported +- If your provider’s firewall model is global/project-scoped rather than per-instance, document limitations in internal/{provider}/SECURITY.md and reflect that by omitting CapabilityModifyFirewall if applicable. + +Provider-specific security doc examples: +- Lambda Labs: ../internal/lambdalabs/SECURITY.md +- Nebius: ../internal/nebius/SECURITY.md +- Fluidstack: ../internal/fluidstack/v1/SECURITY.md + +--- + +## Validation Testing and CI + +Use the shared validation suite to test your provider with real credentials. + +- Validation framework and instructions: ../docs/VALIDATION_TESTING.md +- Shared package: ../internal/validation/suite.go + +Steps: +1) Create internal/{provider}/v1/validation_test.go: + +```go +package v1 + +import ( + "os" + "testing" + + "github.com/brevdev/cloud/internal/validation" +) + +func TestValidationFunctions(t *testing.T) { + if testing.Short() { + t.Skip("Skipping validation tests in short mode") + } + + apiKey := os.Getenv("YOUR_PROVIDER_API_KEY") + if apiKey == "" { + t.Skip("YOUR_PROVIDER_API_KEY not set, skipping validation tests") + } + + cfg := validation.ProviderConfig{ + Credential: New{Provider}Credential("validation-test" /* auth fields from env, e.g., apiKey */), + } + validation.RunValidationSuite(t, cfg) +} +``` + +2) Local runs: +- make test # skips validation (short) +- make test-validation # runs validation (long) +- make test-all # runs everything + +3) CI workflow (recommended): +- Add .github/workflows/validation-{provider}.yml (copy Lambda Labs workflow if available or follow VALIDATION_TESTING.md). +- Store secrets in GitHub Actions (e.g., YOUR_PROVIDER_API_KEY). + +--- + +## Checklist + +- [ ] Add internal/{provider}/v1 with client.go, instance.go, capabilities.go, instancetype.go +- [ ] Embed v1.NotImplCloudClient in client and only implement supported methods +- [ ] Accurately set Capabilities +- [ ] Implement instance types with stable IDs where possible +- [ ] Conform to security model; document provider-specific nuances +- [ ] Add validation_test.go and (optionally) CI workflow +- [ ] Run make lint and make test locally +- [ ] Add provider docs (README.md under provider folder) describing API mapping and feature coverage + +--- + +## References + +- Architecture: ../docs/ARCHITECTURE.md +- Security: ../docs/SECURITY.md +- Validation testing: ../docs/VALIDATION_TESTING.md +- CloudClient and composition: ../pkg/v1/client.go +- Capabilities: ../pkg/v1/capabilities.go +- Instance lifecycle and validations: ../pkg/v1/instance.go +- Instance types and validations: ../pkg/v1/instancetype.go +- Lambda Labs example: + - Client/Credential: ../internal/lambdalabs/v1/client.go + - Capabilities: ../internal/lambdalabs/v1/capabilities.go + - Instance operations: ../internal/lambdalabs/v1/instance.go + - Provider README: ../internal/lambdalabs/v1/README.md