diff --git a/args.go b/args.go
index 43c56edc8..a6cafec7c 100644
--- a/args.go
+++ b/args.go
@@ -857,4 +857,13 @@ const (
 	// ArgDedicatedInferenceAcceleratorSlug filters accelerators by slug (optional).
 	ArgDedicatedInferenceAcceleratorSlug = "slug"
+
+	// ArgDedicatedInferenceRegion filters dedicated inferences by region (optional).
+	ArgDedicatedInferenceRegion = "region"
+
+	// ArgDedicatedInferenceName filters dedicated inferences by name (optional).
+	ArgDedicatedInferenceName = "name"
+
+	// ArgDedicatedInferenceTokenName is the name for a dedicated inference auth token.
+	ArgDedicatedInferenceTokenName = "token-name"
 )
diff --git a/commands/dedicated_inference.go b/commands/dedicated_inference.go
index d02b7d474..e90e7c825 100644
--- a/commands/dedicated_inference.go
+++ b/commands/dedicated_inference.go
@@ -82,6 +82,43 @@ For more information, see https://docs.digitalocean.com/reference/api/digitaloce
 	AddBoolFlag(cmdDelete, doctl.ArgForce, doctl.ArgShortForce, false, "Delete the dedicated inference endpoint without a confirmation prompt")
 	cmdDelete.Example = `The following example deletes a dedicated inference endpoint: doctl dedicated-inference delete 12345678-1234-1234-1234-123456789012`
+	cmdUpdate := CmdBuilder(
+		cmd,
+		RunDedicatedInferenceUpdate,
+		"update <dedicated-inference-id>",
+		"Update a dedicated inference endpoint",
+		`Updates a dedicated inference endpoint using a spec file in JSON or YAML format.
+Use the `+"`"+`--spec`+"`"+` flag to provide the path to the spec file.
+Optionally provide a Hugging Face access token using `+"`"+`--hugging-face-token`+"`"+`.`,
+		Writer,
+		aliasOpt("u"),
+		displayerType(&displayers.DedicatedInference{}),
+	)
+	AddStringFlag(cmdUpdate, doctl.ArgDedicatedInferenceSpec, "", "", `Path to a dedicated inference spec in JSON or YAML format. Set to "-" to read from stdin.`, requiredOpt())
+	AddStringFlag(cmdUpdate, doctl.ArgDedicatedInferenceHuggingFaceToken, "", "", "Hugging Face token for accessing gated models (optional)")
+	cmdUpdate.Example = `The following example updates a dedicated inference endpoint using a spec file: doctl dedicated-inference update 12345678-1234-1234-1234-123456789012 --spec spec.yaml
+
+For more information, see https://docs.digitalocean.com/reference/api/digitalocean/#tag/Dedicated-Inference/operation/dedicatedInferences_update`
+
+	cmdList := CmdBuilder(
+		cmd,
+		RunDedicatedInferenceList,
+		"list",
+		"List all dedicated inference endpoints",
+		`Lists all dedicated inference endpoints on your account, including their IDs, names, regions, statuses, and public and private endpoint FQDNs.
+Optionally use `+"`"+`--region`+"`"+` to filter by region or `+"`"+`--name`+"`"+` to filter by name.`,
+		Writer,
+		aliasOpt("ls"),
+		displayerType(&displayers.DedicatedInferenceList{}),
+	)
+	AddStringFlag(cmdList, doctl.ArgDedicatedInferenceRegion, "", "", "Filter by region (optional)")
+	AddStringFlag(cmdList, doctl.ArgDedicatedInferenceName, "", "", "Filter by name (optional)")
+	cmdList.Example = `The following example lists all dedicated inference endpoints: doctl dedicated-inference list
+
+The following example filters by region: doctl dedicated-inference list --region nyc2
+
+The following example filters by name: doctl dedicated-inference list --name my-endpoint`
+
 	cmdListAccelerators := CmdBuilder(
 		cmd,
 		RunDedicatedInferenceListAccelerators,
@@ -98,6 +135,70 @@ Optionally use `+"`"+`--slug`+"`"+` to filter by accelerator slug.`,
 The following example filters by slug: doctl dedicated-inference list-accelerators 12345678-1234-1234-1234-123456789012 --slug gpu-mi300x1-192gb`
+	cmdCreateToken := CmdBuilder(
+		cmd,
+		RunDedicatedInferenceCreateToken,
+		"create-token <dedicated-inference-id>",
+		"Create an auth token for a dedicated inference endpoint",
+		`Creates a new authentication token for a dedicated inference endpoint.
+Use the `+"`"+`--token-name`+"`"+` flag to specify the name of the token.`,
+		Writer,
+		aliasOpt("ct"),
+		displayerType(&displayers.DedicatedInferenceTokenDisplayer{}),
+	)
+	AddStringFlag(cmdCreateToken, doctl.ArgDedicatedInferenceTokenName, "", "", "Name for the auth token", requiredOpt())
+	cmdCreateToken.Example = `The following example creates an auth token for a dedicated inference endpoint: doctl dedicated-inference create-token 12345678-1234-1234-1234-123456789012 --token-name my-token`
+
+	cmdListTokens := CmdBuilder(
+		cmd,
+		RunDedicatedInferenceListTokens,
+		"list-tokens <dedicated-inference-id>",
+		"List auth tokens for a dedicated inference endpoint",
+		`Lists all authentication tokens for a dedicated inference endpoint, including their IDs, names, and creation timestamps.
+Note: token values are not returned when listing tokens.`,
+		Writer,
+		aliasOpt("lt"),
+		displayerType(&displayers.DedicatedInferenceTokenDisplayer{}),
+	)
+	cmdListTokens.Example = `The following example lists auth tokens for a dedicated inference endpoint: doctl dedicated-inference list-tokens 12345678-1234-1234-1234-123456789012`
+
+	cmdRevokeToken := CmdBuilder(
+		cmd,
+		RunDedicatedInferenceRevokeToken,
+		"revoke-token <dedicated-inference-id> <token-id>",
+		"Revoke an auth token for a dedicated inference endpoint",
+		`Revokes (deletes) an authentication token for a dedicated inference endpoint.
+Provide the dedicated inference ID and the token ID as arguments.`, + Writer, + aliasOpt("rt"), + ) + AddBoolFlag(cmdRevokeToken, doctl.ArgForce, doctl.ArgShortForce, false, "Revoke the token without a confirmation prompt") + cmdRevokeToken.Example = `The following example revokes an auth token: doctl dedicated-inference revoke-token 12345678-1234-1234-1234-123456789012 12345678-0000-0000-1234-123456789012` + + cmdGetSizes := CmdBuilder( + cmd, + RunDedicatedInferenceGetSizes, + "get-sizes", + "List available dedicated inference GPU sizes and pricing", + `Returns the available GPU sizes for dedicated inference endpoints, including pricing, region availability, CPU, memory, GPU, and disk details.`, + Writer, + aliasOpt("gs"), + displayerType(&displayers.DedicatedInferenceSizeDisplayer{}), + ) + cmdGetSizes.Example = `The following example lists available dedicated inference sizes: doctl dedicated-inference get-sizes` + + cmdGetGPUModelConfig := CmdBuilder( + cmd, + RunDedicatedInferenceGetGPUModelConfig, + "get-gpu-model-config", + "List supported GPU model configurations", + `Returns the supported GPU model configurations for dedicated inference endpoints, including model slugs, names, compatible GPU slugs, and whether models are gated.`, + Writer, + aliasOpt("ggmc"), + displayerType(&displayers.DedicatedInferenceGPUModelConfigDisplayer{}), + ) + cmdGetGPUModelConfig.Example = `The following example lists GPU model configurations: doctl dedicated-inference get-gpu-model-config` + return cmd } @@ -183,6 +284,18 @@ func RunDedicatedInferenceGet(c *CmdConfig) error { return c.Display(&displayers.DedicatedInference{DedicatedInferences: do.DedicatedInferences{*endpoint}}) } +// RunDedicatedInferenceList lists all dedicated inference endpoints. +func RunDedicatedInferenceList(c *CmdConfig) error { + region, _ := c.Doit.GetString(c.NS, doctl.ArgDedicatedInferenceRegion) + name, _ := c.Doit.GetString(c.NS, doctl.ArgDedicatedInferenceName) + + list, err := c.DedicatedInferences().List(region, name) + if err != nil { + return err + } + return c.Display(&displayers.DedicatedInferenceList{DedicatedInferenceListItems: list}) +} + // RunDedicatedInferenceListAccelerators lists accelerators for a dedicated inference endpoint. func RunDedicatedInferenceListAccelerators(c *CmdConfig) error { if len(c.Args) < 1 { @@ -199,6 +312,121 @@ func RunDedicatedInferenceListAccelerators(c *CmdConfig) error { return c.Display(&displayers.DedicatedInferenceAccelerator{DedicatedInferenceAcceleratorInfos: accelerators}) } +// RunDedicatedInferenceUpdate updates an existing dedicated inference endpoint. +func RunDedicatedInferenceUpdate(c *CmdConfig) error { + if len(c.Args) < 1 { + return doctl.NewMissingArgsErr(c.NS) + } + id := c.Args[0] + + specPath, err := c.Doit.GetString(c.NS, doctl.ArgDedicatedInferenceSpec) + if err != nil { + return err + } + + spec, err := readDedicatedInferenceSpec(os.Stdin, specPath) + if err != nil { + return err + } + + req := &godo.DedicatedInferenceUpdateRequest{ + Spec: spec, + } + + hfToken, _ := c.Doit.GetString(c.NS, doctl.ArgDedicatedInferenceHuggingFaceToken) + if hfToken != "" { + req.Secrets = &godo.DedicatedInferenceSecrets{ + HuggingFaceToken: hfToken, + } + } + + endpoint, err := c.DedicatedInferences().Update(id, req) + if err != nil { + return err + } + return c.Display(&displayers.DedicatedInference{DedicatedInferences: do.DedicatedInferences{*endpoint}}) +} + +// RunDedicatedInferenceCreateToken creates a new auth token for a dedicated inference endpoint. 
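+// The returned token's Value field is only populated at creation time; the
+// list-tokens endpoint omits token values, so record the value from this
+// response if it is needed later.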
+func RunDedicatedInferenceCreateToken(c *CmdConfig) error { + if len(c.Args) < 1 { + return doctl.NewMissingArgsErr(c.NS) + } + diID := c.Args[0] + + tokenName, err := c.Doit.GetString(c.NS, doctl.ArgDedicatedInferenceTokenName) + if err != nil { + return err + } + + req := &godo.DedicatedInferenceTokenCreateRequest{ + Name: tokenName, + } + + token, err := c.DedicatedInferences().CreateToken(diID, req) + if err != nil { + return err + } + return c.Display(&displayers.DedicatedInferenceTokenDisplayer{DedicatedInferenceTokens: []do.DedicatedInferenceToken{*token}}) +} + +// RunDedicatedInferenceListTokens lists all auth tokens for a dedicated inference endpoint. +func RunDedicatedInferenceListTokens(c *CmdConfig) error { + if len(c.Args) < 1 { + return doctl.NewMissingArgsErr(c.NS) + } + diID := c.Args[0] + + tokens, err := c.DedicatedInferences().ListTokens(diID) + if err != nil { + return err + } + + displayTokens := make([]do.DedicatedInferenceToken, len(tokens)) + for i := range tokens { + displayTokens[i] = tokens[i] + } + return c.Display(&displayers.DedicatedInferenceTokenDisplayer{DedicatedInferenceTokens: displayTokens}) +} + +// RunDedicatedInferenceRevokeToken revokes an auth token for a dedicated inference endpoint. +func RunDedicatedInferenceRevokeToken(c *CmdConfig) error { + if len(c.Args) < 2 { + return doctl.NewMissingArgsErr(c.NS) + } + diID := c.Args[0] + tokenID := c.Args[1] + + force, err := c.Doit.GetBool(c.NS, doctl.ArgForce) + if err != nil { + return err + } + + if force || AskForConfirmDelete("dedicated inference token", 1) == nil { + return c.DedicatedInferences().RevokeToken(diID, tokenID) + } + + return errOperationAborted +} + +// RunDedicatedInferenceGetSizes returns available dedicated inference sizes and pricing. +func RunDedicatedInferenceGetSizes(c *CmdConfig) error { + _, sizes, err := c.DedicatedInferences().GetSizes() + if err != nil { + return err + } + return c.Display(&displayers.DedicatedInferenceSizeDisplayer{DedicatedInferenceSizes: sizes}) +} + +// RunDedicatedInferenceGetGPUModelConfig returns supported GPU model configurations. +func RunDedicatedInferenceGetGPUModelConfig(c *CmdConfig) error { + configs, err := c.DedicatedInferences().GetGPUModelConfig() + if err != nil { + return err + } + return c.Display(&displayers.DedicatedInferenceGPUModelConfigDisplayer{DedicatedInferenceGPUModelConfigs: configs}) +} + // RunDedicatedInferenceDelete deletes a dedicated inference endpoint by ID. 
func RunDedicatedInferenceDelete(c *CmdConfig) error { if len(c.Args) < 1 { diff --git a/commands/dedicated_inference_test.go b/commands/dedicated_inference_test.go index 24b3a2b43..9e40d9fc8 100644 --- a/commands/dedicated_inference_test.go +++ b/commands/dedicated_inference_test.go @@ -72,8 +72,15 @@ func TestDedicatedInferenceCommand(t *testing.T) { } assert.True(t, subcommands["create"], "Expected create subcommand") assert.True(t, subcommands["get"], "Expected get subcommand") + assert.True(t, subcommands["update"], "Expected update subcommand") + assert.True(t, subcommands["list"], "Expected list subcommand") assert.True(t, subcommands["delete"], "Expected delete subcommand") assert.True(t, subcommands["list-accelerators"], "Expected list-accelerators subcommand") + assert.True(t, subcommands["create-token"], "Expected create-token subcommand") + assert.True(t, subcommands["list-tokens"], "Expected list-tokens subcommand") + assert.True(t, subcommands["revoke-token"], "Expected revoke-token subcommand") + assert.True(t, subcommands["get-sizes"], "Expected get-sizes subcommand") + assert.True(t, subcommands["get-gpu-model-config"], "Expected get-gpu-model-config subcommand") } func TestRunDedicatedInferenceCreate(t *testing.T) { @@ -183,6 +190,91 @@ func TestRunDedicatedInferenceDelete(t *testing.T) { }) } +func TestRunDedicatedInferenceUpdate(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + specJSON := `{ + "version": 0, + "name": "test-dedicated-inference", + "region": "nyc2", + "vpc": {"uuid": "00000000-0000-4000-8000-000000000001"}, + "enable_public_endpoint": true, + "model_deployments": [ + { + "model_slug": "mistral/mistral-7b-instruct-v3", + "model_provider": "hugging_face", + "accelerators": [ + {"scale": 2, "type": "prefill", "accelerator_slug": "gpu-mi300x1-192gb"}, + {"scale": 4, "type": "decode", "accelerator_slug": "gpu-mi300x1-192gb"} + ] + } + ] + }` + tmpFile := t.TempDir() + "/spec.json" + err := os.WriteFile(tmpFile, []byte(specJSON), 0644) + assert.NoError(t, err) + + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceSpec, tmpFile) + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000") + + expectedReq := &godo.DedicatedInferenceUpdateRequest{ + Spec: testDedicatedInferenceSpecRequest, + } + + tm.dedicatedInferences.EXPECT().Update("00000000-0000-4000-8000-000000000000", expectedReq).Return(&testDedicatedInference, nil) + + err = RunDedicatedInferenceUpdate(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceUpdate_WithHuggingFaceToken(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + specJSON := `{ + "version": 0, + "name": "test-dedicated-inference", + "region": "nyc2", + "vpc": {"uuid": "00000000-0000-4000-8000-000000000001"}, + "enable_public_endpoint": true, + "model_deployments": [ + { + "model_slug": "mistral/mistral-7b-instruct-v3", + "model_provider": "hugging_face", + "accelerators": [ + {"scale": 2, "type": "prefill", "accelerator_slug": "gpu-mi300x1-192gb"}, + {"scale": 4, "type": "decode", "accelerator_slug": "gpu-mi300x1-192gb"} + ] + } + ] + }` + tmpFile := t.TempDir() + "/spec.json" + err := os.WriteFile(tmpFile, []byte(specJSON), 0644) + assert.NoError(t, err) + + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceSpec, tmpFile) + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceHuggingFaceToken, "hf_test_token") + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000") + + expectedReq := 
&godo.DedicatedInferenceUpdateRequest{ + Spec: testDedicatedInferenceSpecRequest, + Secrets: &godo.DedicatedInferenceSecrets{ + HuggingFaceToken: "hf_test_token", + }, + } + + tm.dedicatedInferences.EXPECT().Update("00000000-0000-4000-8000-000000000000", expectedReq).Return(&testDedicatedInference, nil) + + err = RunDedicatedInferenceUpdate(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceUpdate_MissingID(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + err := RunDedicatedInferenceUpdate(config) + assert.Error(t, err) + }) +} + func TestRunDedicatedInferenceDelete_MissingID(t *testing.T) { withTestClient(t, func(config *CmdConfig, tm *tcMocks) { err := RunDedicatedInferenceDelete(config) @@ -190,6 +282,78 @@ func TestRunDedicatedInferenceDelete_MissingID(t *testing.T) { }) } +func TestRunDedicatedInferenceList(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testListItems := do.DedicatedInferenceListItems{ + { + DedicatedInferenceListItem: &godo.DedicatedInferenceListItem{ + ID: "00000000-0000-4000-8000-000000000000", + Name: "test-dedicated-inference", + Region: "nyc2", + Status: "ACTIVE", + }, + }, + { + DedicatedInferenceListItem: &godo.DedicatedInferenceListItem{ + ID: "11111111-1111-4111-8111-111111111111", + Name: "another-endpoint", + Region: "sfo3", + Status: "PROVISIONING", + }, + }, + } + + tm.dedicatedInferences.EXPECT().List("", "").Return(testListItems, nil) + + err := RunDedicatedInferenceList(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceList_WithRegion(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testListItems := do.DedicatedInferenceListItems{ + { + DedicatedInferenceListItem: &godo.DedicatedInferenceListItem{ + ID: "00000000-0000-4000-8000-000000000000", + Name: "test-dedicated-inference", + Region: "nyc2", + Status: "ACTIVE", + }, + }, + } + + tm.dedicatedInferences.EXPECT().List("nyc2", "").Return(testListItems, nil) + + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceRegion, "nyc2") + + err := RunDedicatedInferenceList(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceList_WithName(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testListItems := do.DedicatedInferenceListItems{ + { + DedicatedInferenceListItem: &godo.DedicatedInferenceListItem{ + ID: "00000000-0000-4000-8000-000000000000", + Name: "test-dedicated-inference", + Region: "nyc2", + Status: "ACTIVE", + }, + }, + } + + tm.dedicatedInferences.EXPECT().List("", "test-dedicated-inference").Return(testListItems, nil) + + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceName, "test-dedicated-inference") + + err := RunDedicatedInferenceList(config) + assert.NoError(t, err) + }) +} + func TestRunDedicatedInferenceListAccelerators(t *testing.T) { withTestClient(t, func(config *CmdConfig, tm *tcMocks) { testAccelerators := do.DedicatedInferenceAcceleratorInfos{ @@ -249,3 +413,151 @@ func TestRunDedicatedInferenceListAccelerators_MissingID(t *testing.T) { assert.Error(t, err) }) } + +func TestRunDedicatedInferenceCreateToken(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testToken := &do.DedicatedInferenceToken{ + DedicatedInferenceToken: &godo.DedicatedInferenceToken{ + ID: "tok-123", + Name: "my-token", + Value: "secret-value-abc", + }, + } + + expectedReq := &godo.DedicatedInferenceTokenCreateRequest{ + Name: "my-token", + } + + 
tm.dedicatedInferences.EXPECT().CreateToken("00000000-0000-4000-8000-000000000000", expectedReq).Return(testToken, nil) + + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000") + config.Doit.Set(config.NS, doctl.ArgDedicatedInferenceTokenName, "my-token") + + err := RunDedicatedInferenceCreateToken(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceCreateToken_MissingID(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + err := RunDedicatedInferenceCreateToken(config) + assert.Error(t, err) + }) +} + +func TestRunDedicatedInferenceListTokens(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testTokens := do.DedicatedInferenceTokens{ + { + DedicatedInferenceToken: &godo.DedicatedInferenceToken{ + ID: "tok-1", + Name: "default", + }, + }, + { + DedicatedInferenceToken: &godo.DedicatedInferenceToken{ + ID: "tok-2", + Name: "my-token", + }, + }, + } + + tm.dedicatedInferences.EXPECT().ListTokens("00000000-0000-4000-8000-000000000000").Return(testTokens, nil) + + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000") + + err := RunDedicatedInferenceListTokens(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceListTokens_MissingID(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + err := RunDedicatedInferenceListTokens(config) + assert.Error(t, err) + }) +} + +func TestRunDedicatedInferenceRevokeToken(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + tm.dedicatedInferences.EXPECT().RevokeToken("00000000-0000-4000-8000-000000000000", "tok-123").Return(nil) + + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000", "tok-123") + config.Doit.Set(config.NS, doctl.ArgForce, true) + + err := RunDedicatedInferenceRevokeToken(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceRevokeToken_MissingArgs(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + err := RunDedicatedInferenceRevokeToken(config) + assert.Error(t, err) + }) +} + +func TestRunDedicatedInferenceRevokeToken_MissingTokenID(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + config.Args = append(config.Args, "00000000-0000-4000-8000-000000000000") + + err := RunDedicatedInferenceRevokeToken(config) + assert.Error(t, err) + }) +} + +func TestRunDedicatedInferenceGetSizes(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testSizes := do.DedicatedInferenceSizes{ + { + DedicatedInferenceSize: &godo.DedicatedInferenceSize{ + GPUSlug: "gpu-mi300x1-192gb", + PricePerHour: "3.59", + Regions: []string{"nyc2", "sfo3"}, + Currency: "USD", + CPU: 24, + Memory: 98304, + GPU: &godo.DedicatedInferenceSizeGPU{ + Count: 1, + VramGb: 192, + Slug: "mi300x", + }, + }, + }, + } + testRegions := []string{"nyc2", "sfo3"} + + tm.dedicatedInferences.EXPECT().GetSizes().Return(testRegions, testSizes, nil) + + err := RunDedicatedInferenceGetSizes(config) + assert.NoError(t, err) + }) +} + +func TestRunDedicatedInferenceGetGPUModelConfig(t *testing.T) { + withTestClient(t, func(config *CmdConfig, tm *tcMocks) { + testConfigs := do.DedicatedInferenceGPUModelConfigs{ + { + DedicatedInferenceGPUModelConfig: &godo.DedicatedInferenceGPUModelConfig{ + ModelSlug: "mistral/mistral-7b-instruct-v3", + ModelName: "Mistral 7B Instruct v3", + IsModelGated: false, + GPUSlugs: []string{"gpu-mi300x1-192gb", "gpu-h100x1-80gb"}, + }, + }, + { + DedicatedInferenceGPUModelConfig: 
&godo.DedicatedInferenceGPUModelConfig{ + ModelSlug: "meta-llama/llama-3-70b", + ModelName: "Llama 3 70B", + IsModelGated: true, + GPUSlugs: []string{"gpu-mi300x1-192gb"}, + }, + }, + } + + tm.dedicatedInferences.EXPECT().GetGPUModelConfig().Return(testConfigs, nil) + + err := RunDedicatedInferenceGetGPUModelConfig(config) + assert.NoError(t, err) + }) +} diff --git a/commands/displayers/dedicated_inference.go b/commands/displayers/dedicated_inference.go index 01c59b855..7d24ee6b4 100644 --- a/commands/displayers/dedicated_inference.go +++ b/commands/displayers/dedicated_inference.go @@ -1,7 +1,9 @@ package displayers import ( + "fmt" "io" + "strings" "github.com/digitalocean/doctl/do" ) @@ -119,3 +121,227 @@ func (d *DedicatedInferenceAccelerator) KV() []map[string]any { } return out } + +// DedicatedInferenceList wraps a slice of dedicated inference list items for display. +type DedicatedInferenceList struct { + DedicatedInferenceListItems do.DedicatedInferenceListItems +} + +var _ Displayable = &DedicatedInferenceList{} + +func (d *DedicatedInferenceList) JSON(out io.Writer) error { + return writeJSON(d.DedicatedInferenceListItems, out) +} + +func (d *DedicatedInferenceList) Cols() []string { + return []string{ + "ID", + "Name", + "Region", + "Status", + "VPCUUID", + "PublicEndpoint", + "PrivateEndpoint", + "CreatedAt", + "UpdatedAt", + } +} + +func (d *DedicatedInferenceList) ColMap() map[string]string { + return map[string]string{ + "ID": "ID", + "Name": "Name", + "Region": "Region", + "Status": "Status", + "VPCUUID": "VPC UUID", + "PublicEndpoint": "Public Endpoint", + "PrivateEndpoint": "Private Endpoint", + "CreatedAt": "Created At", + "UpdatedAt": "Updated At", + } +} + +func (d *DedicatedInferenceList) KV() []map[string]any { + if d == nil || d.DedicatedInferenceListItems == nil { + return []map[string]any{} + } + out := make([]map[string]any, 0, len(d.DedicatedInferenceListItems)) + for _, di := range d.DedicatedInferenceListItems { + publicEndpoint := "" + privateEndpoint := "" + if di.Endpoints != nil { + publicEndpoint = di.Endpoints.PublicEndpointFQDN + privateEndpoint = di.Endpoints.PrivateEndpointFQDN + } + out = append(out, map[string]any{ + "ID": di.ID, + "Name": di.Name, + "Region": di.Region, + "Status": di.Status, + "VPCUUID": di.VPCUUID, + "PublicEndpoint": publicEndpoint, + "PrivateEndpoint": privateEndpoint, + "CreatedAt": di.CreatedAt, + "UpdatedAt": di.UpdatedAt, + }) + } + return out +} + +// DedicatedInferenceTokenDisplayer wraps a slice of dedicated inference tokens for display. 
+type DedicatedInferenceTokenDisplayer struct { + DedicatedInferenceTokens []do.DedicatedInferenceToken +} + +var _ Displayable = &DedicatedInferenceTokenDisplayer{} + +func (d *DedicatedInferenceTokenDisplayer) JSON(out io.Writer) error { + return writeJSON(d.DedicatedInferenceTokens, out) +} + +func (d *DedicatedInferenceTokenDisplayer) Cols() []string { + return []string{ + "ID", + "Name", + "Value", + "CreatedAt", + } +} + +func (d *DedicatedInferenceTokenDisplayer) ColMap() map[string]string { + return map[string]string{ + "ID": "ID", + "Name": "Name", + "Value": "Value", + "CreatedAt": "Created At", + } +} + +func (d *DedicatedInferenceTokenDisplayer) KV() []map[string]any { + if d == nil || d.DedicatedInferenceTokens == nil { + return []map[string]any{} + } + out := make([]map[string]any, 0, len(d.DedicatedInferenceTokens)) + for _, t := range d.DedicatedInferenceTokens { + out = append(out, map[string]any{ + "ID": t.ID, + "Name": t.Name, + "Value": t.Value, + "CreatedAt": t.CreatedAt, + }) + } + return out +} + +// DedicatedInferenceSizeDisplayer wraps a slice of dedicated inference sizes for display. +type DedicatedInferenceSizeDisplayer struct { + DedicatedInferenceSizes do.DedicatedInferenceSizes +} + +var _ Displayable = &DedicatedInferenceSizeDisplayer{} + +func (d *DedicatedInferenceSizeDisplayer) JSON(out io.Writer) error { + return writeJSON(d.DedicatedInferenceSizes, out) +} + +func (d *DedicatedInferenceSizeDisplayer) Cols() []string { + return []string{ + "GPUSlug", + "PricePerHour", + "Currency", + "CPU", + "Memory", + "GPUCount", + "GPUVramGB", + "GPUModel", + "Regions", + } +} + +func (d *DedicatedInferenceSizeDisplayer) ColMap() map[string]string { + return map[string]string{ + "GPUSlug": "GPU Slug", + "PricePerHour": "Price/Hour", + "Currency": "Currency", + "CPU": "CPU", + "Memory": "Memory (MB)", + "GPUCount": "GPU Count", + "GPUVramGB": "GPU VRAM (GB)", + "GPUModel": "GPU Model", + "Regions": "Regions", + } +} + +func (d *DedicatedInferenceSizeDisplayer) KV() []map[string]any { + if d == nil || d.DedicatedInferenceSizes == nil { + return []map[string]any{} + } + out := make([]map[string]any, 0, len(d.DedicatedInferenceSizes)) + for _, sz := range d.DedicatedInferenceSizes { + gpuCount := uint32(0) + gpuVramGB := uint32(0) + gpuModel := "" + if sz.GPU != nil { + gpuCount = sz.GPU.Count + gpuVramGB = sz.GPU.VramGb + gpuModel = sz.GPU.Slug + } + out = append(out, map[string]any{ + "GPUSlug": sz.GPUSlug, + "PricePerHour": fmt.Sprintf("%s %s", sz.PricePerHour, sz.Currency), + "Currency": sz.Currency, + "CPU": sz.CPU, + "Memory": sz.Memory, + "GPUCount": gpuCount, + "GPUVramGB": gpuVramGB, + "GPUModel": gpuModel, + "Regions": strings.Join(sz.Regions, ","), + }) + } + return out +} + +// DedicatedInferenceGPUModelConfigDisplayer wraps a slice of GPU model configs for display. 
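+// IsModelGated indicates whether a model requires a Hugging Face access
+// token (see the --hugging-face-token flag); GPUSlugs is rendered as a
+// comma-separated list.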
+type DedicatedInferenceGPUModelConfigDisplayer struct { + DedicatedInferenceGPUModelConfigs do.DedicatedInferenceGPUModelConfigs +} + +var _ Displayable = &DedicatedInferenceGPUModelConfigDisplayer{} + +func (d *DedicatedInferenceGPUModelConfigDisplayer) JSON(out io.Writer) error { + return writeJSON(d.DedicatedInferenceGPUModelConfigs, out) +} + +func (d *DedicatedInferenceGPUModelConfigDisplayer) Cols() []string { + return []string{ + "ModelSlug", + "ModelName", + "IsModelGated", + "GPUSlugs", + } +} + +func (d *DedicatedInferenceGPUModelConfigDisplayer) ColMap() map[string]string { + return map[string]string{ + "ModelSlug": "Model Slug", + "ModelName": "Model Name", + "IsModelGated": "Gated", + "GPUSlugs": "GPU Slugs", + } +} + +func (d *DedicatedInferenceGPUModelConfigDisplayer) KV() []map[string]any { + if d == nil || d.DedicatedInferenceGPUModelConfigs == nil { + return []map[string]any{} + } + out := make([]map[string]any, 0, len(d.DedicatedInferenceGPUModelConfigs)) + for _, cfg := range d.DedicatedInferenceGPUModelConfigs { + out = append(out, map[string]any{ + "ModelSlug": cfg.ModelSlug, + "ModelName": cfg.ModelName, + "IsModelGated": cfg.IsModelGated, + "GPUSlugs": strings.Join(cfg.GPUSlugs, ","), + }) + } + return out +} diff --git a/do/dedicated_inference.go b/do/dedicated_inference.go index 102601092..875198044 100644 --- a/do/dedicated_inference.go +++ b/do/dedicated_inference.go @@ -32,6 +32,14 @@ type DedicatedInferenceToken struct { *godo.DedicatedInferenceToken } +// DedicatedInferenceListItem wraps a godo.DedicatedInferenceListItem. +type DedicatedInferenceListItem struct { + *godo.DedicatedInferenceListItem +} + +// DedicatedInferenceListItems is a slice of DedicatedInferenceListItem. +type DedicatedInferenceListItems []DedicatedInferenceListItem + // DedicatedInferenceAcceleratorInfo wraps a godo.DedicatedInferenceAcceleratorInfo. type DedicatedInferenceAcceleratorInfo struct { *godo.DedicatedInferenceAcceleratorInfo @@ -40,12 +48,38 @@ type DedicatedInferenceAcceleratorInfo struct { // DedicatedInferenceAcceleratorInfos is a slice of DedicatedInferenceAcceleratorInfo. type DedicatedInferenceAcceleratorInfos []DedicatedInferenceAcceleratorInfo +// DedicatedInferenceTokens is a slice of DedicatedInferenceToken. +type DedicatedInferenceTokens []DedicatedInferenceToken + +// DedicatedInferenceSize wraps a godo.DedicatedInferenceSize. +type DedicatedInferenceSize struct { + *godo.DedicatedInferenceSize +} + +// DedicatedInferenceSizes is a slice of DedicatedInferenceSize. +type DedicatedInferenceSizes []DedicatedInferenceSize + +// DedicatedInferenceGPUModelConfig wraps a godo.DedicatedInferenceGPUModelConfig. +type DedicatedInferenceGPUModelConfig struct { + *godo.DedicatedInferenceGPUModelConfig +} + +// DedicatedInferenceGPUModelConfigs is a slice of DedicatedInferenceGPUModelConfig. +type DedicatedInferenceGPUModelConfigs []DedicatedInferenceGPUModelConfig + // DedicatedInferenceService is an interface for interacting with DigitalOcean's Dedicated Inference API. 
type DedicatedInferenceService interface { Create(req *godo.DedicatedInferenceCreateRequest) (*DedicatedInference, *DedicatedInferenceToken, error) Get(id string) (*DedicatedInference, error) + Update(id string, req *godo.DedicatedInferenceUpdateRequest) (*DedicatedInference, error) + List(region string, name string) (DedicatedInferenceListItems, error) Delete(id string) error ListAccelerators(diID string, slug string) (DedicatedInferenceAcceleratorInfos, error) + CreateToken(diID string, req *godo.DedicatedInferenceTokenCreateRequest) (*DedicatedInferenceToken, error) + ListTokens(diID string) (DedicatedInferenceTokens, error) + RevokeToken(diID string, tokenID string) error + GetSizes() ([]string, DedicatedInferenceSizes, error) + GetGPUModelConfig() (DedicatedInferenceGPUModelConfigs, error) } var _ DedicatedInferenceService = &dedicatedInferenceService{} @@ -83,12 +117,49 @@ func (s *dedicatedInferenceService) Get(id string) (*DedicatedInference, error) return &DedicatedInference{DedicatedInference: d}, nil } +// Update updates an existing dedicated inference endpoint. +func (s *dedicatedInferenceService) Update(id string, req *godo.DedicatedInferenceUpdateRequest) (*DedicatedInference, error) { + d, _, err := s.client.DedicatedInference.Update(context.TODO(), id, req) + if err != nil { + return nil, err + } + return &DedicatedInference{DedicatedInference: d}, nil +} + // Delete deletes a dedicated inference endpoint by ID. func (s *dedicatedInferenceService) Delete(id string) error { _, err := s.client.DedicatedInference.Delete(context.TODO(), id) return err } +// List lists all dedicated inference endpoints. +func (s *dedicatedInferenceService) List(region string, name string) (DedicatedInferenceListItems, error) { + f := func(opt *godo.ListOptions) ([]any, *godo.Response, error) { + list, resp, err := s.client.DedicatedInference.List(context.TODO(), &godo.DedicatedInferenceListOptions{Region: region, Name: name, ListOptions: *opt}) + if err != nil { + return nil, nil, err + } + + items := make([]any, len(list)) + for i := range list { + items[i] = list[i] + } + return items, resp, nil + } + + si, err := PaginateResp(f) + if err != nil { + return nil, err + } + + result := make(DedicatedInferenceListItems, len(si)) + for i := range si { + d := si[i].(godo.DedicatedInferenceListItem) + result[i] = DedicatedInferenceListItem{DedicatedInferenceListItem: &d} + } + return result, nil +} + // ListAccelerators lists accelerators for a dedicated inference endpoint. func (s *dedicatedInferenceService) ListAccelerators(diID string, slug string) (DedicatedInferenceAcceleratorInfos, error) { f := func(opt *godo.ListOptions) ([]any, *godo.Response, error) { @@ -116,3 +187,74 @@ func (s *dedicatedInferenceService) ListAccelerators(diID string, slug string) ( } return list, nil } + +// CreateToken creates a new auth token for a dedicated inference endpoint. +func (s *dedicatedInferenceService) CreateToken(diID string, req *godo.DedicatedInferenceTokenCreateRequest) (*DedicatedInferenceToken, error) { + t, _, err := s.client.DedicatedInference.CreateToken(context.TODO(), diID, req) + if err != nil { + return nil, err + } + return &DedicatedInferenceToken{DedicatedInferenceToken: t}, nil +} + +// ListTokens lists all auth tokens for a dedicated inference endpoint. 
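+// Results are paginated via PaginateResp, mirroring List and ListAccelerators.
+// A minimal caller sketch (illustrative only; the ID is a placeholder):
+//
+//	tokens, err := svc.ListTokens("00000000-0000-4000-8000-000000000000")
+//	if err != nil {
+//		return err
+//	}
+//	for _, t := range tokens {
+//		fmt.Println(t.ID, t.Name) // Value is empty when listing
+//	}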
+func (s *dedicatedInferenceService) ListTokens(diID string) (DedicatedInferenceTokens, error) { + f := func(opt *godo.ListOptions) ([]any, *godo.Response, error) { + list, resp, err := s.client.DedicatedInference.ListTokens(context.TODO(), diID, opt) + if err != nil { + return nil, nil, err + } + + items := make([]any, len(list)) + for i := range list { + items[i] = list[i] + } + return items, resp, nil + } + + si, err := PaginateResp(f) + if err != nil { + return nil, err + } + + result := make(DedicatedInferenceTokens, len(si)) + for i := range si { + t := si[i].(godo.DedicatedInferenceToken) + result[i] = DedicatedInferenceToken{DedicatedInferenceToken: &t} + } + return result, nil +} + +// RevokeToken revokes an auth token for a dedicated inference endpoint. +func (s *dedicatedInferenceService) RevokeToken(diID string, tokenID string) error { + _, err := s.client.DedicatedInference.RevokeToken(context.TODO(), diID, tokenID) + return err +} + +// GetSizes returns available dedicated inference sizes and pricing. +func (s *dedicatedInferenceService) GetSizes() ([]string, DedicatedInferenceSizes, error) { + resp, _, err := s.client.DedicatedInference.GetSizes(context.TODO()) + if err != nil { + return nil, nil, err + } + + sizes := make(DedicatedInferenceSizes, len(resp.Sizes)) + for i, sz := range resp.Sizes { + sizes[i] = DedicatedInferenceSize{DedicatedInferenceSize: sz} + } + return resp.EnabledRegions, sizes, nil +} + +// GetGPUModelConfig returns supported GPU model configurations. +func (s *dedicatedInferenceService) GetGPUModelConfig() (DedicatedInferenceGPUModelConfigs, error) { + resp, _, err := s.client.DedicatedInference.GetGPUModelConfig(context.TODO()) + if err != nil { + return nil, err + } + + configs := make(DedicatedInferenceGPUModelConfigs, len(resp.GPUModelConfigs)) + for i, cfg := range resp.GPUModelConfigs { + configs[i] = DedicatedInferenceGPUModelConfig{DedicatedInferenceGPUModelConfig: cfg} + } + return configs, nil +} diff --git a/do/mocks/DedicatedInferenceService.go b/do/mocks/DedicatedInferenceService.go index 948e3cd60..c86ff2cee 100644 --- a/do/mocks/DedicatedInferenceService.go +++ b/do/mocks/DedicatedInferenceService.go @@ -86,6 +86,111 @@ func (mr *MockDedicatedInferenceServiceMockRecorder) ListAccelerators(diID any, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListAccelerators", reflect.TypeOf((*MockDedicatedInferenceService)(nil).ListAccelerators), diID, slug) } +// List mocks base method. +func (m *MockDedicatedInferenceService) List(region string, name string) (do.DedicatedInferenceListItems, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "List", region, name) + ret0, _ := ret[0].(do.DedicatedInferenceListItems) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// List indicates an expected call of List. +func (mr *MockDedicatedInferenceServiceMockRecorder) List(region any, name any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "List", reflect.TypeOf((*MockDedicatedInferenceService)(nil).List), region, name) +} + +// Update mocks base method. +func (m *MockDedicatedInferenceService) Update(id string, req *godo.DedicatedInferenceUpdateRequest) (*do.DedicatedInference, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Update", id, req) + ret0, _ := ret[0].(*do.DedicatedInference) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Update indicates an expected call of Update. 
+func (mr *MockDedicatedInferenceServiceMockRecorder) Update(id any, req any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Update", reflect.TypeOf((*MockDedicatedInferenceService)(nil).Update), id, req) +} + +// CreateToken mocks base method. +func (m *MockDedicatedInferenceService) CreateToken(diID string, req *godo.DedicatedInferenceTokenCreateRequest) (*do.DedicatedInferenceToken, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateToken", diID, req) + ret0, _ := ret[0].(*do.DedicatedInferenceToken) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CreateToken indicates an expected call of CreateToken. +func (mr *MockDedicatedInferenceServiceMockRecorder) CreateToken(diID any, req any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateToken", reflect.TypeOf((*MockDedicatedInferenceService)(nil).CreateToken), diID, req) +} + +// ListTokens mocks base method. +func (m *MockDedicatedInferenceService) ListTokens(diID string) (do.DedicatedInferenceTokens, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListTokens", diID) + ret0, _ := ret[0].(do.DedicatedInferenceTokens) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListTokens indicates an expected call of ListTokens. +func (mr *MockDedicatedInferenceServiceMockRecorder) ListTokens(diID any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListTokens", reflect.TypeOf((*MockDedicatedInferenceService)(nil).ListTokens), diID) +} + +// RevokeToken mocks base method. +func (m *MockDedicatedInferenceService) RevokeToken(diID string, tokenID string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RevokeToken", diID, tokenID) + ret0, _ := ret[0].(error) + return ret0 +} + +// RevokeToken indicates an expected call of RevokeToken. +func (mr *MockDedicatedInferenceServiceMockRecorder) RevokeToken(diID any, tokenID any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RevokeToken", reflect.TypeOf((*MockDedicatedInferenceService)(nil).RevokeToken), diID, tokenID) +} + +// GetSizes mocks base method. +func (m *MockDedicatedInferenceService) GetSizes() ([]string, do.DedicatedInferenceSizes, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetSizes") + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(do.DedicatedInferenceSizes) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 +} + +// GetSizes indicates an expected call of GetSizes. +func (mr *MockDedicatedInferenceServiceMockRecorder) GetSizes() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSizes", reflect.TypeOf((*MockDedicatedInferenceService)(nil).GetSizes)) +} + +// GetGPUModelConfig mocks base method. +func (m *MockDedicatedInferenceService) GetGPUModelConfig() (do.DedicatedInferenceGPUModelConfigs, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetGPUModelConfig") + ret0, _ := ret[0].(do.DedicatedInferenceGPUModelConfigs) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetGPUModelConfig indicates an expected call of GetGPUModelConfig. +func (mr *MockDedicatedInferenceServiceMockRecorder) GetGPUModelConfig() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetGPUModelConfig", reflect.TypeOf((*MockDedicatedInferenceService)(nil).GetGPUModelConfig)) +} + // Get mocks base method. 
func (m *MockDedicatedInferenceService) Get(id string) (*do.DedicatedInference, error) { m.ctrl.T.Helper() diff --git a/integration/dedicated_inference_create_token_test.go b/integration/dedicated_inference_create_token_test.go new file mode 100644 index 000000000..64225ba25 --- /dev/null +++ b/integration/dedicated_inference_create_token_test.go @@ -0,0 +1,214 @@ +package integration + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/create-token", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/00000000-0000-4000-8000-000000000000/tokens": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + body, err := io.ReadAll(req.Body) + if err != nil { + t.Fatal("failed to read request body") + } + + var createReq map[string]any + err = json.Unmarshal(body, &createReq) + if err != nil { + t.Fatalf("failed to parse request body: %s", err) + } + + name, ok := createReq["name"] + if !ok || name == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"id":"bad_request","message":"name is required"}`)) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceCreateTokenResponse)) + case "/v2/dedicated-inferences/99999999-9999-4999-8999-999999999999/tokens": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"id":"not_found","message":"The resource you requested could not be found."}`)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("valid dedicated inference ID and token name are provided", func() { + it("creates an auth token", func() { + aliases := []string{"create-token", "ct"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + "00000000-0000-4000-8000-000000000000", + "--token-name", "my-token", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceCreateTokenOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("dedicated inference ID is missing", func() { + it("returns an error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "create-token", + "--token-name", "my-token", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "missing") + }) + }) + + when("dedicated inference does not exist", 
func() { + it("returns a not found error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "create-token", + "99999999-9999-4999-8999-999999999999", + "--token-name", "my-token", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "404") + }) + }) + + when("token-name flag is missing", func() { + it("returns an error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "create-token", + "00000000-0000-4000-8000-000000000000", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "token-name") + }) + }) + + when("using the di alias", func() { + it("creates an auth token", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "create-token", + "00000000-0000-4000-8000-000000000000", + "--token-name", "my-token", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceCreateTokenOutput), strings.TrimSpace(string(output))) + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "create-token", + "00000000-0000-4000-8000-000000000000", + "--token-name", "my-token", + "--format", "ID,Name", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceCreateTokenFormatOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceCreateTokenOutput = ` +ID Name Value Created At +tok-1 my-token secret-token-123 2023-01-01 00:00:00 +0000 UTC +` + dedicatedInferenceCreateTokenFormatOutput = ` +ID Name +tok-1 my-token +` + + dedicatedInferenceCreateTokenResponse = ` +{ + "token": { + "id": "tok-1", + "name": "my-token", + "value": "secret-token-123", + "created_at": "2023-01-01T00:00:00Z" + } +} +` +) diff --git a/integration/dedicated_inference_get_gpu_model_config_test.go b/integration/dedicated_inference_get_gpu_model_config_test.go new file mode 100644 index 000000000..c8493f75a --- /dev/null +++ b/integration/dedicated_inference_get_gpu_model_config_test.go @@ -0,0 +1,134 @@ +package integration + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/get-gpu-model-config", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/gpu-model-config": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceGetGPUModelConfigResponse)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to 
dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("command is invoked", func() { + it("lists GPU model configurations", func() { + aliases := []string{"get-gpu-model-config", "ggmc"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetGPUModelConfigOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "get-gpu-model-config", + "--format", "ModelSlug,IsModelGated", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetGPUModelConfigFormatOutput), strings.TrimSpace(string(output))) + }) + }) + + when("using the di alias", func() { + it("lists GPU model configurations", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "get-gpu-model-config", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetGPUModelConfigOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceGetGPUModelConfigOutput = ` +Model Slug Model Name Gated GPU Slugs +mistral/mistral-7b-instruct-v3 Mistral 7B Instruct v3 false gpu-mi300x1-192gb,gpu-h100x1-80gb +meta-llama/llama-3-70b Llama 3 70B true gpu-mi300x1-192gb +` + dedicatedInferenceGetGPUModelConfigFormatOutput = ` +Model Slug Gated +mistral/mistral-7b-instruct-v3 false +meta-llama/llama-3-70b true +` + + dedicatedInferenceGetGPUModelConfigResponse = ` +{ + "gpu_model_configs": [ + { + "model_slug": "mistral/mistral-7b-instruct-v3", + "model_name": "Mistral 7B Instruct v3", + "is_model_gated": false, + "gpu_slugs": ["gpu-mi300x1-192gb", "gpu-h100x1-80gb"] + }, + { + "model_slug": "meta-llama/llama-3-70b", + "model_name": "Llama 3 70B", + "is_model_gated": true, + "gpu_slugs": ["gpu-mi300x1-192gb"] + } + ] +} +` +) diff --git a/integration/dedicated_inference_get_sizes_test.go b/integration/dedicated_inference_get_sizes_test.go new file mode 100644 index 000000000..a5adcdd5d --- /dev/null +++ b/integration/dedicated_inference_get_sizes_test.go @@ -0,0 +1,170 @@ +package integration + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/get-sizes", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/sizes": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", 
"application/json") + w.Write([]byte(dedicatedInferenceGetSizesResponse)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("command is invoked", func() { + it("lists available sizes", func() { + aliases := []string{"get-sizes", "gs"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetSizesOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "get-sizes", + "--format", "GPUSlug,PricePerHour,Regions", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetSizesFormatOutput), strings.TrimSpace(string(output))) + }) + }) + + when("using the di alias", func() { + it("lists available sizes", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "get-sizes", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceGetSizesOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceGetSizesResponse = ` +{ + "enabled_regions": ["nyc2", "sfo3"], + "sizes": [ + { + "gpu_slug": "gpu-mi300x1-192gb", + "price_per_hour": "3.59", + "regions": ["nyc2", "sfo3"], + "currency": "USD", + "cpu": 24, + "memory": 98304, + "gpu": { + "count": 1, + "vram_gb": 192, + "slug": "mi300x" + }, + "size_category": { + "name": "GPU Optimized", + "fleet_name": "gpu-mi300x" + }, + "disks": [ + { + "type": "local", + "size_gb": 960 + } + ] + }, + { + "gpu_slug": "gpu-h100x1-80gb", + "price_per_hour": "4.25", + "regions": ["nyc2"], + "currency": "USD", + "cpu": 16, + "memory": 65536, + "gpu": { + "count": 1, + "vram_gb": 80, + "slug": "h100" + }, + "size_category": { + "name": "GPU Optimized", + "fleet_name": "gpu-h100" + }, + "disks": [ + { + "type": "local", + "size_gb": 480 + } + ] + } + ] +} +` + + // NOTE: Column spacing must exactly match doctl's table formatter. 
+ dedicatedInferenceGetSizesOutput = ` +GPU Slug Price/Hour Currency CPU Memory (MB) GPU Count GPU VRAM (GB) GPU Model Regions +gpu-mi300x1-192gb 3.59 USD USD 24 98304 1 192 mi300x nyc2,sfo3 +gpu-h100x1-80gb 4.25 USD USD 16 65536 1 80 h100 nyc2 +` + dedicatedInferenceGetSizesFormatOutput = ` +GPU Slug Price/Hour Regions +gpu-mi300x1-192gb 3.59 USD nyc2,sfo3 +gpu-h100x1-80gb 4.25 USD nyc2 +` +) diff --git a/integration/dedicated_inference_list_test.go b/integration/dedicated_inference_list_test.go new file mode 100644 index 000000000..6ff20c35e --- /dev/null +++ b/integration/dedicated_inference_list_test.go @@ -0,0 +1,253 @@ +package integration + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/list", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + regionFilter := req.URL.Query().Get("region") + nameFilter := req.URL.Query().Get("name") + + if regionFilter == "nyc2" { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceListFilteredByRegionResponse)) + return + } + + if nameFilter == "test-di-1" { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceListFilteredByNameResponse)) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceListResponse)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("no filters are provided", func() { + it("lists all dedicated inference endpoints", func() { + aliases := []string{"list", "ls"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("region filter is provided", func() { + it("lists only endpoints in that region", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "list", + "--region", "nyc2", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListFilteredByRegionOutput), strings.TrimSpace(string(output))) + }) + }) + + when("name filter is provided", func() { + it("lists only endpoints with that name", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "list", + "--name", "test-di-1", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + 
expect.Equal(strings.TrimSpace(dedicatedInferenceListFilteredByNameOutput), strings.TrimSpace(string(output))) + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "list", + "--format", "ID,Name,Status", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListFormatOutput), strings.TrimSpace(string(output))) + }) + }) + + when("using the di alias", func() { + it("lists all dedicated inference endpoints", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "list", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceListOutput = ` +ID Name Region Status VPC UUID Public Endpoint Private Endpoint Created At Updated At +00000000-0000-4000-8000-000000000000 test-di-1 nyc2 ACTIVE 00000000-0000-4000-8000-000000000001 public.di-1.example.com private.di-1.example.com 2023-01-01 00:00:00 +0000 UTC 2023-01-01 00:00:00 +0000 UTC +11111111-1111-4111-8111-111111111111 test-di-2 sfo3 PROVISIONING 11111111-1111-4111-8111-111111111112 public.di-2.example.com private.di-2.example.com 2023-01-02 00:00:00 +0000 UTC 2023-01-02 00:00:00 +0000 UTC +` + dedicatedInferenceListFilteredByRegionOutput = ` +ID Name Region Status VPC UUID Public Endpoint Private Endpoint Created At Updated At +00000000-0000-4000-8000-000000000000 test-di-1 nyc2 ACTIVE 00000000-0000-4000-8000-000000000001 public.di-1.example.com private.di-1.example.com 2023-01-01 00:00:00 +0000 UTC 2023-01-01 00:00:00 +0000 UTC +` + dedicatedInferenceListFilteredByNameOutput = ` +ID Name Region Status VPC UUID Public Endpoint Private Endpoint Created At Updated At +00000000-0000-4000-8000-000000000000 test-di-1 nyc2 ACTIVE 00000000-0000-4000-8000-000000000001 public.di-1.example.com private.di-1.example.com 2023-01-01 00:00:00 +0000 UTC 2023-01-01 00:00:00 +0000 UTC +` + dedicatedInferenceListFormatOutput = ` +ID Name Status +00000000-0000-4000-8000-000000000000 test-di-1 ACTIVE +11111111-1111-4111-8111-111111111111 test-di-2 PROVISIONING +` + + dedicatedInferenceListResponse = ` +{ + "dedicated_inferences": [ + { + "id": "00000000-0000-4000-8000-000000000000", + "name": "test-di-1", + "region": "nyc2", + "status": "ACTIVE", + "vpc_uuid": "00000000-0000-4000-8000-000000000001", + "endpoints": { + "public_endpoint_fqdn": "public.di-1.example.com", + "private_endpoint_fqdn": "private.di-1.example.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": "11111111-1111-4111-8111-111111111111", + "name": "test-di-2", + "region": "sfo3", + "status": "PROVISIONING", + "vpc_uuid": "11111111-1111-4111-8111-111111111112", + "endpoints": { + "public_endpoint_fqdn": "public.di-2.example.com", + "private_endpoint_fqdn": "private.di-2.example.com" + }, + "created_at": "2023-01-02T00:00:00Z", + "updated_at": "2023-01-02T00:00:00Z" + } + ], + "links": {}, + "meta": { + "total": 2 + } +} +` + dedicatedInferenceListFilteredByRegionResponse = ` +{ + "dedicated_inferences": [ + { + "id": "00000000-0000-4000-8000-000000000000", + "name": "test-di-1", + "region": "nyc2", + "status": "ACTIVE", + 
"vpc_uuid": "00000000-0000-4000-8000-000000000001", + "endpoints": { + "public_endpoint_fqdn": "public.di-1.example.com", + "private_endpoint_fqdn": "private.di-1.example.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ], + "links": {}, + "meta": { + "total": 1 + } +} +` + dedicatedInferenceListFilteredByNameResponse = ` +{ + "dedicated_inferences": [ + { + "id": "00000000-0000-4000-8000-000000000000", + "name": "test-di-1", + "region": "nyc2", + "status": "ACTIVE", + "vpc_uuid": "00000000-0000-4000-8000-000000000001", + "endpoints": { + "public_endpoint_fqdn": "public.di-1.example.com", + "private_endpoint_fqdn": "private.di-1.example.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ], + "links": {}, + "meta": { + "total": 1 + } +} +` +) diff --git a/integration/dedicated_inference_list_tokens_test.go b/integration/dedicated_inference_list_tokens_test.go new file mode 100644 index 000000000..b66d38576 --- /dev/null +++ b/integration/dedicated_inference_list_tokens_test.go @@ -0,0 +1,185 @@ +package integration + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/list-tokens", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/00000000-0000-4000-8000-000000000000/tokens": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceListTokensResponse)) + case "/v2/dedicated-inferences/99999999-9999-4999-8999-999999999999/tokens": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodGet { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"id":"not_found","message":"The resource you requested could not be found."}`)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("valid dedicated inference ID is provided", func() { + it("lists the auth tokens", func() { + aliases := []string{"list-tokens", "lt"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + "00000000-0000-4000-8000-000000000000", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListTokensOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("dedicated inference ID is missing", func() { + it("returns an error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + 
"dedicated-inference", + "list-tokens", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "missing") + }) + }) + + when("dedicated inference does not exist", func() { + it("returns a not found error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "list-tokens", + "99999999-9999-4999-8999-999999999999", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "404") + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "list-tokens", + "00000000-0000-4000-8000-000000000000", + "--format", "ID,Name", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListTokensFormatOutput), strings.TrimSpace(string(output))) + }) + }) + + when("using the di alias", func() { + it("lists the auth tokens", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "list-tokens", + "00000000-0000-4000-8000-000000000000", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceListTokensOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceListTokensOutput = ` +ID Name Value Created At +tok-1 default 2023-01-01 00:00:00 +0000 UTC +tok-2 my-token 2023-01-02 00:00:00 +0000 UTC +` + dedicatedInferenceListTokensFormatOutput = ` +ID Name +tok-1 default +tok-2 my-token +` + + dedicatedInferenceListTokensResponse = ` +{ + "tokens": [ + { + "id": "tok-1", + "name": "default", + "created_at": "2023-01-01T00:00:00Z" + }, + { + "id": "tok-2", + "name": "my-token", + "created_at": "2023-01-02T00:00:00Z" + } + ], + "links": {}, + "meta": { + "total": 2 + } +} +` +) diff --git a/integration/dedicated_inference_revoke_token_test.go b/integration/dedicated_inference_revoke_token_test.go new file mode 100644 index 000000000..723723620 --- /dev/null +++ b/integration/dedicated_inference_revoke_token_test.go @@ -0,0 +1,152 @@ +package integration + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os/exec" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/revoke-token", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/00000000-0000-4000-8000-000000000000/tokens/tok-123": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodDelete { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.WriteHeader(http.StatusNoContent) + case "/v2/dedicated-inferences/99999999-9999-4999-8999-999999999999/tokens/tok-bad": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodDelete { + 
w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"id":"not_found","message":"The resource you requested could not be found."}`)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("valid dedicated inference ID and token ID are provided", func() { + it("revokes the auth token", func() { + aliases := []string{"revoke-token", "rt"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + "00000000-0000-4000-8000-000000000000", + "tok-123", + "--force", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + } + }) + }) + + when("arguments are missing", func() { + it("returns an error when no args provided", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "revoke-token", + "--force", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "missing") + }) + + it("returns an error when only dedicated inference ID provided", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "revoke-token", + "00000000-0000-4000-8000-000000000000", + "--force", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "missing") + }) + }) + + when("the token does not exist", func() { + it("returns a not found error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "revoke-token", + "99999999-9999-4999-8999-999999999999", + "tok-bad", + "--force", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "404") + }) + }) + + when("using the di alias", func() { + it("revokes the auth token", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "revoke-token", + "00000000-0000-4000-8000-000000000000", + "tok-123", + "--force", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + }) + }) +}) diff --git a/integration/dedicated_inference_update_test.go b/integration/dedicated_inference_update_test.go new file mode 100644 index 000000000..e6fe91ac9 --- /dev/null +++ b/integration/dedicated_inference_update_test.go @@ -0,0 +1,353 @@ +package integration + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/http/httputil" + "os" + "os/exec" + "strings" + "testing" + + "github.com/sclevine/spec" + "github.com/stretchr/testify/require" +) + +var _ = suite("dedicated-inference/update", func(t *testing.T, when spec.G, it spec.S) { + var ( + expect *require.Assertions + cmd *exec.Cmd + server *httptest.Server + ) + + it.Before(func() { + expect = require.New(t) + + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/v2/dedicated-inferences/00000000-0000-4000-8000-000000000000": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != 
http.MethodPatch { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + body, err := io.ReadAll(req.Body) + if err != nil { + t.Fatal("failed to read request body") + } + + var updateReq map[string]any + err = json.Unmarshal(body, &updateReq) + if err != nil { + t.Fatalf("failed to parse request body: %s", err) + } + + // Verify the request contains a spec (specField avoids shadowing the imported spec package) + specField, ok := updateReq["spec"] + if !ok || specField == nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"id":"bad_request","message":"spec is required"}`)) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(dedicatedInferenceUpdateResponse)) + case "/v2/dedicated-inferences/99999999-9999-4999-8999-999999999999": + auth := req.Header.Get("Authorization") + if auth != "Bearer some-magic-token" { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if req.Method != http.MethodPatch { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"id":"not_found","message":"The resource you requested could not be found."}`)) + default: + dump, err := httputil.DumpRequest(req, true) + if err != nil { + t.Fatal("failed to dump request") + } + + t.Fatalf("received unknown request: %s", dump) + } + })) + }) + + when("valid dedicated inference ID and spec are provided", func() { + it("updates the dedicated inference endpoint", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.json") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecJSON) + expect.NoError(err) + + aliases := []string{"update", "u"} + + for _, alias := range aliases { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + alias, + "00000000-0000-4000-8000-000000000000", + "--spec", specFile.Name(), + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output for alias %q: %s", alias, output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceUpdateOutput), strings.TrimSpace(string(output))) + } + }) + }) + + when("dedicated inference ID is missing", func() { + it("returns an error", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.json") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecJSON) + expect.NoError(err) + + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "update", + "--spec", specFile.Name(), + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "missing") + }) + }) + + when("dedicated inference does not exist", func() { + it("returns a not found error", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.json") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecJSON) + expect.NoError(err) + + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "update", + "99999999-9999-4999-8999-999999999999", + "--spec", specFile.Name(), + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "404") + }) + }) + + when("spec flag is missing", func() { + it("returns an error", func() { + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "update", + 
"00000000-0000-4000-8000-000000000000", + ) + + output, err := cmd.CombinedOutput() + expect.Error(err) + expect.Contains(string(output), "spec") + }) + }) + + when("using the di alias", func() { + it("updates the dedicated inference endpoint", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.json") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecJSON) + expect.NoError(err) + + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "di", + "update", + "00000000-0000-4000-8000-000000000000", + "--spec", specFile.Name(), + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceUpdateOutput), strings.TrimSpace(string(output))) + }) + }) + + when("passing a format flag", func() { + it("displays only those columns", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.json") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecJSON) + expect.NoError(err) + + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "update", + "00000000-0000-4000-8000-000000000000", + "--spec", specFile.Name(), + "--format", "ID,Name,Status", + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceUpdateFormatOutput), strings.TrimSpace(string(output))) + }) + }) + + when("using a YAML spec file", func() { + it("updates the dedicated inference endpoint", func() { + specFile, err := os.CreateTemp(t.TempDir(), "spec-*.yaml") + expect.NoError(err) + defer specFile.Close() + + _, err = specFile.WriteString(dedicatedInferenceUpdateSpecYAML) + expect.NoError(err) + + cmd = exec.Command(builtBinaryPath, + "-t", "some-magic-token", + "-u", server.URL, + "dedicated-inference", + "update", + "00000000-0000-4000-8000-000000000000", + "--spec", specFile.Name(), + ) + + output, err := cmd.CombinedOutput() + expect.NoError(err, fmt.Sprintf("received error output: %s", output)) + expect.Equal(strings.TrimSpace(dedicatedInferenceUpdateOutput), strings.TrimSpace(string(output))) + }) + }) +}) + +const ( + dedicatedInferenceUpdateOutput = ` +ID Name Region Status VPC UUID Public Endpoint Private Endpoint Created At Updated At +00000000-0000-4000-8000-000000000000 test-dedicated-inference nyc2 PROVISIONING 00000000-0000-4000-8000-000000000001 public.dedicated-inference.example.com private.dedicated-inference.example.com 2023-01-01 00:00:00 +0000 UTC 2023-01-02 00:00:00 +0000 UTC +` + dedicatedInferenceUpdateFormatOutput = ` +ID Name Status +00000000-0000-4000-8000-000000000000 test-dedicated-inference PROVISIONING +` + + dedicatedInferenceUpdateSpecJSON = `{ + "version": 1, + "name": "test-dedicated-inference", + "region": "nyc2", + "vpc": {"uuid": "00000000-0000-4000-8000-000000000001"}, + "enable_public_endpoint": true, + "model_deployments": [ + { + "model_slug": "mistral/mistral-7b-instruct-v3", + "model_provider": "hugging_face", + "accelerators": [ + {"scale": 3, "type": "prefill", "accelerator_slug": "gpu-mi300x1-192gb"}, + {"scale": 4, "type": "decode", "accelerator_slug": "gpu-mi300x1-192gb"} + ] + } + ] +}` + + dedicatedInferenceUpdateSpecYAML = `version: 1 +name: test-dedicated-inference +region: nyc2 +vpc: + uuid: "00000000-0000-4000-8000-000000000001" +enable_public_endpoint: true 
+model_deployments: + - model_slug: mistral/mistral-7b-instruct-v3 + model_provider: hugging_face + accelerators: + - scale: 3 + type: prefill + accelerator_slug: gpu-mi300x1-192gb + - scale: 4 + type: decode + accelerator_slug: gpu-mi300x1-192gb +` + + dedicatedInferenceUpdateResponse = ` +{ + "dedicated_inference": { + "id": "00000000-0000-4000-8000-000000000000", + "name": "test-dedicated-inference", + "region": "nyc2", + "status": "PROVISIONING", + "vpc_uuid": "00000000-0000-4000-8000-000000000001", + "endpoints": { + "public_endpoint_fqdn": "public.dedicated-inference.example.com", + "private_endpoint_fqdn": "private.dedicated-inference.example.com" + }, + "spec": { + "version": 1, + "id": "deploy-00000000-0000-4000-8000-000000000099", + "dedicated_inference_id": "00000000-0000-4000-8000-000000000000", + "state": "ACTIVE", + "enable_public_endpoint": true, + "vpc_config": { + "vpc_uuid": "00000000-0000-4000-8000-000000000001" + }, + "model_deployments": [ + { + "model_id": "model-001", + "model_slug": "mistral/mistral-7b-instruct-v3", + "model_provider": "hugging_face", + "accelerators": [ + { + "accelerator_id": "acc-001", + "accelerator_slug": "gpu-mi300x1-192gb", + "state": "ACTIVE", + "type": "prefill", + "scale": 3 + }, + { + "accelerator_id": "acc-002", + "accelerator_slug": "gpu-mi300x1-192gb", + "state": "ACTIVE", + "type": "decode", + "scale": 4 + } + ] + } + ], + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-02T00:00:00Z" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-02T00:00:00Z" + } +} +` +)