diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index c05710010..7784a4fb8 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -23,6 +23,8 @@ ### Internal Changes + * Pass `--force-refresh` to Databricks CLI `auth token` command to bypass the CLI's internal token cache. + * Generalize CLI token source into a progressive command list for forward-compatible flag support. * Use resolved host type from host metadata in `HostType()` method, falling back to URL pattern matching when metadata is unavailable. * Normalize internal token sources on `auth.TokenSource` for proper context propagation ([#1577](https://github.com/databricks/databricks-sdk-go/pull/1577)). * Fix `TestAzureGithubOIDCCredentials` hang caused by missing `HTTPTransport` stub: `EnsureResolved` now calls `resolveHostMetadata`, which makes a real network request when no transport is set ([#1550](https://github.com/databricks/databricks-sdk-go/pull/1550)). diff --git a/config/cli_token_source.go b/config/cli_token_source.go index cb502130b..7cc2ec8d3 100644 --- a/config/cli_token_source.go +++ b/config/cli_token_source.go @@ -10,6 +10,7 @@ import ( "path/filepath" "runtime" "strings" + "sync/atomic" "time" "github.com/databricks/databricks-sdk-go/logger" @@ -31,13 +32,27 @@ type cliTokenResponse struct { Expiry string `json:"expiry"` } -type CliTokenSource struct { - // cmd is the primary command to execute (--profile when available, --host otherwise). - cmd []string +// cliCommand is a single CLI invocation with an associated warning message +// that is logged when this command fails and we fall back to the next one. +type cliCommand struct { + args []string + // usedFlags lists all flags passed in this command. When the CLI reports + // "unknown flag: " for one of these, we fall back to the next command. + usedFlags []string + warningMessage string +} - // hostCmd is a fallback command using --host, used when the primary --profile - // command fails because the CLI is too old to support --profile. - hostCmd []string +// CliTokenSource fetches OAuth tokens by shelling out to the Databricks CLI. +// It holds a list of commands ordered from most feature-rich to simplest, +// falling back progressively for older CLI versions that lack newer flags. +type CliTokenSource struct { + commands []cliCommand + // activeCommandIndex is the index of the CLI command known to work, or -1 + // if not yet resolved. Once resolved it never changes — older CLIs don't + // gain new flags. We use atomic.Int32 instead of sync.Once because + // probing must be retryable on transient errors: concurrent callers may + // redundantly probe, but all converge to the same index. + activeCommandIndex atomic.Int32 } func NewCliTokenSource(cfg *Config) (*CliTokenSource, error) { @@ -45,48 +60,86 @@ func NewCliTokenSource(cfg *Config) (*CliTokenSource, error) { if err != nil { return nil, err } - profileCmd, hostCmd := buildCliCommands(cliPath, cfg) - return &CliTokenSource{cmd: profileCmd, hostCmd: hostCmd}, nil + commands := buildCliCommands(cliPath, cfg) + if len(commands) == 0 { + return nil, fmt.Errorf("cannot configure CLI token source: neither profile nor host is set") + } + ts := &CliTokenSource{commands: commands} + ts.activeCommandIndex.Store(-1) + return ts, nil } -// buildCliCommands constructs the CLI commands for fetching an auth token. -// When cfg.Profile is set, the primary command uses --profile and a fallback -// --host command is also returned for compatibility with older CLIs. -// When cfg.Profile is empty, the primary command uses --host and no fallback -// is needed. -func buildCliCommands(cliPath string, cfg *Config) (primaryCmd []string, hostCmd []string) { +// buildCliCommands constructs the list of CLI commands for fetching an auth +// token, ordered from most feature-rich to simplest. The order defines the +// fallback chain: when a command fails with an unknown flag error, the next +// one is tried. +// +// When cfg.Profile is set, --force-refresh is based on the --profile command. +// When cfg.Profile is empty, --force-refresh is based on the --host command +// instead, so host-only configurations still benefit from cache bypass. +func buildCliCommands(cliPath string, cfg *Config) []cliCommand { + var commands []cliCommand if cfg.Profile != "" { - primary := []string{cliPath, "auth", "token", "--profile", cfg.Profile} - if cfg.Host != "" { - // Build a --host fallback for old CLIs that don't support --profile. - return primary, buildHostCommand(cliPath, cfg) + commands = append(commands, cliCommand{ + args: []string{cliPath, "auth", "token", "--profile", cfg.Profile, "--force-refresh"}, + usedFlags: []string{"--force-refresh", "--profile"}, + warningMessage: "Databricks CLI does not support --force-refresh flag. The CLI's token cache may provide stale tokens. Please upgrade your CLI to the latest version.", + }) + commands = append(commands, cliCommand{ + args: []string{cliPath, "auth", "token", "--profile", cfg.Profile}, + usedFlags: []string{"--profile"}, + warningMessage: "Databricks CLI does not support --profile flag. Falling back to --host. Please upgrade your CLI to the latest version.", + }) + } + if cfg.Host != "" { + hostArgs := []string{cliPath, "auth", "token", "--host", cfg.Host} + switch cfg.HostType() { + case AccountHost: + hostArgs = append(hostArgs, "--account-id", cfg.AccountID) } - return primary, nil + if cfg.Profile == "" { + forceArgs := append(hostArgs[:len(hostArgs):len(hostArgs)], "--force-refresh") + commands = append(commands, cliCommand{ + args: forceArgs, + usedFlags: []string{"--force-refresh"}, + warningMessage: "Databricks CLI does not support --force-refresh flag. The CLI's token cache may provide stale tokens. Please upgrade your CLI to the latest version.", + }) + } + commands = append(commands, cliCommand{args: hostArgs}) } - return buildHostCommand(cliPath, cfg), nil + return commands } -// buildHostCommand constructs the legacy --host based CLI command. -func buildHostCommand(cliPath string, cfg *Config) []string { - cmd := []string{cliPath, "auth", "token", "--host", cfg.Host} - switch cfg.HostType() { - case AccountHost: - cmd = append(cmd, "--account-id", cfg.AccountID) +// Token fetches an OAuth token by shelling out to the Databricks CLI. +func (c *CliTokenSource) Token(ctx context.Context) (*oauth2.Token, error) { + // If the working command has already been identified, call it directly. + // Otherwise, probe each command in order to find one that works. + if idx := int(c.activeCommandIndex.Load()); idx >= 0 { + return c.execCliCommand(ctx, c.commands[idx].args) } - return cmd + return c.probeAndExec(ctx) } -// Token fetches an OAuth token by shelling out to the Databricks CLI. -// When a --profile command is configured, it is tried first. If the CLI -// returns "unknown flag: --profile" (indicating an older CLI version), -// the fallback --host command is used instead. -func (c *CliTokenSource) Token(ctx context.Context) (*oauth2.Token, error) { - tok, err := c.execCliCommand(ctx, c.cmd) - if err != nil && c.hostCmd != nil && isUnknownFlagError(err) { - logger.Warnf(ctx, "Databricks CLI does not support --profile flag. Falling back to --host. Please upgrade your CLI to the latest version.") - return c.execCliCommand(ctx, c.hostCmd) +// probeAndExec walks the command list from most-featured to simplest, looking +// for a CLI command that succeeds. When a command fails with "unknown flag" for +// one of its [cliCommand.usedFlags], it logs a warning and tries the next. Any +// other error stops probing immediately and is returned to the caller. +// On success, [activeCommandIndex] is stored so future calls skip probing. +func (c *CliTokenSource) probeAndExec(ctx context.Context) (*oauth2.Token, error) { + for i := range c.commands { + cmd := c.commands[i] + tok, err := c.execCliCommand(ctx, cmd.args) + if err == nil { + c.activeCommandIndex.Store(int32(i)) + return tok, nil + } + lastCommand := i == len(c.commands)-1 + if lastCommand || !isUnknownFlagError(err, cmd.usedFlags) { + return nil, err + } + logger.Warnf(ctx, cmd.warningMessage) } - return tok, err + return nil, fmt.Errorf("cannot get access token: no CLI commands configured") } func (c *CliTokenSource) execCliCommand(ctx context.Context, args []string) (*oauth2.Token, error) { @@ -95,7 +148,13 @@ func (c *CliTokenSource) execCliCommand(ctx context.Context, args []string) (*oa if err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) { - return nil, fmt.Errorf("cannot get access token: %s", strings.TrimSpace(string(exitErr.Stderr))) + // Format with %q: the CLI's stderr can be multi-line (includes usage + // text on unknown-flag errors) and quoting keeps it on one line so + // log aggregators don't split a single error across multiple entries. + // We intentionally discard exec.ExitError — the stderr text is the + // CLI's error contract; exit codes and process state are not useful. + return nil, fmt.Errorf("cannot get access token: %q", + strings.TrimSpace(string(exitErr.Stderr))) } return nil, fmt.Errorf("cannot get access token: %w", err) } @@ -114,11 +173,16 @@ func (c *CliTokenSource) execCliCommand(ctx context.Context, args []string) (*oa }, nil } -// isUnknownFlagError returns true if the error indicates the CLI does not -// recognize the --profile flag. This happens with older CLI versions that -// predate profile-based token lookup. -func isUnknownFlagError(err error) bool { - return strings.Contains(err.Error(), "unknown flag: --profile") +// isUnknownFlagError returns true if the error message indicates the CLI does +// not recognize one of the given flags. +func isUnknownFlagError(err error, flags []string) bool { + msg := err.Error() + for _, flag := range flags { + if strings.Contains(msg, "unknown flag: "+flag) { + return true + } + } + return false } // parseExpiry parses an expiry time string in multiple formats for cross-SDK compatibility. diff --git a/config/cli_token_source_test.go b/config/cli_token_source_test.go index 153a3dffb..f75fb1a13 100644 --- a/config/cli_token_source_test.go +++ b/config/cli_token_source_test.go @@ -136,20 +136,25 @@ func TestBuildCliCommands(t *testing.T) { ) testCases := []struct { - name string - cfg *Config - wantCmd []string - wantHostCmd []string + name string + cfg *Config + wantCommands [][]string }{ { - name: "workspace host", - cfg: &Config{Host: host}, - wantCmd: []string{cliPath, "auth", "token", "--host", host}, + name: "workspace host only — force-refresh based on host", + cfg: &Config{Host: host}, + wantCommands: [][]string{ + {cliPath, "auth", "token", "--host", host, "--force-refresh"}, + {cliPath, "auth", "token", "--host", host}, + }, }, { - name: "account host", - cfg: &Config{Host: accountHost, AccountID: accountID}, - wantCmd: []string{cliPath, "auth", "token", "--host", accountHost, "--account-id", accountID}, + name: "account host only — force-refresh based on host with account-id", + cfg: &Config{Host: accountHost, AccountID: accountID}, + wantCommands: [][]string{ + {cliPath, "auth", "token", "--host", accountHost, "--account-id", accountID, "--force-refresh"}, + {cliPath, "auth", "token", "--host", accountHost, "--account-id", accountID}, + }, }, { name: "former unified host treated as workspace", @@ -158,29 +163,40 @@ func TestBuildCliCommands(t *testing.T) { AccountID: accountID, WorkspaceID: workspaceID, }, - wantCmd: []string{cliPath, "auth", "token", "--host", unifiedHost}, + wantCommands: [][]string{ + {cliPath, "auth", "token", "--host", unifiedHost, "--force-refresh"}, + {cliPath, "auth", "token", "--host", unifiedHost}, + }, }, { - name: "profile uses --profile with --host fallback", - cfg: &Config{Profile: "my-profile", Host: host}, - wantCmd: []string{cliPath, "auth", "token", "--profile", "my-profile"}, - wantHostCmd: []string{cliPath, "auth", "token", "--host", host}, + name: "profile with host — force-refresh based on profile", + cfg: &Config{Profile: "my-profile", Host: host}, + wantCommands: [][]string{ + {cliPath, "auth", "token", "--profile", "my-profile", "--force-refresh"}, + {cliPath, "auth", "token", "--profile", "my-profile"}, + {cliPath, "auth", "token", "--host", host}, + }, }, { - name: "profile without host — no fallback", - cfg: &Config{Profile: "my-profile"}, - wantCmd: []string{cliPath, "auth", "token", "--profile", "my-profile"}, + name: "profile without host — no host fallback", + cfg: &Config{Profile: "my-profile"}, + wantCommands: [][]string{ + {cliPath, "auth", "token", "--profile", "my-profile", "--force-refresh"}, + {cliPath, "auth", "token", "--profile", "my-profile"}, + }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - gotCmd, gotHostCmd := buildCliCommands(cliPath, tc.cfg) - if !slices.Equal(gotCmd, tc.wantCmd) { - t.Errorf("primary cmd = %v, want %v", gotCmd, tc.wantCmd) + got := buildCliCommands(cliPath, tc.cfg) + if len(got) != len(tc.wantCommands) { + t.Fatalf("got %d commands, want %d", len(got), len(tc.wantCommands)) } - if !slices.Equal(gotHostCmd, tc.wantHostCmd) { - t.Errorf("host cmd = %v, want %v", gotHostCmd, tc.wantHostCmd) + for i, want := range tc.wantCommands { + if !slices.Equal(got[i].args, want) { + t.Errorf("command[%d].args = %v, want %v", i, got[i].args, want) + } } }) } @@ -204,9 +220,11 @@ func TestNewCliTokenSource(t *testing.T) { if err != nil { t.Fatalf("NewCliTokenSource() unexpected error: %v", err) } - // Verify CLI path was resolved and used - if ts.cmd[0] != validCliPath { - t.Errorf("cmd[0] = %q, want %q", ts.cmd[0], validCliPath) + if len(ts.commands) == 0 { + t.Fatal("expected at least one command") + } + if ts.commands[0].args[0] != validCliPath { + t.Errorf("commands[0].args[0] = %q, want %q", ts.commands[0].args[0], validCliPath) } }) @@ -217,6 +235,23 @@ func TestNewCliTokenSource(t *testing.T) { t.Errorf("NewCliTokenSource() error = %v, want %v", err, ErrCliNotFound) } }) + + t.Run("no profile or host", func(t *testing.T) { + cfg := &Config{DatabricksCliPath: validCliPath} + _, err := NewCliTokenSource(cfg) + if err == nil { + t.Fatal("NewCliTokenSource() error = nil, want error") + } + if !strings.Contains(err.Error(), "neither profile nor host is set") { + t.Errorf("NewCliTokenSource() error = %v, want error containing %q", err, "neither profile nor host is set") + } + }) +} + +func newTestCliTokenSource(commands []cliCommand) *CliTokenSource { + ts := &CliTokenSource{commands: commands} + ts.activeCommandIndex.Store(-1) + return ts } func TestCliTokenSource_Token(t *testing.T) { @@ -262,7 +297,7 @@ func TestCliTokenSource_Token(t *testing.T) { t.Fatalf("failed to create mock script: %v", err) } - ts := &CliTokenSource{cmd: []string{mockScript}} + ts := newTestCliTokenSource([]cliCommand{{args: []string{mockScript}}}) token, err := ts.Token(context.Background()) if tc.wantErrMsg != "" { @@ -281,73 +316,177 @@ func TestCliTokenSource_Token(t *testing.T) { } } -func TestCliTokenSource_Token_FallbackOnUnknownFlag(t *testing.T) { +func TestCliTokenSource_Token_Fallback(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Skipping shell script test on Windows") } expiry := time.Now().Add(1 * time.Hour).Format(time.RFC3339) - validResponse, _ := json.Marshal(cliTokenResponse{ - AccessToken: "fallback-token", + successResponse, _ := json.Marshal(cliTokenResponse{ + AccessToken: "test-token", TokenType: "Bearer", Expiry: expiry, }) - tempDir := t.TempDir() + success := "#!/bin/sh\necho '" + string(successResponse) + "'" + unknownForceRefresh := "#!/bin/sh\necho 'Error: unknown flag: --force-refresh' >&2\nexit 1" + unknownProfile := "#!/bin/sh\necho 'Error: unknown flag: --profile' >&2\nexit 1" + realError := "#!/bin/sh\necho 'cache: databricks OAuth is not configured for this host' >&2\nexit 1" + unreachable := "#!/bin/sh\necho 'should not reach here' >&2\nexit 1" - // Primary script simulates an old CLI that doesn't know --profile. - profileScript := filepath.Join(tempDir, "profile_cli.sh") - if err := os.WriteFile(profileScript, []byte("#!/bin/sh\necho 'Error: unknown flag: --profile' >&2\nexit 1"), 0755); err != nil { - t.Fatalf("failed to create profile script: %v", err) + testCases := []struct { + name string + forceScript string // empty means nil forceCmd + profileScript string // empty means nil profileCmd + hostScript string // empty means nil hostCmd + wantToken string + wantErrMsg string + }{ + { + name: "force-refresh succeeds", + forceScript: success, + profileScript: unreachable, + hostScript: unreachable, + wantToken: "test-token", + }, + { + name: "force-refresh falls back to profile", + forceScript: unknownForceRefresh, + profileScript: success, + wantToken: "test-token", + }, + { + name: "force-refresh falls back to host (no profile)", + forceScript: unknownForceRefresh, + hostScript: success, + wantToken: "test-token", + }, + { + name: "profile falls back to host", + profileScript: unknownProfile, + hostScript: success, + wantToken: "test-token", + }, + { + name: "full fallback chain: force -> profile -> host", + forceScript: unknownProfile, + profileScript: unknownProfile, + hostScript: success, + wantToken: "test-token", + }, + { + name: "real error stops fallback", + forceScript: realError, + profileScript: unreachable, + hostScript: unreachable, + wantErrMsg: "databricks OAuth is not configured", + }, + { + name: "all commands fail with unknown flag — last error returned", + forceScript: unknownProfile, + profileScript: unknownProfile, + wantErrMsg: "unknown flag: --profile", + }, } - // Fallback script succeeds with --host. - hostScript := filepath.Join(tempDir, "host_cli.sh") - if err := os.WriteFile(hostScript, []byte("#!/bin/sh\necho '"+string(validResponse)+"'"), 0755); err != nil { - t.Fatalf("failed to create host script: %v", err) - } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tempDir := t.TempDir() + var commands []cliCommand - ts := &CliTokenSource{ - cmd: []string{profileScript}, - hostCmd: []string{hostScript}, - } - token, err := ts.Token(context.Background()) - if err != nil { - t.Fatalf("Token() error = %v, want fallback to succeed", err) - } - if token.AccessToken != "fallback-token" { - t.Errorf("AccessToken = %q, want %q", token.AccessToken, "fallback-token") + if tc.forceScript != "" { + path := filepath.Join(tempDir, "force_cli.sh") + if err := os.WriteFile(path, []byte(tc.forceScript), 0755); err != nil { + t.Fatalf("failed to create force script: %v", err) + } + commands = append(commands, cliCommand{ + args: []string{path}, + usedFlags: []string{"--force-refresh", "--profile"}, + warningMessage: "force-refresh not supported", + }) + } + if tc.profileScript != "" { + path := filepath.Join(tempDir, "profile_cli.sh") + if err := os.WriteFile(path, []byte(tc.profileScript), 0755); err != nil { + t.Fatalf("failed to create profile script: %v", err) + } + commands = append(commands, cliCommand{ + args: []string{path}, + usedFlags: []string{"--profile"}, + warningMessage: "profile not supported", + }) + } + if tc.hostScript != "" { + path := filepath.Join(tempDir, "host_cli.sh") + if err := os.WriteFile(path, []byte(tc.hostScript), 0755); err != nil { + t.Fatalf("failed to create host script: %v", err) + } + commands = append(commands, cliCommand{args: []string{path}}) + } + + ts := newTestCliTokenSource(commands) + token, err := ts.Token(context.Background()) + if tc.wantErrMsg != "" { + if err == nil || !strings.Contains(err.Error(), tc.wantErrMsg) { + t.Errorf("Token() error = %v, want error containing %q", err, tc.wantErrMsg) + } + return + } + if err != nil { + t.Fatalf("Token() error = %v", err) + } + if token.AccessToken != tc.wantToken { + t.Errorf("AccessToken = %q, want %q", token.AccessToken, tc.wantToken) + } + }) } } -func TestCliTokenSource_Token_NoFallbackOnRealError(t *testing.T) { +func TestCliTokenSource_Token_ActiveCommandIndexPersists(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Skipping shell script test on Windows") } + expiry := time.Now().Add(1 * time.Hour).Format(time.RFC3339) + validResponse, _ := json.Marshal(cliTokenResponse{ + AccessToken: "host-token", + TokenType: "Bearer", + Expiry: expiry, + }) + tempDir := t.TempDir() - // Primary script fails with a real auth error (not unknown flag). - profileScript := filepath.Join(tempDir, "profile_cli.sh") - if err := os.WriteFile(profileScript, []byte("#!/bin/sh\necho 'cache: databricks OAuth is not configured for this host' >&2\nexit 1"), 0755); err != nil { - t.Fatalf("failed to create profile script: %v", err) + forceScript := filepath.Join(tempDir, "force_cli.sh") + if err := os.WriteFile(forceScript, []byte("#!/bin/sh\necho 'Error: unknown flag: --force-refresh' >&2\nexit 1"), 0755); err != nil { + t.Fatalf("failed to create force script: %v", err) } - // Fallback script would succeed, but should not be called. hostScript := filepath.Join(tempDir, "host_cli.sh") - if err := os.WriteFile(hostScript, []byte("#!/bin/sh\necho 'should not reach here' >&2\nexit 1"), 0755); err != nil { + if err := os.WriteFile(hostScript, []byte("#!/bin/sh\necho '"+string(validResponse)+"'"), 0755); err != nil { t.Fatalf("failed to create host script: %v", err) } - ts := &CliTokenSource{ - cmd: []string{profileScript}, - hostCmd: []string{hostScript}, + ts := newTestCliTokenSource([]cliCommand{ + {args: []string{forceScript}, usedFlags: []string{"--force-refresh"}, warningMessage: "force-refresh not supported"}, + {args: []string{hostScript}}, + }) + + token, err := ts.Token(context.Background()) + if err != nil { + t.Fatalf("first Token() error = %v", err) + } + if token.AccessToken != "host-token" { + t.Errorf("first AccessToken = %q, want %q", token.AccessToken, "host-token") + } + if ts.activeCommandIndex.Load() != 1 { + t.Errorf("activeCommandIndex = %d, want 1", ts.activeCommandIndex.Load()) } - _, err := ts.Token(context.Background()) - if err == nil { - t.Fatal("Token() error = nil, want error") + + token, err = ts.Token(context.Background()) + if err != nil { + t.Fatalf("second Token() error = %v", err) } - if !strings.Contains(err.Error(), "databricks OAuth is not configured") { - t.Errorf("Token() error = %v, want error containing original auth failure", err) + if token.AccessToken != "host-token" { + t.Errorf("second AccessToken = %q, want %q", token.AccessToken, "host-token") } }