diff --git a/.gitignore b/.gitignore index acea8a96..6f1e334e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ go.work.sum # Editor/IDE # .idea/ # .vscode/ + +# locally built binaries +bin diff --git a/api/product/secret.go b/api/product/secret.go index 5ea86fdf..229c73cb 100644 --- a/api/product/secret.go +++ b/api/product/secret.go @@ -6,10 +6,12 @@ import ( "fmt" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/secretsmanager" "github.com/pkg/errors" v12 "k8s.io/api/core/v1" + kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -17,6 +19,58 @@ import ( "sigs.k8s.io/yaml" ) +// SecretNotFoundError indicates a secret key was not found in the vault +type SecretNotFoundError struct { + secretType SiteSecretType + vaultName string + key string + err error +} + +func newSecretNotFoundError(secretType SiteSecretType, vaultName, key string, err error) *SecretNotFoundError { + return &SecretNotFoundError{ + secretType: secretType, + vaultName: vaultName, + key: key, + err: err, + } +} + +func (e *SecretNotFoundError) Error() string { + return fmt.Sprintf("secret key '%s' not found in vault '%s' (type: %s): %v", + e.key, e.vaultName, e.secretType, e.err) +} + +func (e *SecretNotFoundError) Unwrap() error { + return e.err +} + +// SecretAccessError indicates an error accessing the secret store +type SecretAccessError struct { + secretType SiteSecretType + vaultName string + key string + err error +} + +func newSecretAccessError(secretType SiteSecretType, vaultName, key string, err error) *SecretAccessError { + return &SecretAccessError{ + secretType: secretType, + vaultName: vaultName, + key: key, + err: err, + } +} + +func (e *SecretAccessError) Error() string { + return fmt.Sprintf("access error fetching secret key '%s' from vault '%s' (type: %s): %v", + e.key, e.vaultName, e.secretType, e.err) +} + +func (e *SecretAccessError) Unwrap() error { + return e.err +} + type secretObjectJmesPath struct { Path string `json:"path,omitempty"` ObjectAlias string `json:"objectAlias,omitempty"` @@ -29,11 +83,13 @@ type secretObject struct { } type TestSecretProvider struct { - Secrets map[string]string `json:"secrets,omitempty"` + Secrets map[string]string `json:"secrets,omitempty"` + StrictMode bool `json:"strictMode,omitempty"` } var GlobalTestSecretProvider = &TestSecretProvider{ - Secrets: map[string]string{}, + Secrets: map[string]string{}, + StrictMode: false, // Default to fallback behavior for backward compatibility } func (t *TestSecretProvider) SetSecret(key, val string) error { @@ -57,6 +113,15 @@ func (t *TestSecretProvider) GetSecretWithFallback(key string) string { } } +func (t *TestSecretProvider) SetStrictMode(strict bool) { + t.StrictMode = strict +} + +func (t *TestSecretProvider) Reset() { + t.Secrets = map[string]string{} + t.StrictMode = false +} + func mapToJmesPath(input map[string]string) (jmes []secretObjectJmesPath) { for k, v := range input { jmes = append(jmes, secretObjectJmesPath{ @@ -139,57 +204,97 @@ func FetchSecret(ctx context.Context, r SomeReconciler, req ctrl.Request, secret l := r.GetLogger(ctx) switch secretType { case SiteSecretAws: - if sess, err := session.NewSession(&aws.Config{ - Region: aws.String(GetAWSRegion()), - }); err != nil { - return "", err - } else { - sm := secretsmanager.New(sess) - query := &secretsmanager.GetSecretValueInput{ - SecretId: aws.String(vaultName), - VersionId: nil, - VersionStage: aws.String("AWSCURRENT"), - } - if valueOutput, err := sm.GetSecretValue(query); err != nil { - return "", err - } else { - secretValue := map[string]json.RawMessage{} - if err := json.Unmarshal([]byte(*valueOutput.SecretString), &secretValue); err != nil { - return "", err - } - - if rawSecretEntry, ok := secretValue[key]; !ok { - // failed to find the configured key - return "", errors.New(fmt.Sprintf("could not find the configured key '%s' in secret '%s' with type '%s'", key, vaultName, secretType)) - } else { - var secretEntry string - if err := json.Unmarshal(rawSecretEntry, &secretEntry); err != nil { - // error unmarshalling secret - return "", err - } else { - // SUCCESS!! we got the secret! - return secretEntry, nil - } - } - } - } + return fetchAWSSecret(secretType, vaultName, key) + case SiteSecretKubernetes: - kubernetesSecretName := client.ObjectKey{Name: vaultName, Namespace: req.Namespace} - - existingSecret := &v12.Secret{} - if err := r.Get(ctx, kubernetesSecretName, existingSecret); err != nil { - l.Error(err, "Error retrieving kubernetes secret", "secret", kubernetesSecretName) - return "", err - } else { - secretEntry := existingSecret.Data[key] - return string(secretEntry), nil - } + return fetchKubernetesSecret(ctx, r, secretType, vaultName, req.Namespace, key) + case SiteSecretTest: - // try using the global test secret provider (or fallback to the key) + // Use the global test secret provider + if GlobalTestSecretProvider.StrictMode { + // In strict mode, return typed errors for missing secrets + secret, err := GlobalTestSecretProvider.GetSecret(key) + if err != nil { + return "", newSecretNotFoundError(secretType, vaultName, key, err) + } + return secret, nil + + } + // In non-strict mode, use fallback behavior (returns key if not found) return GlobalTestSecretProvider.GetSecretWithFallback(key), nil + default: err := errors.New("unknown secret type") l.Error(err, "Unknown secret type", "type", secretType) return "", err } } + +func fetchAWSSecret(secretType SiteSecretType, vaultName, key string) (string, error) { + sess, err := session.NewSession(&aws.Config{ + Region: aws.String(GetAWSRegion()), + }) + if err != nil { + return "", newSecretAccessError(secretType, vaultName, key, err) + } + sm := secretsmanager.New(sess) + query := &secretsmanager.GetSecretValueInput{ + SecretId: aws.String(vaultName), + VersionId: nil, + VersionStage: aws.String("AWSCURRENT"), + } + valueOutput, err := sm.GetSecretValue(query) + if err != nil { + var awsErr awserr.Error + if errors.As(err, &awsErr) && awsErr.Code() == secretsmanager.ErrCodeResourceNotFoundException { + return "", newSecretNotFoundError(secretType, vaultName, key, err) + } + return "", newSecretAccessError(secretType, vaultName, key, err) + } + + secretValue := map[string]json.RawMessage{} + if err := json.Unmarshal([]byte(*valueOutput.SecretString), &secretValue); err != nil { + // Malformed secret - access error + return "", newSecretAccessError(secretType, vaultName, key, err) + } + + rawSecretEntry, ok := secretValue[key] + if !ok { + // Vault exists but key doesn't - this is "not found" + return "", newSecretNotFoundError(secretType, vaultName, key, + fmt.Errorf("key %q not present in secret", key)) + } + + var secretEntry string + if err := json.Unmarshal(rawSecretEntry, &secretEntry); err != nil { + // Key exists but can't unmarshal - access error + return "", newSecretAccessError(secretType, vaultName, key, err) + } + + return secretEntry, nil +} + +func fetchKubernetesSecret(ctx context.Context, r SomeReconciler, secretType SiteSecretType, + vaultName, namespace, key string, +) (string, error) { + kubernetesSecretName := client.ObjectKey{Name: vaultName, Namespace: namespace} + + existingSecret := &v12.Secret{} + if err := r.Get(ctx, kubernetesSecretName, existingSecret); err != nil { + if kerrors.IsNotFound(err) { + // Secret doesn't exist + return "", newSecretNotFoundError(secretType, vaultName, key, err) + } + // Other error (permissions, network, etc.) + return "", newSecretAccessError(secretType, vaultName, key, err) + } + + secretEntry, exists := existingSecret.Data[key] + if !exists { + // Secret exists but key doesn't + return "", newSecretNotFoundError(secretType, vaultName, key, + fmt.Errorf("key %q not found in kubernetes secret", key)) + } + + return string(secretEntry), nil +} diff --git a/internal/controller/core/workbench.go b/internal/controller/core/workbench.go index 79f75d42..d0b75a9d 100644 --- a/internal/controller/core/workbench.go +++ b/internal/controller/core/workbench.go @@ -44,28 +44,57 @@ var invalidCharacters = regexp.MustCompile("[^a-z0-9]") // do not glob, lest we var azureDatabricksRegexp = regexp.MustCompile("azuredatabricks\\.net") -// FetchAndSetClientSecretForAzureDatabricks will check to see whether AzureDatabricks is in use... if it is, -// it will fetch the secret from the secret manager and modify the Spec in-place... -func (r *WorkbenchReconciler) FetchAndSetClientSecretForAzureDatabricks(ctx context.Context, req ctrl.Request, w *positcov1beta1.Workbench) error { +// FetchAndSetClientSecretForDatabricks will check whether Databricks (AWS or Azure) is in use and +// fetch the client secret from the secret manager. It modifies the Spec in-place. +// For Azure Databricks: Secret is required (returns error if not found) +// For AWS Databricks: Secret is optional (logs info if not found, continues without error) +func (r *WorkbenchReconciler) FetchAndSetClientSecretForDatabricks(ctx context.Context, req ctrl.Request, w *positcov1beta1.Workbench) error { l := r.GetLogger(ctx) - if w.Spec.SecretConfig.WorkbenchSecretIniConfig.Databricks != nil { - for _, v := range w.Spec.SecretConfig.WorkbenchSecretIniConfig.Databricks { - if azureDatabricksRegexp.MatchString(v.Url) { + if w.Spec.SecretConfig.WorkbenchSecretIniConfig.Databricks == nil { + return nil + } - // matched azure url... fetch and set the client secret + for dbName, v := range w.Spec.SecretConfig.WorkbenchSecretIniConfig.Databricks { + // TODO: ideally this secret would not be read by the operator... + // but that means we need a way to "mount" the secret by env var / etc. + clientSecretName := fmt.Sprintf("dev-client-secret-%s", v.ClientId) + cs, err := product.FetchSecret(ctx, r, req, w.Spec.Secret.Type, w.Spec.Secret.VaultName, clientSecretName) + if err != nil { + if azureDatabricksRegexp.MatchString(v.Url) { + // Azure Databricks + not found: return error + l.Error(err, "client secret required for Azure Databricks", + "databricks", dbName, + "url", v.Url) + return err + } - // TODO: ideally this secret would not be read by the operator... - // but that means we need a way to "mount" the secret by env var / etc. - clientSecretName := fmt.Sprintf("dev-client-secret-%s", v.ClientId) - if cs, err := product.FetchSecret(ctx, r, req, w.Spec.Secret.Type, w.Spec.Secret.VaultName, clientSecretName); err != nil { - l.Error(err, "error fetching client secret for databricks azure") - return err - } else { - v.ClientSecret = cs - } + // The client secret is an optional parameter for Databricks instances in AWS, so if the error is a + // "not found", we just want to log that and continue to allow configuration to be created. + // See the Workbench docs for more information: + // https://docs.posit.co/ide/server-pro/admin/integration/databricks.html#workbench-configuration + var notFoundErr *product.SecretNotFoundError + if errors.As(err, ¬FoundErr) { + // AWS Databricks + not found: log info and continue without setting secret + l.Info("Databricks client secret not found for AWS instance - continuing without OAuth", + "databricks", dbName, + "url", v.Url, + "clientId", v.ClientId, + "secretKey", clientSecretName, + ) + // Don't set ClientSecret, don't return error + continue } + // Any other error type from AWS should be returned + return err } + + // Success - set the client secret + v.ClientSecret = cs + l.Info("successfully fetched client secret for databricks", + "databricks", dbName, + "url", v.Url) + } return nil } @@ -129,9 +158,10 @@ func (r *WorkbenchReconciler) ReconcileWorkbench(ctx context.Context, req ctrl.R // FYI: Password is set via env var in the CreateSecretVolumeFactory } - // fetch azure secret, if databricks is involved - if err := r.FetchAndSetClientSecretForAzureDatabricks(ctx, req, w); err != nil { - l.Error(err, "error fetching client secret for databricks azure. Not fatal") + // fetch databricks secrets (both AWS and Azure) + if err := r.FetchAndSetClientSecretForDatabricks(ctx, req, w); err != nil { + l.Error(err, "error fetching client secret for databricks") + return ctrl.Result{}, err } // now create the service itself diff --git a/internal/controller/core/workbench_test.go b/internal/controller/core/workbench_test.go index c7f66b5a..1f2c92eb 100644 --- a/internal/controller/core/workbench_test.go +++ b/internal/controller/core/workbench_test.go @@ -18,10 +18,24 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func TestAzureDatabricks(t *testing.T) { +func TestDatabricksClientSecretFetching(t *testing.T) { r := &WorkbenchReconciler{} ctx := context.TODO() req := ctrl.Request{} + + // Enable strict mode for this test to get typed errors + product.GlobalTestSecretProvider.SetStrictMode(true) + defer product.GlobalTestSecretProvider.Reset() // Clean up after test + + // Set up test secrets for Azure instances + err := product.GlobalTestSecretProvider.SetSecret("dev-client-secret-some-client-id", "azure-secret-1") + require.NoError(t, err) + err = product.GlobalTestSecretProvider.SetSecret("dev-client-secret-another-client-id", "azure-secret-2") + require.NoError(t, err) + err = product.GlobalTestSecretProvider.SetSecret("dev-client-secret-other-client-id", "aws-secret-1") + require.NoError(t, err) + // Intentionally NOT setting secret for AWS instance to test warning behavior + w := &positcov1beta1.Workbench{ Spec: positcov1beta1.WorkbenchSpec{ Secret: positcov1beta1.SecretConfig{ @@ -31,40 +45,91 @@ func TestAzureDatabricks(t *testing.T) { SecretConfig: positcov1beta1.WorkbenchSecretConfig{ WorkbenchSecretIniConfig: positcov1beta1.WorkbenchSecretIniConfig{ Databricks: map[string]*positcov1beta1.WorkbenchDatabricksConfig{ - // this one checks that azure works + // Azure Databricks instances - should get secrets "posit-azure": { Name: "Azure Databricks", Url: "https://someprefix.azuredatabricks.net", ClientId: "some-client-id", }, - // this checks that other targets do not get interfered with - "posit-aws": { - Name: "AWS Databricks", - Url: "https://some-other-url.com", - ClientId: "aws-client-id", - }, - // this one checks that a suffix does not interfere with the match "another-azure": { Name: "Azure Databricks 2", Url: "https://someprefix.azuredatabricks.net/some-suffix/another-suffix", ClientId: "another-client-id", }, + // AWS Databricks instance - should NOT error on missing secret + "posit-aws": { + Name: "AWS Databricks", + Url: "https://example-workspace.cloud.databricks.com", + ClientId: "aws-client-id", + }, + // AWS Databricks instance - should populate secret + "posit-aws-with-client-secret": { + Name: "AWS Databricks with client secret", + Url: "https://confidential-workspace.cloud.databricks.com", + ClientId: "other-client-id", + }, }, }, }, }, } - var err error - // azure - require.Equal(t, w.Spec.SecretConfig.Databricks["posit-azure"].ClientSecret, "") - require.Equal(t, w.Spec.SecretConfig.Databricks["posit-aws"].ClientSecret, "") - require.Equal(t, w.Spec.SecretConfig.Databricks["another-azure"].ClientSecret, "") - err = r.FetchAndSetClientSecretForAzureDatabricks(ctx, req, w) + // Verify initial state + require.Equal(t, "", w.Spec.SecretConfig.Databricks["posit-azure"].ClientSecret) + require.Equal(t, "", w.Spec.SecretConfig.Databricks["posit-aws"].ClientSecret) + require.Equal(t, "", w.Spec.SecretConfig.Databricks["another-azure"].ClientSecret) + require.Equal(t, "", w.Spec.SecretConfig.Databricks["posit-aws-with-client-secret"].ClientSecret) + + // Execute the function + err = r.FetchAndSetClientSecretForDatabricks(ctx, req, w) require.NoError(t, err) - require.Equal(t, w.Spec.SecretConfig.Databricks["posit-azure"].ClientSecret, "dev-client-secret-some-client-id") - require.Equal(t, w.Spec.SecretConfig.Databricks["posit-aws"].ClientSecret, "") - require.Equal(t, w.Spec.SecretConfig.Databricks["another-azure"].ClientSecret, "dev-client-secret-another-client-id") + + // Azure instances should have secrets populated + require.Equal(t, "azure-secret-1", w.Spec.SecretConfig.Databricks["posit-azure"].ClientSecret) + require.Equal(t, "azure-secret-2", w.Spec.SecretConfig.Databricks["another-azure"].ClientSecret) + + // First AWS instance should remain empty (secret not found, but no error) + require.Equal(t, "", w.Spec.SecretConfig.Databricks["posit-aws"].ClientSecret) + // Other AWS instance should get client secret + require.Equal(t, "aws-secret-1", w.Spec.SecretConfig.Databricks["posit-aws-with-client-secret"].ClientSecret) +} + +func TestAzureDatabricksSecretNotFound(t *testing.T) { + r := &WorkbenchReconciler{} + ctx := context.TODO() + req := ctrl.Request{} + + // Enable strict mode for this test to get typed errors + product.GlobalTestSecretProvider.SetStrictMode(true) + defer product.GlobalTestSecretProvider.Reset() // Clean up after test + + // Don't set any secrets - simulating not found + + w := &positcov1beta1.Workbench{ + Spec: positcov1beta1.WorkbenchSpec{ + Secret: positcov1beta1.SecretConfig{ + VaultName: "test-vault", + Type: product.SiteSecretTest, + }, + SecretConfig: positcov1beta1.WorkbenchSecretConfig{ + WorkbenchSecretIniConfig: positcov1beta1.WorkbenchSecretIniConfig{ + Databricks: map[string]*positcov1beta1.WorkbenchDatabricksConfig{ + "posit-azure": { + Name: "Azure Databricks", + Url: "https://someprefix.azuredatabricks.net", + ClientId: "missing-client-id", + }, + }, + }, + }, + }, + } + + // Should return error for Azure when secret not found + err := r.FetchAndSetClientSecretForDatabricks(ctx, req, w) + require.Error(t, err) + var notFoundErr *product.SecretNotFoundError + require.ErrorAs(t, err, ¬FoundErr) } func initWorkbenchReconciler(t *testing.T, ctx context.Context, namespace, name string) (context.Context, *WorkbenchReconciler, ctrl.Request, client.Client) {