From bcb8cd216a4a174f14cce381b7114983231c54ab Mon Sep 17 00:00:00 2001 From: Mike Gahagan Date: Wed, 17 Dec 2025 10:16:46 -0500 Subject: [PATCH 1/5] Test to confirm HCP cluster can accept and use user provided pull secrets. --- test/e2e/cluster_pullsecret.go | 333 ++++++++++++++++++ test/util/framework/hcp_helper.go | 38 +- .../framework/per_invocation_framework.go | 64 +--- test/util/framework/per_test_framework.go | 26 +- test/util/framework/pullsecret_types.go | 29 ++ test/util/verifiers/image_pull.go | 166 +++++++++ test/util/verifiers/operator.go | 154 ++++++++ test/util/verifiers/pullsecret.go | 141 ++++++++ 8 files changed, 882 insertions(+), 69 deletions(-) create mode 100644 test/e2e/cluster_pullsecret.go create mode 100644 test/util/framework/pullsecret_types.go create mode 100644 test/util/verifiers/image_pull.go create mode 100644 test/util/verifiers/operator.go create mode 100644 test/util/verifiers/pullsecret.go diff --git a/test/e2e/cluster_pullsecret.go b/test/e2e/cluster_pullsecret.go new file mode 100644 index 0000000000..00966fdc28 --- /dev/null +++ b/test/e2e/cluster_pullsecret.go @@ -0,0 +1,333 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + + "github.com/Azure/ARO-HCP/test/util/framework" + "github.com/Azure/ARO-HCP/test/util/labels" + "github.com/Azure/ARO-HCP/test/util/verifiers" +) + +var _ = Describe("Cluster Pull Secret Management", func() { + BeforeEach(func() { + // per test initialization + }) + + It("should be able to create an HCP cluster and manage pull secrets", + labels.RequireNothing, + labels.Critical, + labels.Positive, + labels.AroRpApiCompatible, + func(ctx context.Context) { + const ( + customerClusterName = "pullsecret-hcp-cluster" + testPullSecretHost = "host.example.com" + testPullSecretPassword = "my_password" + testPullSecretEmail = "noreply@example.com" + pullSecretName = "additional-pull-secret" + pullSecretNamespace = "kube-system" + ) + tc := framework.NewTestContext() + + By("creating a resource group") + resourceGroup, err := tc.NewResourceGroup(ctx, "pullsecret-test", tc.Location()) + Expect(err).NotTo(HaveOccurred()) + + By("creating cluster parameters") + clusterParams := framework.NewDefaultClusterParams() + clusterParams.ClusterName = customerClusterName + managedResourceGroupName := framework.SuffixName(*resourceGroup.Name, "-managed", 64) + clusterParams.ManagedResourceGroupName = managedResourceGroupName + + By("creating customer resources") + clusterParams, err = tc.CreateClusterCustomerResources(ctx, + resourceGroup, + clusterParams, + map[string]interface{}{ + "persistTagValue": false, + }, + TestArtifactsFS, + ) + Expect(err).NotTo(HaveOccurred()) + + By("Creating the cluster") + err = tc.CreateHCPClusterFromParam(ctx, + *resourceGroup.Name, + clusterParams, + 45*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + By("Creating the node pool") + nodePoolParams := framework.NewDefaultNodePoolParams() + nodePoolParams.NodePoolName = "np-1" + nodePoolParams.ClusterName = customerClusterName + nodePoolParams.Replicas = int32(2) + err = tc.CreateNodePoolFromParam(ctx, + *resourceGroup.Name, + customerClusterName, + nodePoolParams, + 15*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + + By("getting credentials") + adminRESTConfig, err := tc.GetAdminRESTConfigForHCPCluster( + ctx, + tc.Get20240610ClientFactoryOrDie(ctx).NewHcpOpenShiftClustersClient(), + *resourceGroup.Name, + customerClusterName, + 10*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + + By("ensuring the cluster is viable") + err = verifiers.VerifyHCPCluster(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating kubernetes client") + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating test pull secret") + username := "test-user" + auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + testPullSecretPassword)) + + testPullSecret, err := framework.CreateTestDockerConfigSecret( + testPullSecretHost, + username, + testPullSecretPassword, + testPullSecretEmail, + pullSecretName, + pullSecretNamespace, + ) + Expect(err).NotTo(HaveOccurred()) + + By("creating the test pull secret in the cluster") + _, err = kubeClient.CoreV1().Secrets(pullSecretNamespace).Create(ctx, testPullSecret, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for HCCO to merge the additional pull secret with the global pull secret") + Eventually(func() error { + return 
verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "additional pull secret should be merged into global-pull-secret by HCCO") + + By("verifying the DaemonSet for global pull secret synchronization is created") + Eventually(func() error { + return verifiers.VerifyGlobalPullSecretSyncer().Verify(ctx, adminRESTConfig) + }, 60*time.Second, 10*time.Second).Should(Succeed(), "global-pull-secret-syncer DaemonSet should be created") + + By("verifying the pull secret was merged into the global pull secret") + err = verifiers.VerifyPullSecretAuthData( + "global-pull-secret", + pullSecretNamespace, + testPullSecretHost, + auth, + testPullSecretEmail, + ).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("reading pull-secret file from aro-hcp-qe-pull-secret directory") + pullSecretFilePath := filepath.Join(tc.PullSecretPath(), "pull-secret") + pullSecretFileData, err := os.ReadFile(pullSecretFilePath) + Expect(err).NotTo(HaveOccurred(), "failed to read pull-secret file from %s", pullSecretFilePath) + + By("parsing pull-secret file") + var pullSecretConfig framework.DockerConfigJSON + err = json.Unmarshal(pullSecretFileData, &pullSecretConfig) + Expect(err).NotTo(HaveOccurred(), "failed to parse pull-secret file") + + By("extracting registry.redhat.io credentials") + const redhatRegistryHost = "registry.redhat.io" + redhatRegistryAuth, ok := pullSecretConfig.Auths[redhatRegistryHost] + Expect(ok).To(BeTrue(), "registry.redhat.io credentials not found in pull-secret file") + + redhatRegistryAuthString := redhatRegistryAuth.Auth + redhatRegistryEmail := redhatRegistryAuth.Email + + By("updating additional-pull-secret to add registry.redhat.io credentials") + // Get the current additional-pull-secret + currentSecret, err := kubeClient.CoreV1().Secrets(pullSecretNamespace).Get(ctx, pullSecretName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get existing additional-pull-secret") + + // Parse the current dockerconfigjson + var currentConfig framework.DockerConfigJSON + err = json.Unmarshal(currentSecret.Data[corev1.DockerConfigJsonKey], ¤tConfig) + Expect(err).NotTo(HaveOccurred(), "failed to parse current pull secret") + + // Add registry.redhat.io credentials to the existing auths + currentConfig.Auths[redhatRegistryHost] = framework.RegistryAuth{ + Auth: redhatRegistryAuthString, + Email: redhatRegistryEmail, + } + + // Marshal back to JSON + updatedDockerConfigJSON, err := json.Marshal(currentConfig) + Expect(err).NotTo(HaveOccurred()) + + // Update the secret + currentSecret.Data[corev1.DockerConfigJsonKey] = updatedDockerConfigJSON + _, err = kubeClient.CoreV1().Secrets(pullSecretNamespace).Update(ctx, currentSecret, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for HCCO to merge the updated pull secret (with registry.redhat.io) into global pull secret") + Eventually(func() error { + return verifiers.VerifyPullSecretMergedIntoGlobal(redhatRegistryHost).Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "registry.redhat.io pull secret should be merged into global-pull-secret by HCCO") + + By("verifying both test registries are now in the global pull secret") + err = verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred(), "host.example.com should still be in global-pull-secret") + + err = verifiers.VerifyPullSecretAuthData( + 
"global-pull-secret", + pullSecretNamespace, + redhatRegistryHost, + redhatRegistryAuthString, + redhatRegistryEmail, + ).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating dynamic client for operator installation") + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating namespace for NFD operator") + const nfdNamespace = "openshift-nfd" + _, err = kubeClient.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nfdNamespace, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("creating OperatorGroup for NFD operator") + operatorGroupGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1", + Resource: "operatorgroups", + } + operatorGroup := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1", + "kind": "OperatorGroup", + "metadata": map[string]interface{}{ + "name": "nfd-operator-group", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "targetNamespaces": []interface{}{nfdNamespace}, + }, + }, + } + _, err = dynamicClient.Resource(operatorGroupGVR).Namespace(nfdNamespace).Create(ctx, operatorGroup, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("creating Subscription for NFD operator from redhat-operators catalog") + subscriptionGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "subscriptions", + } + subscription := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", + "kind": "Subscription", + "metadata": map[string]interface{}{ + "name": "nfd", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "channel": "stable", + "name": "nfd", + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "installPlanApproval": "Automatic", + }, + }, + } + _, err = dynamicClient.Resource(subscriptionGVR).Namespace(nfdNamespace).Create(ctx, subscription, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for NFD operator to be installed") + Eventually(func() error { + return verifiers.VerifyOperatorInstalled(nfdNamespace, "nfd").Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD operator should be installed successfully") + + By("creating NodeFeatureDiscovery CR to deploy NFD worker") + nfdGVR := schema.GroupVersionResource{ + Group: "nfd.openshift.io", + Version: "v1", + Resource: "nodefeaturediscoveries", + } + nfdCR := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "nfd.openshift.io/v1", + "kind": "NodeFeatureDiscovery", + "metadata": map[string]interface{}{ + "name": "nfd-instance", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "operand": map[string]interface{}{ + "image": "registry.redhat.io/openshift4/ose-node-feature-discovery:latest", + }, + }, + }, + } + _, err = dynamicClient.Resource(nfdGVR).Namespace(nfdNamespace).Create(ctx, nfdCR, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for NFD worker DaemonSet to be created") + Eventually(func() error { + daemonSets, err := kubeClient.AppsV1().DaemonSets(nfdNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return err + } + for _, ds := range daemonSets.Items { + if ds.Name == "nfd-worker" { + if ds.Status.DesiredNumberScheduled > 0 && ds.Status.NumberReady > 0 { + 
return nil + } + return fmt.Errorf("nfd-worker DaemonSet found but not ready: desired=%d, ready=%d", + ds.Status.DesiredNumberScheduled, ds.Status.NumberReady) + } + } + return fmt.Errorf("nfd-worker DaemonSet not found") + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker DaemonSet should be created and have ready pods") + + By("waiting for NFD worker pods to be created and verify images from registry.redhat.io can be pulled") + Eventually(func() error { + return verifiers.VerifyImagePulled(nfdNamespace, "registry.redhat.io", "ose-node-feature-discovery").Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker images from registry.redhat.io should be pulled successfully with the added pull secret") + }) +}) diff --git a/test/util/framework/hcp_helper.go b/test/util/framework/hcp_helper.go index 93d0d05a83..7367df2877 100644 --- a/test/util/framework/hcp_helper.go +++ b/test/util/framework/hcp_helper.go @@ -16,16 +16,18 @@ package framework import ( "context" + "encoding/base64" + "encoding/json" "errors" "fmt" "time" "github.com/davecgh/go-spew/spew" - "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "golang.org/x/sync/errgroup" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -468,9 +470,38 @@ func CreateClusterRoleBinding(ctx context.Context, subject string, adminRESTConf return nil } +// CreateTestDockerConfigSecret creates a Docker config secret for testing pull secret functionality +func CreateTestDockerConfigSecret(host, username, password, email, secretName, namespace string) (*corev1.Secret, error) { + auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + password)) + + dockerConfig := DockerConfigJSON{ + Auths: map[string]RegistryAuth{ + host: { + Email: email, + Auth: auth, + }, + }, + } + + dockerConfigJSON, err := json.Marshal(dockerConfig) + if err != nil { + return nil, fmt.Errorf("failed to marshal docker config: %w", err) + } + + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: namespace, + }, + Type: corev1.SecretTypeDockerConfigJson, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: dockerConfigJSON, + }, + }, nil +} + func BeginCreateHCPCluster( ctx context.Context, - logger logr.Logger, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupName string, hcpClusterName string, @@ -478,7 +509,6 @@ func BeginCreateHCPCluster( location string, ) (*runtime.Poller[hcpsdk20240610preview.HcpOpenShiftClustersClientCreateOrUpdateResponse], error) { cluster := BuildHCPClusterFromParams(clusterParams, location) - logger.Info("Starting HCP cluster creation", "clusterName", hcpClusterName, "resourceGroup", resourceGroupName) poller, err := hcpClient.BeginCreateOrUpdate(ctx, resourceGroupName, hcpClusterName, cluster, nil) if err != nil { return nil, fmt.Errorf("failed starting cluster creation %q in resourcegroup=%q: %w", hcpClusterName, resourceGroupName, err) @@ -490,7 +520,6 @@ func BeginCreateHCPCluster( // the function won't wait for the deployment to be ready. 
func CreateHCPClusterAndWait( ctx context.Context, - logger logr.Logger, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupName string, hcpClusterName string, @@ -503,7 +532,6 @@ func CreateHCPClusterAndWait( defer cancel() } - logger.Info("Starting HCP cluster creation", "clusterName", hcpClusterName, "resourceGroup", resourceGroupName) poller, err := hcpClient.BeginCreateOrUpdate(ctx, resourceGroupName, hcpClusterName, cluster, nil) if err != nil { return nil, fmt.Errorf("failed starting cluster creation %q in resourcegroup=%q: %w", hcpClusterName, resourceGroupName, err) diff --git a/test/util/framework/per_invocation_framework.go b/test/util/framework/per_invocation_framework.go index 11b5e4641d..3c3f488c46 100644 --- a/test/util/framework/per_invocation_framework.go +++ b/test/util/framework/per_invocation_framework.go @@ -21,7 +21,6 @@ import ( "net/http" "os" "path" - "path/filepath" "strconv" "strings" "sync" @@ -44,14 +43,13 @@ type perBinaryInvocationTestContext struct { tenantID string testUserClientID string location string + pullSecretPath string isDevelopmentEnvironment bool skipCleanup bool - pooledIdentities bool - contextLock sync.RWMutex - subscriptionID string - azureCredentials azcore.TokenCredential - identityPoolState *leasedIdentityPoolState + contextLock sync.RWMutex + subscriptionID string + azureCredentials azcore.TokenCredential } type CleanupFunc func(ctx context.Context) error @@ -75,14 +73,14 @@ func invocationContext() *perBinaryInvocationTestContext { initializeOnce.Do(func() { invocationContextInstance = &perBinaryInvocationTestContext{ artifactDir: artifactDir(), - sharedDir: SharedDir(), + sharedDir: sharedDir(), subscriptionName: subscriptionName(), tenantID: tenantID(), testUserClientID: testUserClientID(), location: location(), + pullSecretPath: pullSecretPath(), isDevelopmentEnvironment: IsDevelopmentEnvironment(), skipCleanup: skipCleanup(), - pooledIdentities: pooledIdentities(), } }) return invocationContextInstance @@ -191,37 +189,6 @@ func (tc *perBinaryInvocationTestContext) Location() string { return tc.location } -func (tc *perBinaryInvocationTestContext) UsePooledIdentities() bool { - return tc.pooledIdentities -} - -func (tc *perBinaryInvocationTestContext) getLeasedIdentityPoolState() (*leasedIdentityPoolState, error) { - tc.contextLock.RLock() - if tc.identityPoolState != nil { - defer tc.contextLock.RUnlock() - return tc.identityPoolState, nil - } - tc.contextLock.RUnlock() - - tc.contextLock.Lock() - defer tc.contextLock.Unlock() - - if tc.identityPoolState != nil { - return tc.identityPoolState, nil - } - - state, err := newLeasedIdentityPoolState(msiPoolStateFilePath()) - if err != nil { - return nil, fmt.Errorf("failed to get managed identities pool state: %w", err) - } - tc.identityPoolState = state - return tc.identityPoolState, nil -} - -func msiPoolStateFilePath() string { - return filepath.Join(artifactDir(), "identities-pool-state.yaml") -} - func skipCleanup() bool { ret, _ := strconv.ParseBool(os.Getenv("ARO_E2E_SKIP_CLEANUP")) return ret @@ -233,15 +200,10 @@ func artifactDir() string { return os.Getenv("ARTIFACT_DIR") } -func pooledIdentities() bool { - b, _ := strconv.ParseBool(strings.TrimSpace(os.Getenv(UsePooledIdentitiesEnvvar))) - return b -} - -// SharedDir is SHARED_DIR. It is a spot to store *files only* that can be shared between ci-operator steps. +// sharedDir is SHARED_DIR. It is a spot to store *files only* that can be shared between ci-operator steps. 
// We can use this for anything, but currently we have a backup cleanup and collection scripts that use files // here to cleanup and debug testing resources. -func SharedDir() string { +func sharedDir() string { // can't use gomega in this method since it is used outside of It() return os.Getenv("SHARED_DIR") } @@ -270,6 +232,16 @@ func tenantID() string { return os.Getenv("AZURE_TENANT_ID") } +// pullSecretPath returns the value of ARO_HCP_QE_PULL_SECRET_PATH environment variable +// If not set, defaults to /var/run/aro-hcp-qe-pull-secret +func pullSecretPath() string { + path := os.Getenv("ARO_HCP_QE_PULL_SECRET_PATH") + if path == "" { + return "/var/run/aro-hcp-qe-pull-secret" + } + return path +} + // IsDevelopmentEnvironment indicates when this environment is development. This controls client endpoints and disables security // when set to development. func IsDevelopmentEnvironment() bool { diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index f54d06da0b..d027574baf 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -164,16 +164,12 @@ func (tc *perItOrDescribeTestContext) deleteCreatedResources(ctx context.Context } errCleanupResourceGroups := tc.CleanupResourceGroups(ctx, hcpClientFactory.NewHcpOpenShiftClustersClient(), resourceGroupsClientFactory.NewResourceGroupsClient(), opts) if errCleanupResourceGroups != nil { - ginkgo.GinkgoLogr.Error(errCleanupResourceGroups, "at least one resource group failed to delete") + ginkgo.GinkgoLogr.Error(errCleanupResourceGroups, "at least one resource group failed to delete: %w", errCleanupResourceGroups) } err = CleanupAppRegistrations(ctx, graphClient, appRegistrations) if err != nil { - ginkgo.GinkgoLogr.Error(err, "at least one app registration failed to delete") - } - - if err := tc.releaseLeasedIdentities(ctx); err != nil { - ginkgo.GinkgoLogr.Error(err, "failed to release leased identities") + ginkgo.GinkgoLogr.Error(err, "at least one app registration failed to delete: %w", err) } ginkgo.GinkgoLogr.Info("finished deleting created resources") @@ -259,19 +255,9 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) { defer tc.contextLock.RUnlock() ginkgo.GinkgoLogr.Info("collecting debug info") - leasedContainers, err := tc.leasedIdentityContainers() - if err != nil { - ginkgo.GinkgoLogr.Error(err, "failed to get leased identity containers") - return - } - // deletion takes a while, it's worth it to do this in parallel waitGroup, ctx := errgroup.WithContext(ctx) - resourceGroups := append( - append([]string(nil), tc.knownResourceGroups...), - leasedContainers..., - ) - for _, resourceGroupName := range resourceGroups { + for _, resourceGroupName := range tc.knownResourceGroups { currResourceGroupName := resourceGroupName waitGroup.Go(func() error { // prevent a stray panic from exiting the process. Don't do this generally because ginkgo/gomega rely on panics to function. @@ -282,7 +268,7 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) { } if err := waitGroup.Wait(); err != nil { // remember that Wait only shows the first error, not all the errors. 
- ginkgo.GinkgoLogr.Error(err, "at least one resource group failed to collect") + ginkgo.GinkgoLogr.Error(err, "at least one resource group failed to collect: %w", err) } ginkgo.GinkgoLogr.Info("finished collecting debug info") @@ -752,6 +738,10 @@ func (tc *perItOrDescribeTestContext) TenantID() string { return tc.perBinaryInvocationTestContext.tenantID } +func (tc *perItOrDescribeTestContext) PullSecretPath() string { + return tc.perBinaryInvocationTestContext.pullSecretPath +} + func (tc *perItOrDescribeTestContext) recordDeploymentOperationsUnlocked(resourceGroup, deployment string, operations []timing.Operation) { if _, exists := tc.timingMetadata.Deployments[resourceGroup]; !exists { tc.timingMetadata.Deployments[resourceGroup] = make(map[string][]timing.Operation) diff --git a/test/util/framework/pullsecret_types.go b/test/util/framework/pullsecret_types.go new file mode 100644 index 0000000000..be71e8a7e1 --- /dev/null +++ b/test/util/framework/pullsecret_types.go @@ -0,0 +1,29 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package framework + +// RegistryAuth represents authentication credentials for a single registry. +// This type models the structure of dockerconfigjson registry auth entries. +type RegistryAuth struct { + Username string `json:"username,omitempty"` + Email string `json:"email,omitempty"` + Auth string `json:"auth"` +} + +// DockerConfigJSON is the root structure for dockerconfigjson secret data. +// See: https://kubernetes.io/docs/concepts/configuration/secret/#docker-config-secrets +type DockerConfigJSON struct { + Auths map[string]RegistryAuth `json:"auths"` +} diff --git a/test/util/verifiers/image_pull.go b/test/util/verifiers/image_pull.go new file mode 100644 index 0000000000..c8f1c56f1c --- /dev/null +++ b/test/util/verifiers/image_pull.go @@ -0,0 +1,166 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package verifiers + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type verifyImagePulled struct { + namespace string + imageRepository string + imageName string // optional - if empty, any image from repository is checked +} + +func (v verifyImagePulled) Name() string { + return "VerifyImagePulled" +} + +func (v verifyImagePulled) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + logger := ginkgo.GinkgoLogr + startTime := time.Now() + logger.Info("Starting image pull verification", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "startTime", startTime.Format(time.RFC3339)) + + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + // Get all pods in the namespace + pods, err := kubeClient.CoreV1().Pods(v.namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list pods in namespace %s: %w", v.namespace, err) + } + + if len(pods.Items) == 0 { + return fmt.Errorf("no pods found in namespace %s", v.namespace) + } + + // Check if at least one pod successfully pulled the specified image + imagePulledSuccessfully := false + var imagePullErrors []string + + for _, pod := range pods.Items { + // Check container statuses for image pull success + for _, containerStatus := range pod.Status.ContainerStatuses { + // Check if the image matches our criteria + imageMatches := strings.Contains(containerStatus.Image, v.imageRepository) + if v.imageName != "" { + imageMatches = imageMatches && strings.Contains(containerStatus.Image, v.imageName) + } + + if imageMatches { + // If ImageID is set, the image was pulled successfully + if containerStatus.ImageID != "" { + imagePulledSuccessfully = true + logger.Info("Successfully pulled image", + "pod", pod.Name, + "container", containerStatus.Name, + "image", containerStatus.Image, + "imageID", containerStatus.ImageID) + } + } + } + + // Also check for ImagePullBackOff errors + for _, condition := range pod.Status.Conditions { + if condition.Type == "PodScheduled" && condition.Status == "False" { + if strings.Contains(condition.Message, "ImagePullBackOff") || strings.Contains(condition.Message, "ErrImagePull") { + imagePullErrors = append(imagePullErrors, fmt.Sprintf("pod %s: %s", pod.Name, condition.Message)) + } + } + } + + // Check container statuses for waiting state + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + reason := containerStatus.State.Waiting.Reason + message := containerStatus.State.Waiting.Message + + // Log all waiting states + logger.Info("Container waiting", + "pod", pod.Name, + "container", containerStatus.Name, + "reason", reason, + "message", message, + "image", containerStatus.Image) + + // Track image pull errors specifically + if reason == "ImagePullBackOff" || reason == "ErrImagePull" { + imagePullErrors = append(imagePullErrors, fmt.Sprintf("pod %s container %s: %s - %s", + pod.Name, containerStatus.Name, reason, message)) + } + } + } + } + + endTime := time.Now() + duration := endTime.Sub(startTime) + + if len(imagePullErrors) > 0 { + logger.Error(fmt.Errorf("image pull errors detected"), "verification failed", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", 
duration, + "endTime", endTime.Format(time.RFC3339)) + return fmt.Errorf("image pull errors detected:\n%s", strings.Join(imagePullErrors, "\n")) + } + + if !imagePulledSuccessfully { + logger.Error(fmt.Errorf("no matching images pulled"), "verification failed", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", duration, + "endTime", endTime.Format(time.RFC3339)) + return fmt.Errorf("no pods found with successfully pulled images from %s", v.imageRepository) + } + + logger.Info("Image pull verification completed successfully", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", duration, + "endTime", endTime.Format(time.RFC3339)) + + return nil +} + +// VerifyImagePulled creates a verifier that checks if an image has been successfully pulled +// Parameters: +// - namespace: the namespace to check for pods +// - imageRepository: the repository to match (e.g., "registry.redhat.io") +// - imageName: optional specific image name to match (empty string matches any image from repository) +func VerifyImagePulled(namespace, imageRepository, imageName string) HostedClusterVerifier { + return verifyImagePulled{ + namespace: namespace, + imageRepository: imageRepository, + imageName: imageName, + } +} diff --git a/test/util/verifiers/operator.go b/test/util/verifiers/operator.go new file mode 100644 index 0000000000..f42da648bd --- /dev/null +++ b/test/util/verifiers/operator.go @@ -0,0 +1,154 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package verifiers + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" +) + +type verifyOperatorInstalled struct { + namespace string + subscriptionName string +} + +func (v verifyOperatorInstalled) Name() string { + return "VerifyOperatorInstalled" +} + +func (v verifyOperatorInstalled) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create dynamic client: %w", err) + } + + // Check if Subscription exists + subscriptionGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "subscriptions", + } + + subscription, err := dynamicClient.Resource(subscriptionGVR).Namespace(v.namespace).Get(ctx, v.subscriptionName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get subscription %s/%s: %w", v.namespace, v.subscriptionName, err) + } + + // Check subscription state + state, found, err := unstructured.NestedString(subscription.Object, "status", "state") + if err != nil { + return fmt.Errorf("failed to get subscription state: %w", err) + } + if !found { + return fmt.Errorf("subscription state not found") + } + if state != "AtLatestKnown" { + return fmt.Errorf("subscription state is %q, expected AtLatestKnown", state) + } + + // Get InstallPlan reference + installPlanRef, found, err := unstructured.NestedString(subscription.Object, "status", "installplan", "name") + if err != nil { + return fmt.Errorf("failed to get installplan reference: %w", err) + } + if !found { + return fmt.Errorf("installplan reference not found in subscription") + } + + // Check InstallPlan status + installPlanGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "installplans", + } + + installPlan, err := dynamicClient.Resource(installPlanGVR).Namespace(v.namespace).Get(ctx, installPlanRef, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get installplan %s/%s: %w", v.namespace, installPlanRef, err) + } + + phase, found, err := unstructured.NestedString(installPlan.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get installplan phase: %w", err) + } + if !found { + return fmt.Errorf("installplan phase not found") + } + if phase != "Complete" { + return fmt.Errorf("installplan phase is %q, expected Complete", phase) + } + + return nil +} + +func VerifyOperatorInstalled(namespace, subscriptionName string) HostedClusterVerifier { + return verifyOperatorInstalled{ + namespace: namespace, + subscriptionName: subscriptionName, + } +} + +type verifyOperatorCSV struct { + namespace string + csvName string +} + +func (v verifyOperatorCSV) Name() string { + return "VerifyOperatorCSV" +} + +func (v verifyOperatorCSV) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create dynamic client: %w", err) + } + + csvGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "clusterserviceversions", + } + + csv, err := dynamicClient.Resource(csvGVR).Namespace(v.namespace).Get(ctx, v.csvName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get CSV %s/%s: %w", v.namespace, 
v.csvName, err) + } + + phase, found, err := unstructured.NestedString(csv.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get CSV phase: %w", err) + } + if !found { + return fmt.Errorf("CSV phase not found") + } + if phase != "Succeeded" { + return fmt.Errorf("CSV phase is %q, expected Succeeded", phase) + } + + return nil +} + +func VerifyOperatorCSV(namespace, csvName string) HostedClusterVerifier { + return verifyOperatorCSV{ + namespace: namespace, + csvName: csvName, + } +} diff --git a/test/util/verifiers/pullsecret.go b/test/util/verifiers/pullsecret.go new file mode 100644 index 0000000000..922796b640 --- /dev/null +++ b/test/util/verifiers/pullsecret.go @@ -0,0 +1,141 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verifiers + +import ( + "context" + "encoding/json" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/Azure/ARO-HCP/test/util/framework" +) + +type verifyPullSecretMergedIntoGlobal struct { + expectedHost string +} + +func (v verifyPullSecretMergedIntoGlobal) Name() string { + return "VerifyPullSecretMergedIntoGlobal" +} + +func (v verifyPullSecretMergedIntoGlobal) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + globalSecret, err := kubeClient.CoreV1().Secrets("kube-system").Get(ctx, "global-pull-secret", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get global-pull-secret: %w", err) + } + + var globalConfig framework.DockerConfigJSON + if err := json.Unmarshal(globalSecret.Data[corev1.DockerConfigJsonKey], &globalConfig); err != nil { + return fmt.Errorf("failed to unmarshal global pull secret: %w", err) + } + + if _, exists := globalConfig.Auths[v.expectedHost]; !exists { + return fmt.Errorf("expected host %q not found in global pull secret", v.expectedHost) + } + + return nil +} + +func VerifyPullSecretMergedIntoGlobal(expectedHost string) HostedClusterVerifier { + return verifyPullSecretMergedIntoGlobal{expectedHost: expectedHost} +} + +type verifyGlobalPullSecretSyncer struct{} + +func (v verifyGlobalPullSecretSyncer) Name() string { + return "VerifyGlobalPullSecretSyncer" +} + +func (v verifyGlobalPullSecretSyncer) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + _, err = kubeClient.AppsV1().DaemonSets("kube-system").Get(ctx, "global-pull-secret-syncer", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get global-pull-secret-syncer DaemonSet: %w", err) + } + + return nil +} + +func VerifyGlobalPullSecretSyncer() HostedClusterVerifier { + return verifyGlobalPullSecretSyncer{} +} + 
+type verifyPullSecretAuthData struct { + secretName string + namespace string + expectedHost string + expectedAuth string + expectedEmail string +} + +func (v verifyPullSecretAuthData) Name() string { + return "VerifyPullSecretAuthData" +} + +func (v verifyPullSecretAuthData) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + secret, err := kubeClient.CoreV1().Secrets(v.namespace).Get(ctx, v.secretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get secret %s/%s: %w", v.namespace, v.secretName, err) + } + + var config framework.DockerConfigJSON + if err := json.Unmarshal(secret.Data[corev1.DockerConfigJsonKey], &config); err != nil { + return fmt.Errorf("failed to unmarshal pull secret: %w", err) + } + + hostAuth, exists := config.Auths[v.expectedHost] + if !exists { + return fmt.Errorf("expected host %q not found in pull secret", v.expectedHost) + } + + if hostAuth.Email != v.expectedEmail { + return fmt.Errorf("expected email %q, got %q", v.expectedEmail, hostAuth.Email) + } + + if hostAuth.Auth != v.expectedAuth { + return fmt.Errorf("expected auth %q, got %q", v.expectedAuth, hostAuth.Auth) + } + + return nil +} + +func VerifyPullSecretAuthData(secretName, namespace, expectedHost, expectedAuth, expectedEmail string) HostedClusterVerifier { + return verifyPullSecretAuthData{ + secretName: secretName, + namespace: namespace, + expectedHost: expectedHost, + expectedAuth: expectedAuth, + expectedEmail: expectedEmail, + } +} From 429efdd2cf655a27e8a6f8a03074e1ca99c89bd5 Mon Sep 17 00:00:00 2001 From: Mike Gahagan Date: Wed, 17 Dec 2025 10:16:46 -0500 Subject: [PATCH 2/5] Test to confirm HCP cluster can accept and use user provided pull secrets. --- test/e2e/cluster_pullsecret.go | 334 ++++++++++++++++++ test/util/framework/hcp_helper.go | 33 ++ .../framework/per_invocation_framework.go | 12 + test/util/framework/per_test_framework.go | 4 + test/util/framework/pullsecret_types.go | 29 ++ test/util/verifiers/image_pull.go | 166 +++++++++ test/util/verifiers/operator.go | 154 ++++++++ test/util/verifiers/pullsecret.go | 141 ++++++++ 8 files changed, 873 insertions(+) create mode 100644 test/e2e/cluster_pullsecret.go create mode 100644 test/util/framework/pullsecret_types.go create mode 100644 test/util/verifiers/image_pull.go create mode 100644 test/util/verifiers/operator.go create mode 100644 test/util/verifiers/pullsecret.go diff --git a/test/e2e/cluster_pullsecret.go b/test/e2e/cluster_pullsecret.go new file mode 100644 index 0000000000..d4a9770fdb --- /dev/null +++ b/test/e2e/cluster_pullsecret.go @@ -0,0 +1,334 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + + "github.com/Azure/ARO-HCP/test/util/framework" + "github.com/Azure/ARO-HCP/test/util/labels" + "github.com/Azure/ARO-HCP/test/util/verifiers" +) + +var _ = Describe("Cluster Pull Secret Management", func() { + BeforeEach(func() { + // per test initialization + }) + + It("should be able to create an HCP cluster and manage pull secrets", + labels.RequireNothing, + labels.Critical, + labels.Positive, + labels.AroRpApiCompatible, + func(ctx context.Context) { + const ( + customerClusterName = "pullsecret-hcp-cluster" + testPullSecretHost = "host.example.com" + testPullSecretPassword = "my_password" + testPullSecretEmail = "noreply@example.com" + pullSecretName = "additional-pull-secret" + pullSecretNamespace = "kube-system" + ) + tc := framework.NewTestContext() + + By("creating a resource group") + resourceGroup, err := tc.NewResourceGroup(ctx, "pullsecret-test", tc.Location()) + Expect(err).NotTo(HaveOccurred()) + + By("creating cluster parameters") + clusterParams := framework.NewDefaultClusterParams() + clusterParams.ClusterName = customerClusterName + managedResourceGroupName := framework.SuffixName(*resourceGroup.Name, "-managed", 64) + clusterParams.ManagedResourceGroupName = managedResourceGroupName + + By("creating customer resources") + clusterParams, err = tc.CreateClusterCustomerResources(ctx, + resourceGroup, + clusterParams, + map[string]interface{}{ + "persistTagValue": false, + }, + TestArtifactsFS, + ) + Expect(err).NotTo(HaveOccurred()) + + By("Creating the cluster") + err = tc.CreateHCPClusterFromParam(ctx, + GinkgoLogr, + *resourceGroup.Name, + clusterParams, + 45*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + By("Creating the node pool") + nodePoolParams := framework.NewDefaultNodePoolParams() + nodePoolParams.NodePoolName = "np-1" + nodePoolParams.ClusterName = customerClusterName + nodePoolParams.Replicas = int32(2) + err = tc.CreateNodePoolFromParam(ctx, + *resourceGroup.Name, + customerClusterName, + nodePoolParams, + 15*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + + By("getting credentials") + adminRESTConfig, err := tc.GetAdminRESTConfigForHCPCluster( + ctx, + tc.Get20240610ClientFactoryOrDie(ctx).NewHcpOpenShiftClustersClient(), + *resourceGroup.Name, + customerClusterName, + 10*time.Minute, + ) + Expect(err).NotTo(HaveOccurred()) + + By("ensuring the cluster is viable") + err = verifiers.VerifyHCPCluster(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating kubernetes client") + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating test pull secret") + username := "test-user" + auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + testPullSecretPassword)) + + testPullSecret, err := framework.CreateTestDockerConfigSecret( + testPullSecretHost, + username, + testPullSecretPassword, + testPullSecretEmail, + pullSecretName, + pullSecretNamespace, + ) + Expect(err).NotTo(HaveOccurred()) + + By("creating the test pull secret in the cluster") + _, err = kubeClient.CoreV1().Secrets(pullSecretNamespace).Create(ctx, testPullSecret, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for HCCO to merge the additional pull secret with the global pull secret") + Eventually(func() error 
{ + return verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "additional pull secret should be merged into global-pull-secret by HCCO") + + By("verifying the DaemonSet for global pull secret synchronization is created") + Eventually(func() error { + return verifiers.VerifyGlobalPullSecretSyncer().Verify(ctx, adminRESTConfig) + }, 60*time.Second, 10*time.Second).Should(Succeed(), "global-pull-secret-syncer DaemonSet should be created") + + By("verifying the pull secret was merged into the global pull secret") + err = verifiers.VerifyPullSecretAuthData( + "global-pull-secret", + pullSecretNamespace, + testPullSecretHost, + auth, + testPullSecretEmail, + ).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("reading pull-secret file from aro-hcp-qe-pull-secret directory") + pullSecretFilePath := filepath.Join(tc.PullSecretPath(), "pull-secret") + pullSecretFileData, err := os.ReadFile(pullSecretFilePath) + Expect(err).NotTo(HaveOccurred(), "failed to read pull-secret file from %s", pullSecretFilePath) + + By("parsing pull-secret file") + var pullSecretConfig framework.DockerConfigJSON + err = json.Unmarshal(pullSecretFileData, &pullSecretConfig) + Expect(err).NotTo(HaveOccurred(), "failed to parse pull-secret file") + + By("extracting registry.redhat.io credentials") + const redhatRegistryHost = "registry.redhat.io" + redhatRegistryAuth, ok := pullSecretConfig.Auths[redhatRegistryHost] + Expect(ok).To(BeTrue(), "registry.redhat.io credentials not found in pull-secret file") + + redhatRegistryAuthString := redhatRegistryAuth.Auth + redhatRegistryEmail := redhatRegistryAuth.Email + + By("updating additional-pull-secret to add registry.redhat.io credentials") + // Get the current additional-pull-secret + currentSecret, err := kubeClient.CoreV1().Secrets(pullSecretNamespace).Get(ctx, pullSecretName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get existing additional-pull-secret") + + // Parse the current dockerconfigjson + var currentConfig framework.DockerConfigJSON + err = json.Unmarshal(currentSecret.Data[corev1.DockerConfigJsonKey], ¤tConfig) + Expect(err).NotTo(HaveOccurred(), "failed to parse current pull secret") + + // Add registry.redhat.io credentials to the existing auths + currentConfig.Auths[redhatRegistryHost] = framework.RegistryAuth{ + Auth: redhatRegistryAuthString, + Email: redhatRegistryEmail, + } + + // Marshal back to JSON + updatedDockerConfigJSON, err := json.Marshal(currentConfig) + Expect(err).NotTo(HaveOccurred()) + + // Update the secret + currentSecret.Data[corev1.DockerConfigJsonKey] = updatedDockerConfigJSON + _, err = kubeClient.CoreV1().Secrets(pullSecretNamespace).Update(ctx, currentSecret, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for HCCO to merge the updated pull secret (with registry.redhat.io) into global pull secret") + Eventually(func() error { + return verifiers.VerifyPullSecretMergedIntoGlobal(redhatRegistryHost).Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "registry.redhat.io pull secret should be merged into global-pull-secret by HCCO") + + By("verifying both test registries are now in the global pull secret") + err = verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred(), "host.example.com should still be in global-pull-secret") + + err = 
verifiers.VerifyPullSecretAuthData( + "global-pull-secret", + pullSecretNamespace, + redhatRegistryHost, + redhatRegistryAuthString, + redhatRegistryEmail, + ).Verify(ctx, adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating dynamic client for operator installation") + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + Expect(err).NotTo(HaveOccurred()) + + By("creating namespace for NFD operator") + const nfdNamespace = "openshift-nfd" + _, err = kubeClient.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nfdNamespace, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("creating OperatorGroup for NFD operator") + operatorGroupGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1", + Resource: "operatorgroups", + } + operatorGroup := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1", + "kind": "OperatorGroup", + "metadata": map[string]interface{}{ + "name": "nfd-operator-group", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "targetNamespaces": []interface{}{nfdNamespace}, + }, + }, + } + _, err = dynamicClient.Resource(operatorGroupGVR).Namespace(nfdNamespace).Create(ctx, operatorGroup, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("creating Subscription for NFD operator from redhat-operators catalog") + subscriptionGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "subscriptions", + } + subscription := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", + "kind": "Subscription", + "metadata": map[string]interface{}{ + "name": "nfd", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "channel": "stable", + "name": "nfd", + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "installPlanApproval": "Automatic", + }, + }, + } + _, err = dynamicClient.Resource(subscriptionGVR).Namespace(nfdNamespace).Create(ctx, subscription, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for NFD operator to be installed") + Eventually(func() error { + return verifiers.VerifyOperatorInstalled(nfdNamespace, "nfd").Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD operator should be installed successfully") + + By("creating NodeFeatureDiscovery CR to deploy NFD worker") + nfdGVR := schema.GroupVersionResource{ + Group: "nfd.openshift.io", + Version: "v1", + Resource: "nodefeaturediscoveries", + } + nfdCR := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "nfd.openshift.io/v1", + "kind": "NodeFeatureDiscovery", + "metadata": map[string]interface{}{ + "name": "nfd-instance", + "namespace": nfdNamespace, + }, + "spec": map[string]interface{}{ + "operand": map[string]interface{}{ + "image": "registry.redhat.io/openshift4/ose-node-feature-discovery:latest", + }, + }, + }, + } + _, err = dynamicClient.Resource(nfdGVR).Namespace(nfdNamespace).Create(ctx, nfdCR, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for NFD worker DaemonSet to be created") + Eventually(func() error { + daemonSets, err := kubeClient.AppsV1().DaemonSets(nfdNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return err + } + for _, ds := range daemonSets.Items { + if ds.Name == "nfd-worker" { + if ds.Status.DesiredNumberScheduled > 0 
&& ds.Status.NumberReady > 0 { + return nil + } + return fmt.Errorf("nfd-worker DaemonSet found but not ready: desired=%d, ready=%d", + ds.Status.DesiredNumberScheduled, ds.Status.NumberReady) + } + } + return fmt.Errorf("nfd-worker DaemonSet not found") + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker DaemonSet should be created and have ready pods") + + By("waiting for NFD worker pods to be created and verify images from registry.redhat.io can be pulled") + Eventually(func() error { + return verifiers.VerifyImagePulled(nfdNamespace, "registry.redhat.io", "ose-node-feature-discovery").Verify(ctx, adminRESTConfig) + }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker images from registry.redhat.io should be pulled successfully with the added pull secret") + }) +}) diff --git a/test/util/framework/hcp_helper.go b/test/util/framework/hcp_helper.go index 93d0d05a83..da7348e444 100644 --- a/test/util/framework/hcp_helper.go +++ b/test/util/framework/hcp_helper.go @@ -16,6 +16,8 @@ package framework import ( "context" + "encoding/base64" + "encoding/json" "errors" "fmt" "time" @@ -26,6 +28,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "golang.org/x/sync/errgroup" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -468,6 +471,36 @@ func CreateClusterRoleBinding(ctx context.Context, subject string, adminRESTConf return nil } +// CreateTestDockerConfigSecret creates a Docker config secret for testing pull secret functionality +func CreateTestDockerConfigSecret(host, username, password, email, secretName, namespace string) (*corev1.Secret, error) { + auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + password)) + + dockerConfig := DockerConfigJSON{ + Auths: map[string]RegistryAuth{ + host: { + Email: email, + Auth: auth, + }, + }, + } + + dockerConfigJSON, err := json.Marshal(dockerConfig) + if err != nil { + return nil, fmt.Errorf("failed to marshal docker config: %w", err) + } + + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: namespace, + }, + Type: corev1.SecretTypeDockerConfigJson, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: dockerConfigJSON, + }, + }, nil +} + func BeginCreateHCPCluster( ctx context.Context, logger logr.Logger, diff --git a/test/util/framework/per_invocation_framework.go b/test/util/framework/per_invocation_framework.go index 11b5e4641d..8c000da4cf 100644 --- a/test/util/framework/per_invocation_framework.go +++ b/test/util/framework/per_invocation_framework.go @@ -44,6 +44,7 @@ type perBinaryInvocationTestContext struct { tenantID string testUserClientID string location string + pullSecretPath string isDevelopmentEnvironment bool skipCleanup bool pooledIdentities bool @@ -80,6 +81,7 @@ func invocationContext() *perBinaryInvocationTestContext { tenantID: tenantID(), testUserClientID: testUserClientID(), location: location(), + pullSecretPath: pullSecretPath(), isDevelopmentEnvironment: IsDevelopmentEnvironment(), skipCleanup: skipCleanup(), pooledIdentities: pooledIdentities(), @@ -270,6 +272,16 @@ func tenantID() string { return os.Getenv("AZURE_TENANT_ID") } +// pullSecretPath returns the value of ARO_HCP_QE_PULL_SECRET_PATH environment variable +// If not set, defaults to /var/run/aro-hcp-qe-pull-secret +func pullSecretPath() string { + path := os.Getenv("ARO_HCP_QE_PULL_SECRET_PATH") + if path == "" { + return "/var/run/aro-hcp-qe-pull-secret" + } + return path 
+} + // IsDevelopmentEnvironment indicates when this environment is development. This controls client endpoints and disables security // when set to development. func IsDevelopmentEnvironment() bool { diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index f54d06da0b..db15ee0859 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -752,6 +752,10 @@ func (tc *perItOrDescribeTestContext) TenantID() string { return tc.perBinaryInvocationTestContext.tenantID } +func (tc *perItOrDescribeTestContext) PullSecretPath() string { + return tc.perBinaryInvocationTestContext.pullSecretPath +} + func (tc *perItOrDescribeTestContext) recordDeploymentOperationsUnlocked(resourceGroup, deployment string, operations []timing.Operation) { if _, exists := tc.timingMetadata.Deployments[resourceGroup]; !exists { tc.timingMetadata.Deployments[resourceGroup] = make(map[string][]timing.Operation) diff --git a/test/util/framework/pullsecret_types.go b/test/util/framework/pullsecret_types.go new file mode 100644 index 0000000000..be71e8a7e1 --- /dev/null +++ b/test/util/framework/pullsecret_types.go @@ -0,0 +1,29 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package framework + +// RegistryAuth represents authentication credentials for a single registry. +// This type models the structure of dockerconfigjson registry auth entries. +type RegistryAuth struct { + Username string `json:"username,omitempty"` + Email string `json:"email,omitempty"` + Auth string `json:"auth"` +} + +// DockerConfigJSON is the root structure for dockerconfigjson secret data. +// See: https://kubernetes.io/docs/concepts/configuration/secret/#docker-config-secrets +type DockerConfigJSON struct { + Auths map[string]RegistryAuth `json:"auths"` +} diff --git a/test/util/verifiers/image_pull.go b/test/util/verifiers/image_pull.go new file mode 100644 index 0000000000..c8f1c56f1c --- /dev/null +++ b/test/util/verifiers/image_pull.go @@ -0,0 +1,166 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package verifiers + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type verifyImagePulled struct { + namespace string + imageRepository string + imageName string // optional - if empty, any image from repository is checked +} + +func (v verifyImagePulled) Name() string { + return "VerifyImagePulled" +} + +func (v verifyImagePulled) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + logger := ginkgo.GinkgoLogr + startTime := time.Now() + logger.Info("Starting image pull verification", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "startTime", startTime.Format(time.RFC3339)) + + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + // Get all pods in the namespace + pods, err := kubeClient.CoreV1().Pods(v.namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list pods in namespace %s: %w", v.namespace, err) + } + + if len(pods.Items) == 0 { + return fmt.Errorf("no pods found in namespace %s", v.namespace) + } + + // Check if at least one pod successfully pulled the specified image + imagePulledSuccessfully := false + var imagePullErrors []string + + for _, pod := range pods.Items { + // Check container statuses for image pull success + for _, containerStatus := range pod.Status.ContainerStatuses { + // Check if the image matches our criteria + imageMatches := strings.Contains(containerStatus.Image, v.imageRepository) + if v.imageName != "" { + imageMatches = imageMatches && strings.Contains(containerStatus.Image, v.imageName) + } + + if imageMatches { + // If ImageID is set, the image was pulled successfully + if containerStatus.ImageID != "" { + imagePulledSuccessfully = true + logger.Info("Successfully pulled image", + "pod", pod.Name, + "container", containerStatus.Name, + "image", containerStatus.Image, + "imageID", containerStatus.ImageID) + } + } + } + + // Also check for ImagePullBackOff errors + for _, condition := range pod.Status.Conditions { + if condition.Type == "PodScheduled" && condition.Status == "False" { + if strings.Contains(condition.Message, "ImagePullBackOff") || strings.Contains(condition.Message, "ErrImagePull") { + imagePullErrors = append(imagePullErrors, fmt.Sprintf("pod %s: %s", pod.Name, condition.Message)) + } + } + } + + // Check container statuses for waiting state + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + reason := containerStatus.State.Waiting.Reason + message := containerStatus.State.Waiting.Message + + // Log all waiting states + logger.Info("Container waiting", + "pod", pod.Name, + "container", containerStatus.Name, + "reason", reason, + "message", message, + "image", containerStatus.Image) + + // Track image pull errors specifically + if reason == "ImagePullBackOff" || reason == "ErrImagePull" { + imagePullErrors = append(imagePullErrors, fmt.Sprintf("pod %s container %s: %s - %s", + pod.Name, containerStatus.Name, reason, message)) + } + } + } + } + + endTime := time.Now() + duration := endTime.Sub(startTime) + + if len(imagePullErrors) > 0 { + logger.Error(fmt.Errorf("image pull errors detected"), "verification failed", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", 
duration, + "endTime", endTime.Format(time.RFC3339)) + return fmt.Errorf("image pull errors detected:\n%s", strings.Join(imagePullErrors, "\n")) + } + + if !imagePulledSuccessfully { + logger.Error(fmt.Errorf("no matching images pulled"), "verification failed", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", duration, + "endTime", endTime.Format(time.RFC3339)) + return fmt.Errorf("no pods found with successfully pulled images from %s", v.imageRepository) + } + + logger.Info("Image pull verification completed successfully", + "namespace", v.namespace, + "imageRepository", v.imageRepository, + "imageName", v.imageName, + "duration", duration, + "endTime", endTime.Format(time.RFC3339)) + + return nil +} + +// VerifyImagePulled creates a verifier that checks if an image has been successfully pulled +// Parameters: +// - namespace: the namespace to check for pods +// - imageRepository: the repository to match (e.g., "registry.redhat.io") +// - imageName: optional specific image name to match (empty string matches any image from repository) +func VerifyImagePulled(namespace, imageRepository, imageName string) HostedClusterVerifier { + return verifyImagePulled{ + namespace: namespace, + imageRepository: imageRepository, + imageName: imageName, + } +} diff --git a/test/util/verifiers/operator.go b/test/util/verifiers/operator.go new file mode 100644 index 0000000000..f42da648bd --- /dev/null +++ b/test/util/verifiers/operator.go @@ -0,0 +1,154 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
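+
+// This file adds OLM-based verifiers: one walks a Subscription (state
+// AtLatestKnown) to its referenced InstallPlan (phase Complete), the other checks
+// that a named ClusterServiceVersion has reached phase Succeeded.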
+ +package verifiers + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" +) + +type verifyOperatorInstalled struct { + namespace string + subscriptionName string +} + +func (v verifyOperatorInstalled) Name() string { + return "VerifyOperatorInstalled" +} + +func (v verifyOperatorInstalled) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create dynamic client: %w", err) + } + + // Check if Subscription exists + subscriptionGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "subscriptions", + } + + subscription, err := dynamicClient.Resource(subscriptionGVR).Namespace(v.namespace).Get(ctx, v.subscriptionName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get subscription %s/%s: %w", v.namespace, v.subscriptionName, err) + } + + // Check subscription state + state, found, err := unstructured.NestedString(subscription.Object, "status", "state") + if err != nil { + return fmt.Errorf("failed to get subscription state: %w", err) + } + if !found { + return fmt.Errorf("subscription state not found") + } + if state != "AtLatestKnown" { + return fmt.Errorf("subscription state is %q, expected AtLatestKnown", state) + } + + // Get InstallPlan reference + installPlanRef, found, err := unstructured.NestedString(subscription.Object, "status", "installplan", "name") + if err != nil { + return fmt.Errorf("failed to get installplan reference: %w", err) + } + if !found { + return fmt.Errorf("installplan reference not found in subscription") + } + + // Check InstallPlan status + installPlanGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "installplans", + } + + installPlan, err := dynamicClient.Resource(installPlanGVR).Namespace(v.namespace).Get(ctx, installPlanRef, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get installplan %s/%s: %w", v.namespace, installPlanRef, err) + } + + phase, found, err := unstructured.NestedString(installPlan.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get installplan phase: %w", err) + } + if !found { + return fmt.Errorf("installplan phase not found") + } + if phase != "Complete" { + return fmt.Errorf("installplan phase is %q, expected Complete", phase) + } + + return nil +} + +func VerifyOperatorInstalled(namespace, subscriptionName string) HostedClusterVerifier { + return verifyOperatorInstalled{ + namespace: namespace, + subscriptionName: subscriptionName, + } +} + +type verifyOperatorCSV struct { + namespace string + csvName string +} + +func (v verifyOperatorCSV) Name() string { + return "VerifyOperatorCSV" +} + +func (v verifyOperatorCSV) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + dynamicClient, err := dynamic.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create dynamic client: %w", err) + } + + csvGVR := schema.GroupVersionResource{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Resource: "clusterserviceversions", + } + + csv, err := dynamicClient.Resource(csvGVR).Namespace(v.namespace).Get(ctx, v.csvName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get CSV %s/%s: %w", v.namespace, 
v.csvName, err) + } + + phase, found, err := unstructured.NestedString(csv.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get CSV phase: %w", err) + } + if !found { + return fmt.Errorf("CSV phase not found") + } + if phase != "Succeeded" { + return fmt.Errorf("CSV phase is %q, expected Succeeded", phase) + } + + return nil +} + +func VerifyOperatorCSV(namespace, csvName string) HostedClusterVerifier { + return verifyOperatorCSV{ + namespace: namespace, + csvName: csvName, + } +} diff --git a/test/util/verifiers/pullsecret.go b/test/util/verifiers/pullsecret.go new file mode 100644 index 0000000000..922796b640 --- /dev/null +++ b/test/util/verifiers/pullsecret.go @@ -0,0 +1,141 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verifiers + +import ( + "context" + "encoding/json" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/Azure/ARO-HCP/test/util/framework" +) + +type verifyPullSecretMergedIntoGlobal struct { + expectedHost string +} + +func (v verifyPullSecretMergedIntoGlobal) Name() string { + return "VerifyPullSecretMergedIntoGlobal" +} + +func (v verifyPullSecretMergedIntoGlobal) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + globalSecret, err := kubeClient.CoreV1().Secrets("kube-system").Get(ctx, "global-pull-secret", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get global-pull-secret: %w", err) + } + + var globalConfig framework.DockerConfigJSON + if err := json.Unmarshal(globalSecret.Data[corev1.DockerConfigJsonKey], &globalConfig); err != nil { + return fmt.Errorf("failed to unmarshal global pull secret: %w", err) + } + + if _, exists := globalConfig.Auths[v.expectedHost]; !exists { + return fmt.Errorf("expected host %q not found in global pull secret", v.expectedHost) + } + + return nil +} + +func VerifyPullSecretMergedIntoGlobal(expectedHost string) HostedClusterVerifier { + return verifyPullSecretMergedIntoGlobal{expectedHost: expectedHost} +} + +type verifyGlobalPullSecretSyncer struct{} + +func (v verifyGlobalPullSecretSyncer) Name() string { + return "VerifyGlobalPullSecretSyncer" +} + +func (v verifyGlobalPullSecretSyncer) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + _, err = kubeClient.AppsV1().DaemonSets("kube-system").Get(ctx, "global-pull-secret-syncer", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get global-pull-secret-syncer DaemonSet: %w", err) + } + + return nil +} + +func VerifyGlobalPullSecretSyncer() HostedClusterVerifier { + return verifyGlobalPullSecretSyncer{} +} + 
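+// verifyPullSecretAuthData checks that a dockerconfigjson Secret contains an
+// auths entry for expectedHost whose email and auth fields match the expected
+// values.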
+type verifyPullSecretAuthData struct { + secretName string + namespace string + expectedHost string + expectedAuth string + expectedEmail string +} + +func (v verifyPullSecretAuthData) Name() string { + return "VerifyPullSecretAuthData" +} + +func (v verifyPullSecretAuthData) Verify(ctx context.Context, adminRESTConfig *rest.Config) error { + kubeClient, err := kubernetes.NewForConfig(adminRESTConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes client: %w", err) + } + + secret, err := kubeClient.CoreV1().Secrets(v.namespace).Get(ctx, v.secretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get secret %s/%s: %w", v.namespace, v.secretName, err) + } + + var config framework.DockerConfigJSON + if err := json.Unmarshal(secret.Data[corev1.DockerConfigJsonKey], &config); err != nil { + return fmt.Errorf("failed to unmarshal pull secret: %w", err) + } + + hostAuth, exists := config.Auths[v.expectedHost] + if !exists { + return fmt.Errorf("expected host %q not found in pull secret", v.expectedHost) + } + + if hostAuth.Email != v.expectedEmail { + return fmt.Errorf("expected email %q, got %q", v.expectedEmail, hostAuth.Email) + } + + if hostAuth.Auth != v.expectedAuth { + return fmt.Errorf("expected auth %q, got %q", v.expectedAuth, hostAuth.Auth) + } + + return nil +} + +func VerifyPullSecretAuthData(secretName, namespace, expectedHost, expectedAuth, expectedEmail string) HostedClusterVerifier { + return verifyPullSecretAuthData{ + secretName: secretName, + namespace: namespace, + expectedHost: expectedHost, + expectedAuth: expectedAuth, + expectedEmail: expectedEmail, + } +} From 30db437a8eac16ef931aa260d4bee4515da66d6c Mon Sep 17 00:00:00 2001 From: Mike Gahagan Date: Wed, 17 Dec 2025 12:35:06 -0500 Subject: [PATCH 3/5] fix integration issues with MI pool --- test/util/framework/hcp_helper.go | 5 ++ .../framework/per_invocation_framework.go | 52 ++++++++++++++++--- test/util/framework/per_test_framework.go | 30 ++++++++--- 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/test/util/framework/hcp_helper.go b/test/util/framework/hcp_helper.go index 7367df2877..da7348e444 100644 --- a/test/util/framework/hcp_helper.go +++ b/test/util/framework/hcp_helper.go @@ -23,6 +23,7 @@ import ( "time" "github.com/davecgh/go-spew/spew" + "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "golang.org/x/sync/errgroup" @@ -502,6 +503,7 @@ func CreateTestDockerConfigSecret(host, username, password, email, secretName, n func BeginCreateHCPCluster( ctx context.Context, + logger logr.Logger, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupName string, hcpClusterName string, @@ -509,6 +511,7 @@ func BeginCreateHCPCluster( location string, ) (*runtime.Poller[hcpsdk20240610preview.HcpOpenShiftClustersClientCreateOrUpdateResponse], error) { cluster := BuildHCPClusterFromParams(clusterParams, location) + logger.Info("Starting HCP cluster creation", "clusterName", hcpClusterName, "resourceGroup", resourceGroupName) poller, err := hcpClient.BeginCreateOrUpdate(ctx, resourceGroupName, hcpClusterName, cluster, nil) if err != nil { return nil, fmt.Errorf("failed starting cluster creation %q in resourcegroup=%q: %w", hcpClusterName, resourceGroupName, err) @@ -520,6 +523,7 @@ func BeginCreateHCPCluster( // the function won't wait for the deployment to be ready. 
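+// The provided logger records when cluster creation starts, so long waits are
+// visible in the test output.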
func CreateHCPClusterAndWait( ctx context.Context, + logger logr.Logger, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupName string, hcpClusterName string, @@ -532,6 +536,7 @@ func CreateHCPClusterAndWait( defer cancel() } + logger.Info("Starting HCP cluster creation", "clusterName", hcpClusterName, "resourceGroup", resourceGroupName) poller, err := hcpClient.BeginCreateOrUpdate(ctx, resourceGroupName, hcpClusterName, cluster, nil) if err != nil { return nil, fmt.Errorf("failed starting cluster creation %q in resourcegroup=%q: %w", hcpClusterName, resourceGroupName, err) diff --git a/test/util/framework/per_invocation_framework.go b/test/util/framework/per_invocation_framework.go index 3c3f488c46..8c000da4cf 100644 --- a/test/util/framework/per_invocation_framework.go +++ b/test/util/framework/per_invocation_framework.go @@ -21,6 +21,7 @@ import ( "net/http" "os" "path" + "path/filepath" "strconv" "strings" "sync" @@ -46,10 +47,12 @@ type perBinaryInvocationTestContext struct { pullSecretPath string isDevelopmentEnvironment bool skipCleanup bool + pooledIdentities bool - contextLock sync.RWMutex - subscriptionID string - azureCredentials azcore.TokenCredential + contextLock sync.RWMutex + subscriptionID string + azureCredentials azcore.TokenCredential + identityPoolState *leasedIdentityPoolState } type CleanupFunc func(ctx context.Context) error @@ -73,7 +76,7 @@ func invocationContext() *perBinaryInvocationTestContext { initializeOnce.Do(func() { invocationContextInstance = &perBinaryInvocationTestContext{ artifactDir: artifactDir(), - sharedDir: sharedDir(), + sharedDir: SharedDir(), subscriptionName: subscriptionName(), tenantID: tenantID(), testUserClientID: testUserClientID(), @@ -81,6 +84,7 @@ func invocationContext() *perBinaryInvocationTestContext { pullSecretPath: pullSecretPath(), isDevelopmentEnvironment: IsDevelopmentEnvironment(), skipCleanup: skipCleanup(), + pooledIdentities: pooledIdentities(), } }) return invocationContextInstance @@ -189,6 +193,37 @@ func (tc *perBinaryInvocationTestContext) Location() string { return tc.location } +func (tc *perBinaryInvocationTestContext) UsePooledIdentities() bool { + return tc.pooledIdentities +} + +func (tc *perBinaryInvocationTestContext) getLeasedIdentityPoolState() (*leasedIdentityPoolState, error) { + tc.contextLock.RLock() + if tc.identityPoolState != nil { + defer tc.contextLock.RUnlock() + return tc.identityPoolState, nil + } + tc.contextLock.RUnlock() + + tc.contextLock.Lock() + defer tc.contextLock.Unlock() + + if tc.identityPoolState != nil { + return tc.identityPoolState, nil + } + + state, err := newLeasedIdentityPoolState(msiPoolStateFilePath()) + if err != nil { + return nil, fmt.Errorf("failed to get managed identities pool state: %w", err) + } + tc.identityPoolState = state + return tc.identityPoolState, nil +} + +func msiPoolStateFilePath() string { + return filepath.Join(artifactDir(), "identities-pool-state.yaml") +} + func skipCleanup() bool { ret, _ := strconv.ParseBool(os.Getenv("ARO_E2E_SKIP_CLEANUP")) return ret @@ -200,10 +235,15 @@ func artifactDir() string { return os.Getenv("ARTIFACT_DIR") } -// sharedDir is SHARED_DIR. It is a spot to store *files only* that can be shared between ci-operator steps. +func pooledIdentities() bool { + b, _ := strconv.ParseBool(strings.TrimSpace(os.Getenv(UsePooledIdentitiesEnvvar))) + return b +} + +// SharedDir is SHARED_DIR. It is a spot to store *files only* that can be shared between ci-operator steps. 
// We can use this for anything, but currently we have a backup cleanup and collection scripts that use files // here to cleanup and debug testing resources. -func sharedDir() string { +func SharedDir() string { // can't use gomega in this method since it is used outside of It() return os.Getenv("SHARED_DIR") } diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index d027574baf..85c0070c53 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -164,12 +164,16 @@ func (tc *perItOrDescribeTestContext) deleteCreatedResources(ctx context.Context } errCleanupResourceGroups := tc.CleanupResourceGroups(ctx, hcpClientFactory.NewHcpOpenShiftClustersClient(), resourceGroupsClientFactory.NewResourceGroupsClient(), opts) if errCleanupResourceGroups != nil { - ginkgo.GinkgoLogr.Error(errCleanupResourceGroups, "at least one resource group failed to delete: %w", errCleanupResourceGroups) + ginkgo.GinkgoLogr.Error(errCleanupResourceGroups, "at least one resource group failed to delete") } err = CleanupAppRegistrations(ctx, graphClient, appRegistrations) if err != nil { - ginkgo.GinkgoLogr.Error(err, "at least one app registration failed to delete: %w", err) + ginkgo.GinkgoLogr.Error(err, "at least one app registration failed to delete") + } + + if err := tc.releaseLeasedIdentities(ctx); err != nil { + ginkgo.GinkgoLogr.Error(err, "failed to release leased identities") } ginkgo.GinkgoLogr.Info("finished deleting created resources") @@ -255,9 +259,19 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) { defer tc.contextLock.RUnlock() ginkgo.GinkgoLogr.Info("collecting debug info") + leasedContainers, err := tc.leasedIdentityContainers() + if err != nil { + ginkgo.GinkgoLogr.Error(err, "failed to get leased identity containers") + return + } + // deletion takes a while, it's worth it to do this in parallel waitGroup, ctx := errgroup.WithContext(ctx) - for _, resourceGroupName := range tc.knownResourceGroups { + resourceGroups := append( + append([]string(nil), tc.knownResourceGroups...), + leasedContainers..., + ) + for _, resourceGroupName := range resourceGroups { currResourceGroupName := resourceGroupName waitGroup.Go(func() error { // prevent a stray panic from exiting the process. Don't do this generally because ginkgo/gomega rely on panics to function. @@ -268,7 +282,7 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) { } if err := waitGroup.Wait(); err != nil { // remember that Wait only shows the first error, not all the errors. 
- ginkgo.GinkgoLogr.Error(err, "at least one resource group failed to collect: %w", err) + ginkgo.GinkgoLogr.Error(err, "at least one resource group failed to collect") } ginkgo.GinkgoLogr.Info("finished collecting debug info") @@ -692,6 +706,10 @@ func (tc *perItOrDescribeTestContext) Location() string { return tc.perBinaryInvocationTestContext.Location() } +func (tc *perItOrDescribeTestContext) PullSecretPath() string { + return tc.perBinaryInvocationTestContext.pullSecretPath +} + func (tc *perItOrDescribeTestContext) FindVirtualMachineSizeMatching(ctx context.Context, pattern *regexp.Regexp) (string, error) { if pattern == nil { return "", fmt.Errorf("pattern cannot be nil") @@ -738,10 +756,6 @@ func (tc *perItOrDescribeTestContext) TenantID() string { return tc.perBinaryInvocationTestContext.tenantID } -func (tc *perItOrDescribeTestContext) PullSecretPath() string { - return tc.perBinaryInvocationTestContext.pullSecretPath -} - func (tc *perItOrDescribeTestContext) recordDeploymentOperationsUnlocked(resourceGroup, deployment string, operations []timing.Operation) { if _, exists := tc.timingMetadata.Deployments[resourceGroup]; !exists { tc.timingMetadata.Deployments[resourceGroup] = make(map[string][]timing.Operation) From 13bd2d0a26f5138d495d1975f0ede530de751f38 Mon Sep 17 00:00:00 2001 From: Mike Gahagan Date: Thu, 18 Dec 2025 15:46:29 -0500 Subject: [PATCH 4/5] use Minutes instead of seconds, report verifier state, skip test if no pull secret --- test/e2e/cluster_pullsecret.go | 54 ++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/test/e2e/cluster_pullsecret.go b/test/e2e/cluster_pullsecret.go index d4a9770fdb..324ceeceab 100644 --- a/test/e2e/cluster_pullsecret.go +++ b/test/e2e/cluster_pullsecret.go @@ -59,6 +59,12 @@ var _ = Describe("Cluster Pull Secret Management", func() { ) tc := framework.NewTestContext() + By("checking pull secret file exists") + pullSecretFilePath := filepath.Join(tc.PullSecretPath(), "pull-secret") + if _, err := os.Stat(pullSecretFilePath); os.IsNotExist(err) { + Skip(fmt.Sprintf("Pull secret file not found at %s, skipping test", pullSecretFilePath)) + } + By("creating a resource group") resourceGroup, err := tc.NewResourceGroup(ctx, "pullsecret-test", tc.Location()) Expect(err).NotTo(HaveOccurred()) @@ -138,14 +144,24 @@ var _ = Describe("Cluster Pull Secret Management", func() { Expect(err).NotTo(HaveOccurred()) By("waiting for HCCO to merge the additional pull secret with the global pull secret") + verifier := verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost) Eventually(func() error { - return verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) - }, 300*time.Second, 15*time.Second).Should(Succeed(), "additional pull secret should be merged into global-pull-secret by HCCO") + err := verifier.Verify(ctx, adminRESTConfig) + if err != nil { + GinkgoLogr.Info("Verifier check", "name", verifier.Name(), "status", "failed", "error", err.Error()) + } + return err + }, 5*time.Minute, 15*time.Second).Should(Succeed(), "additional pull secret should be merged into global-pull-secret by HCCO") By("verifying the DaemonSet for global pull secret synchronization is created") + verifier = verifiers.VerifyGlobalPullSecretSyncer() Eventually(func() error { - return verifiers.VerifyGlobalPullSecretSyncer().Verify(ctx, adminRESTConfig) - }, 60*time.Second, 10*time.Second).Should(Succeed(), "global-pull-secret-syncer DaemonSet should be created") + err := 
verifier.Verify(ctx, adminRESTConfig) + if err != nil { + GinkgoLogr.Info("Verifier check", "name", verifier.Name(), "status", "failed", "error", err.Error()) + } + return err + }, 1*time.Minute, 10*time.Second).Should(Succeed(), "global-pull-secret-syncer DaemonSet should be created") By("verifying the pull secret was merged into the global pull secret") err = verifiers.VerifyPullSecretAuthData( @@ -158,7 +174,6 @@ var _ = Describe("Cluster Pull Secret Management", func() { Expect(err).NotTo(HaveOccurred()) By("reading pull-secret file from aro-hcp-qe-pull-secret directory") - pullSecretFilePath := filepath.Join(tc.PullSecretPath(), "pull-secret") pullSecretFileData, err := os.ReadFile(pullSecretFilePath) Expect(err).NotTo(HaveOccurred(), "failed to read pull-secret file from %s", pullSecretFilePath) @@ -201,9 +216,14 @@ var _ = Describe("Cluster Pull Secret Management", func() { Expect(err).NotTo(HaveOccurred()) By("waiting for HCCO to merge the updated pull secret (with registry.redhat.io) into global pull secret") + verifier = verifiers.VerifyPullSecretMergedIntoGlobal(redhatRegistryHost) Eventually(func() error { - return verifiers.VerifyPullSecretMergedIntoGlobal(redhatRegistryHost).Verify(ctx, adminRESTConfig) - }, 300*time.Second, 15*time.Second).Should(Succeed(), "registry.redhat.io pull secret should be merged into global-pull-secret by HCCO") + err := verifier.Verify(ctx, adminRESTConfig) + if err != nil { + GinkgoLogr.Info("Verifier check", "name", verifier.Name(), "status", "failed", "error", err.Error()) + } + return err + }, 5*time.Minute, 15*time.Second).Should(Succeed(), "registry.redhat.io pull secret should be merged into global-pull-secret by HCCO") By("verifying both test registries are now in the global pull secret") err = verifiers.VerifyPullSecretMergedIntoGlobal(testPullSecretHost).Verify(ctx, adminRESTConfig) @@ -280,9 +300,14 @@ var _ = Describe("Cluster Pull Secret Management", func() { Expect(err).NotTo(HaveOccurred()) By("waiting for NFD operator to be installed") + verifier = verifiers.VerifyOperatorInstalled(nfdNamespace, "nfd") Eventually(func() error { - return verifiers.VerifyOperatorInstalled(nfdNamespace, "nfd").Verify(ctx, adminRESTConfig) - }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD operator should be installed successfully") + err := verifier.Verify(ctx, adminRESTConfig) + if err != nil { + GinkgoLogr.Info("Verifier check", "name", verifier.Name(), "status", "failed", "error", err.Error()) + } + return err + }, 5*time.Minute, 15*time.Second).Should(Succeed(), "NFD operator should be installed successfully") By("creating NodeFeatureDiscovery CR to deploy NFD worker") nfdGVR := schema.GroupVersionResource{ @@ -324,11 +349,16 @@ var _ = Describe("Cluster Pull Secret Management", func() { } } return fmt.Errorf("nfd-worker DaemonSet not found") - }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker DaemonSet should be created and have ready pods") + }, 5*time.Minute, 15*time.Second).Should(Succeed(), "NFD worker DaemonSet should be created and have ready pods") By("waiting for NFD worker pods to be created and verify images from registry.redhat.io can be pulled") + verifier = verifiers.VerifyImagePulled(nfdNamespace, "registry.redhat.io", "ose-node-feature-discovery") Eventually(func() error { - return verifiers.VerifyImagePulled(nfdNamespace, "registry.redhat.io", "ose-node-feature-discovery").Verify(ctx, adminRESTConfig) - }, 300*time.Second, 15*time.Second).Should(Succeed(), "NFD worker images from registry.redhat.io should 
be pulled successfully with the added pull secret") + err := verifier.Verify(ctx, adminRESTConfig) + if err != nil { + GinkgoLogr.Info("Verifier check", "name", verifier.Name(), "status", "failed", "error", err.Error()) + } + return err + }, 5*time.Minute, 15*time.Second).Should(Succeed(), "NFD worker images from registry.redhat.io should be pulled successfully with the added pull secret") }) }) From 95b2b6242fb3c12b257dd327e4fe66e88fe2725d Mon Sep 17 00:00:00 2001 From: Mike Gahagan Date: Fri, 19 Dec 2025 13:37:07 -0500 Subject: [PATCH 5/5] Use MI's from the reserved pool --- test/e2e/cluster_pullsecret.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/e2e/cluster_pullsecret.go b/test/e2e/cluster_pullsecret.go index 324ceeceab..4a8faca8b6 100644 --- a/test/e2e/cluster_pullsecret.go +++ b/test/e2e/cluster_pullsecret.go @@ -65,6 +65,11 @@ var _ = Describe("Cluster Pull Secret Management", func() { Skip(fmt.Sprintf("Pull secret file not found at %s, skipping test", pullSecretFilePath)) } + if tc.UsePooledIdentities() { + err := tc.AssignIdentityContainers(ctx, 1, 60*time.Second) + Expect(err).NotTo(HaveOccurred()) + } + By("creating a resource group") resourceGroup, err := tc.NewResourceGroup(ctx, "pullsecret-test", tc.Location()) Expect(err).NotTo(HaveOccurred())
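
For reference, a standalone sketch (not part of the patches above) of the dockerconfigjson payload that CreateTestDockerConfigSecret builds and that the pullsecret verifiers read back; the local types mirror framework.RegistryAuth and framework.DockerConfigJSON, and the host, credentials, and email reuse the test constants from cluster_pullsecret.go.

package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
)

// Local copies of the framework types (pullsecret_types.go).
type registryAuth struct {
	Username string `json:"username,omitempty"`
	Email    string `json:"email,omitempty"`
	Auth     string `json:"auth"`
}

type dockerConfigJSON struct {
	Auths map[string]registryAuth `json:"auths"`
}

func main() {
	// "auth" is base64("username:password"), matching CreateTestDockerConfigSecret.
	auth := base64.StdEncoding.EncodeToString([]byte("test-user" + ":" + "my_password"))

	payload := dockerConfigJSON{
		Auths: map[string]registryAuth{
			"host.example.com": {Email: "noreply@example.com", Auth: auth},
		},
	}

	// This JSON is stored under the .dockerconfigjson key of the
	// additional-pull-secret Secret in kube-system; HCCO merges it into
	// kube-system/global-pull-secret, which is what the verifiers inspect.
	raw, err := json.MarshalIndent(payload, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(raw))
}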