diff --git a/test/e2e/nodepool_update.go b/test/e2e/nodepool_update.go
new file mode 100644
index 0000000000..84e6b4a661
--- /dev/null
+++ b/test/e2e/nodepool_update.go
@@ -0,0 +1,183 @@
+// Copyright 2025 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+	"context"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
+
+	hcpsdk20240610preview "github.com/Azure/ARO-HCP/test/sdk/resourcemanager/redhatopenshifthcp/armredhatopenshifthcp"
+	"github.com/Azure/ARO-HCP/test/util/framework"
+	"github.com/Azure/ARO-HCP/test/util/labels"
+	"github.com/Azure/ARO-HCP/test/util/verifiers"
+)
+
+var _ = Describe("Customer", func() {
+	It("should be able to update nodepool replicas and autoscaling",
+		labels.RequireNothing,
+		labels.High,
+		labels.Positive,
+		labels.AroRpApiCompatible,
+		func(ctx context.Context) {
+			const (
+				customerClusterName  = "np-update-nodes-hcp-cluster"
+				customerNodePoolName = "np-update-nodes"
+			)
+
+			tc := framework.NewTestContext()
+
+			if tc.UsePooledIdentities() {
+				err := tc.AssignIdentityContainers(ctx, 1, 60*time.Second)
+				Expect(err).NotTo(HaveOccurred())
+			}
+
+			By("creating a resource group")
+			resourceGroup, err := tc.NewResourceGroup(ctx, "nodepool-update-nodes", tc.Location())
+			Expect(err).NotTo(HaveOccurred())
+
+			By("creating cluster parameters")
+			clusterParams := framework.NewDefaultClusterParams()
+			clusterParams.ClusterName = customerClusterName
+			managedResourceGroupName := framework.SuffixName(*resourceGroup.Name, "-managed", 64)
+			clusterParams.ManagedResourceGroupName = managedResourceGroupName
+
+			By("creating customer resources")
+			clusterParams, err = tc.CreateClusterCustomerResources(ctx,
+				resourceGroup,
+				clusterParams,
+				map[string]interface{}{
+					"persistTagValue": false,
+				},
+				TestArtifactsFS,
+			)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("creating the HCP cluster")
+			err = tc.CreateHCPClusterFromParam(ctx,
+				GinkgoLogr,
+				*resourceGroup.Name,
+				clusterParams,
+				45*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("getting admin credentials for the cluster")
+			adminRESTConfig, err := tc.GetAdminRESTConfigForHCPCluster(
+				ctx,
+				tc.Get20240610ClientFactoryOrDie(ctx).NewHcpOpenShiftClustersClient(),
+				*resourceGroup.Name,
+				customerClusterName,
+				10*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("creating the node pool")
+			nodePoolParams := framework.NewDefaultNodePoolParams()
+			nodePoolParams.NodePoolName = customerNodePoolName
+			nodePoolParams.Replicas = int32(2)
+
+			err = tc.CreateNodePoolFromParam(ctx,
+				*resourceGroup.Name,
+				customerClusterName,
+				nodePoolParams,
+				45*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("verifying node count and status after initial creation")
+			Expect(verifiers.VerifyNodeCount(int(nodePoolParams.Replicas)).Verify(ctx, adminRESTConfig)).To(Succeed())
+			Expect(verifiers.VerifyNodesReady().Verify(ctx, adminRESTConfig)).To(Succeed())
+
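+			// The scale-up, scale-down, and autoscaling steps below all go through the same
+			// framework.UpdateNodePoolAndWait helper, which polls the PATCH LRO to completion
+			// before the node-level verifiers are re-run against the hosted cluster.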
+			By("scaling up the nodepool from 2 to 3 replicas")
+			update := hcpsdk20240610preview.NodePoolUpdate{
+				Properties: &hcpsdk20240610preview.NodePoolPropertiesUpdate{
+					Replicas: to.Ptr(int32(3)),
+				},
+			}
+			scaleUpResp, err := framework.UpdateNodePoolAndWait(ctx,
+				tc.Get20240610ClientFactoryOrDie(ctx).NewNodePoolsClient(),
+				*resourceGroup.Name,
+				customerClusterName,
+				customerNodePoolName,
+				update,
+				45*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(scaleUpResp.Properties).NotTo(BeNil())
+			Expect(scaleUpResp.Properties.Replicas).NotTo(BeNil())
+			Expect(*scaleUpResp.Properties.Replicas).To(Equal(int32(3)))
+
+			By("verifying node count and status after scaling up")
+			Expect(verifiers.VerifyNodeCount(3).Verify(ctx, adminRESTConfig)).To(Succeed())
+			Expect(verifiers.VerifyNodesReady().Verify(ctx, adminRESTConfig)).To(Succeed())
+
+			By("scaling down the nodepool from 3 to 1 replica")
+			update = hcpsdk20240610preview.NodePoolUpdate{
+				Properties: &hcpsdk20240610preview.NodePoolPropertiesUpdate{
+					Replicas: to.Ptr(int32(1)),
+				},
+			}
+			scaleDownResp, err := framework.UpdateNodePoolAndWait(ctx,
+				tc.Get20240610ClientFactoryOrDie(ctx).NewNodePoolsClient(),
+				*resourceGroup.Name,
+				customerClusterName,
+				customerNodePoolName,
+				update,
+				45*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(scaleDownResp.Properties).NotTo(BeNil())
+			Expect(scaleDownResp.Properties.Replicas).NotTo(BeNil())
+			Expect(*scaleDownResp.Properties.Replicas).To(Equal(int32(1)))
+
+			By("verifying node count and status after scaling down")
+			Expect(verifiers.VerifyNodeCount(1).Verify(ctx, adminRESTConfig)).To(Succeed())
+			Expect(verifiers.VerifyNodesReady().Verify(ctx, adminRESTConfig)).To(Succeed())
+
+			By("enabling autoscaling with min 2 and max 3 replicas")
+			update = hcpsdk20240610preview.NodePoolUpdate{
+				Properties: &hcpsdk20240610preview.NodePoolPropertiesUpdate{
+					AutoScaling: &hcpsdk20240610preview.NodePoolAutoScaling{
+						Min: to.Ptr(int32(2)),
+						Max: to.Ptr(int32(3)),
+					},
+				},
+			}
+			autoscaleResp, err := framework.UpdateNodePoolAndWait(ctx,
+				tc.Get20240610ClientFactoryOrDie(ctx).NewNodePoolsClient(),
+				*resourceGroup.Name,
+				customerClusterName,
+				customerNodePoolName,
+				update,
+				45*time.Minute,
+			)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(autoscaleResp.Properties).NotTo(BeNil())
+			Expect(autoscaleResp.Properties.AutoScaling).NotTo(BeNil())
+			Expect(autoscaleResp.Properties.AutoScaling.Min).NotTo(BeNil())
+			Expect(autoscaleResp.Properties.AutoScaling.Max).NotTo(BeNil())
+			Expect(*autoscaleResp.Properties.AutoScaling.Min).To(Equal(int32(2)))
+			Expect(*autoscaleResp.Properties.AutoScaling.Max).To(Equal(int32(3)))
+
+			By("verifying node count and status after enabling autoscaling")
+			Expect(verifiers.VerifyNodeCount(2).Verify(ctx, adminRESTConfig)).To(Succeed())
+			Expect(verifiers.VerifyNodesReady().Verify(ctx, adminRESTConfig)).To(Succeed())
+		})
+})
diff --git a/test/util/framework/hcp_helper.go b/test/util/framework/hcp_helper.go
index 5453d9e10a..7eb4bc39a3 100644
--- a/test/util/framework/hcp_helper.go
+++ b/test/util/framework/hcp_helper.go
@@ -331,6 +331,54 @@ func GetNodePool(
 	return nodePoolsClient.Get(ctx, resourceGroupName, hcpClusterName, nodePoolName, nil)
 }
 
+// UpdateNodePoolAndWait sends a PATCH (BeginUpdate) request for a nodepool and waits for completion
+// within the provided timeout. It returns the final update response or an error.
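+// The completed LRO response is cross-checked against a fresh GET of the nodepool via
+// checkOperationResult before it is returned to the caller.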
+func UpdateNodePoolAndWait(
+	ctx context.Context,
+	nodePoolsClient *hcpsdk20240610preview.NodePoolsClient,
+	resourceGroupName string,
+	hcpClusterName string,
+	nodePoolName string,
+	update hcpsdk20240610preview.NodePoolUpdate,
+	timeout time.Duration,
+) (*hcpsdk20240610preview.NodePool, error) {
+	ctx, cancel := context.WithTimeoutCause(ctx, timeout, fmt.Errorf("timeout '%f' minutes exceeded during UpdateNodePoolAndWait for nodepool %s in cluster %s in resource group %s", timeout.Minutes(), nodePoolName, hcpClusterName, resourceGroupName))
+	defer cancel()
+
+	poller, err := nodePoolsClient.BeginUpdate(ctx, resourceGroupName, hcpClusterName, nodePoolName, update, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	operationResult, err := poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{
+		Frequency: StandardPollInterval,
+	})
+	if err != nil {
+		if errors.Is(err, context.DeadlineExceeded) {
+			return nil, fmt.Errorf("failed waiting for nodepool=%q in cluster=%q resourcegroup=%q to finish updating, caused by: %w, error: %w", nodePoolName, hcpClusterName, resourceGroupName, context.Cause(ctx), err)
+		}
+		return nil, fmt.Errorf("failed waiting for nodepool=%q in cluster=%q resourcegroup=%q to finish updating: %w", nodePoolName, hcpClusterName, resourceGroupName, err)
+	}
+
+	switch m := any(operationResult).(type) {
+	case hcpsdk20240610preview.NodePoolsClientUpdateResponse:
+		expect, err := GetNodePool(ctx, nodePoolsClient, resourceGroupName, hcpClusterName, nodePoolName)
+		if err != nil {
+			if errors.Is(err, context.DeadlineExceeded) {
+				return nil, fmt.Errorf("failed getting nodepool=%q in cluster=%q resourcegroup=%q, caused by: %w, error: %w", nodePoolName, hcpClusterName, resourceGroupName, context.Cause(ctx), err)
+			}
+			return nil, err
+		}
+		err = checkOperationResult(&expect.NodePool, &m.NodePool)
+		if err != nil {
+			return nil, err
+		}
+		return &m.NodePool, nil
+	default:
+		return nil, fmt.Errorf("unknown type %T", m)
+	}
+}
+
 // CreateOrUpdateExternalAuthAndWait creates or updates an external auth on an HCP cluster and waits
 func CreateOrUpdateExternalAuthAndWait(
 	ctx context.Context,
diff --git a/test/util/verifiers/nodes.go b/test/util/verifiers/nodes.go
index 4cdf5eda3f..4f6bceeaf0 100644
--- a/test/util/verifiers/nodes.go
+++ b/test/util/verifiers/nodes.go
@@ -66,3 +66,35 @@ func (v verifyNodesReady) Verify(ctx context.Context, adminRESTConfig *rest.Conf
 func VerifyNodesReady() HostedClusterVerifier {
 	return verifyNodesReady{}
 }
+
+type verifyNodeCount struct {
+	expected int
+}
+
+func (v verifyNodeCount) Name() string {
+	return fmt.Sprintf("VerifyNodeCount(%d)", v.expected)
+}
+
+func (v verifyNodeCount) Verify(ctx context.Context, adminRESTConfig *rest.Config) error {
+	kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
+	if err != nil {
+		return fmt.Errorf("failed to create kubernetes client: %w", err)
+	}
+
+	nodes, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return fmt.Errorf("can't list nodes in the cluster: %w", err)
+	}
+
+	if len(nodes.Items) != v.expected {
+		return fmt.Errorf("expected %d nodes, found %d", v.expected, len(nodes.Items))
+	}
+
+	return nil
+}
+
+func VerifyNodeCount(expected int) HostedClusterVerifier {
+	return verifyNodeCount{
+		expected: expected,
+	}
+}