Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/Azure/ARO-Tools v0.0.0-20251219030559-37f654161c5a
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v3 v3.0.0-beta.2
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.6.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0
Expand Down
2 changes: 2 additions & 0 deletions test/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 h1:2qsI
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0/go.mod h1:AW8VEadnhw9xox+VaVd9sP7NjzOAnaZBLRH6Tq3cJ38=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0 h1:akP6VpxJGgQRpDR1P462piz/8OhYLRCreDj48AyNabc=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0/go.mod h1:8wzvopPfyZYPaQUoKW87Zfdul7jmJMDfp/k7YY3oJyA=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0 h1:HYGD75g0bQ3VO/Omedm54v4LrD3B1cGImuRF3AJ5wLo=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0/go.mod h1:ulHyBFJOI0ONiRL4vcJTmS7rx18jQQlEPmAgo80cRdM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armdeploymentstacks v1.0.1 h1:bcgO/crpp7wqI0Froi/I4C2fme7Vk/WLusbV399Do8I=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armdeploymentstacks v1.0.1/go.mod h1:kvfPmsE8gpOwwC1qrO1FeyBDDNfnwBN5UU3MPNiWW7I=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armfeatures v1.2.0 h1:wIDqH4WA5uJ6irRqjzodeSw6Pmp0tu3oIbwzBZEdMfQ=
Expand Down
6 changes: 3 additions & 3 deletions test/util/cleanup/resourcegroups/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ func (o *Options) Run(ctx context.Context) error {
CleanupWorkflow: o.CleanupWorkflow,
}

err := tc.CleanupResourceGroups(ctx,
tc.Get20240610ClientFactoryOrDie(ctx).NewHcpOpenShiftClustersClient(),
resourceGroupsClient, opts)
err := tc.CleanupResourceGroups(
ctx,
opts)
if err != nil {
logger.Error(err, "Failed to delete some resource groups", "count", len(resourceGroupsToDelete))
return err
Expand Down
122 changes: 90 additions & 32 deletions test/util/framework/per_test_framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
armcompute "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"

Expand All @@ -58,6 +59,7 @@ type perItOrDescribeTestContext struct {
armComputeClientFactory *armcompute.ClientFactory
armResourcesClientFactory *armresources.ClientFactory
armSubscriptionsClientFactory *armsubscriptions.ClientFactory
armNetworkClientFactory *armnetwork.ClientFactory
graphClient *graphutil.Client

timingMetadata timing.SpecTimingMetadata
Expand Down Expand Up @@ -135,16 +137,6 @@ func (tc *perItOrDescribeTestContext) deleteCreatedResources(ctx context.Context
return
}

hcpClientFactory, err := tc.Get20240610ClientFactory(ctx)
if err != nil {
ginkgo.GinkgoLogr.Error(err, "failed to get HCP client")
return
}
resourceGroupsClientFactory, err := tc.GetARMResourcesClientFactory(ctx)
if err != nil {
ginkgo.GinkgoLogr.Error(err, "failed to get ARM client")
return
}
graphClient, err := tc.GetGraphClient(ctx)
if err != nil {
ginkgo.GinkgoLogr.Error(err, "failed to get Graph client")
Expand All @@ -154,15 +146,15 @@ func (tc *perItOrDescribeTestContext) deleteCreatedResources(ctx context.Context
tc.contextLock.RLock()
resourceGroupNames := tc.knownResourceGroups
appRegistrations := tc.knownAppRegistrationIDs
defer tc.contextLock.RUnlock()
tc.contextLock.RUnlock()
ginkgo.GinkgoLogr.Info("deleting created resources")

opts := CleanupResourceGroupsOptions{
ResourceGroupNames: resourceGroupNames,
Timeout: 60 * time.Minute,
CleanupWorkflow: CleanupWorkflowStandard,
}
errCleanupResourceGroups := tc.CleanupResourceGroups(ctx, hcpClientFactory.NewHcpOpenShiftClustersClient(), resourceGroupsClientFactory.NewResourceGroupsClient(), opts)
errCleanupResourceGroups := tc.CleanupResourceGroups(ctx, opts)
if errCleanupResourceGroups != nil {
ginkgo.GinkgoLogr.Error(errCleanupResourceGroups, "at least one resource group failed to delete")
}
Expand Down Expand Up @@ -219,7 +211,7 @@ type CleanupResourceGroupsOptions struct {
CleanupWorkflow CleanupWorkflow
}

func (tc *perItOrDescribeTestContext) CleanupResourceGroups(ctx context.Context, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupsClient *armresources.ResourceGroupsClient, opts CleanupResourceGroupsOptions) error {
func (tc *perItOrDescribeTestContext) CleanupResourceGroups(ctx context.Context, opts CleanupResourceGroupsOptions) error {
// deletion takes a while, it's worth it to do this in parallel
wg := sync.WaitGroup{}
errCh := make(chan error, len(opts.ResourceGroupNames))
Expand All @@ -232,11 +224,11 @@ func (tc *perItOrDescribeTestContext) CleanupResourceGroups(ctx context.Context,

switch opts.CleanupWorkflow {
case CleanupWorkflowStandard:
if err := tc.cleanupResourceGroup(ctx, hcpClient, resourceGroupsClient, currResourceGroupName, opts.Timeout); err != nil {
if err := tc.cleanupResourceGroup(ctx, currResourceGroupName, opts.Timeout); err != nil {
errCh <- err
}
case CleanupWorkflowNoRP:
if err := tc.cleanupResourceGroupNoRP(ctx, resourceGroupsClient, currResourceGroupName, opts.Timeout); err != nil {
if err := tc.cleanupResourceGroupNoRP(ctx, currResourceGroupName, opts.Timeout); err != nil {
errCh <- err
}
}
Expand All @@ -255,8 +247,6 @@ func (tc *perItOrDescribeTestContext) CleanupResourceGroups(ctx context.Context,

// collectDebugInfo collects information and saves it in artifact dir
func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) {
tc.contextLock.RLock()
defer tc.contextLock.RUnlock()
ginkgo.GinkgoLogr.Info("collecting debug info")

leasedContainers, err := tc.leasedIdentityContainers()
Expand All @@ -267,10 +257,12 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) {

// deletion takes a while, it's worth it to do this in parallel
waitGroup, ctx := errgroup.WithContext(ctx)
tc.contextLock.RLock()
resourceGroups := append(
append([]string(nil), tc.knownResourceGroups...),
leasedContainers...,
)
tc.contextLock.RUnlock()
for _, resourceGroupName := range resourceGroups {
currResourceGroupName := resourceGroupName
waitGroup.Go(func() error {
Expand Down Expand Up @@ -313,9 +305,14 @@ func (tc *perItOrDescribeTestContext) NewResourceGroup(ctx context.Context, reso
return resourceGroup, nil
}

func (tc *perItOrDescribeTestContext) findManagedResourceGroups(ctx context.Context, resourceGroupsClient *armresources.ResourceGroupsClient, ResourceGroupName string) ([]string, error) {
func (tc *perItOrDescribeTestContext) findManagedResourceGroups(ctx context.Context, ResourceGroupName string) ([]string, error) {
managedResourceGroups := []string{}
pager := resourceGroupsClient.NewListPager(nil)
clientFactory, err := tc.GetARMResourcesClientFactory(ctx)
if err != nil {
return nil, err
}

pager := clientFactory.NewResourceGroupsClient().NewListPager(nil)
for pager.More() {
page, err := pager.NextPage(ctx)
if err != nil {
Expand All @@ -342,21 +339,34 @@ func (tc *perItOrDescribeTestContext) findManagedResourceGroups(ctx context.Cont
// 1. delete all HCP clusters and wait for success
// 2. check if any managed resource groups are left behind
// 3. delete the resource group and wait for success
func (tc *perItOrDescribeTestContext) cleanupResourceGroup(ctx context.Context, hcpClient *hcpsdk20240610preview.HcpOpenShiftClustersClient, resourceGroupsClient *armresources.ResourceGroupsClient, resourceGroupName string, timeout time.Duration) error {
func (tc *perItOrDescribeTestContext) cleanupResourceGroup(ctx context.Context, resourceGroupName string, timeout time.Duration) error {
startTime := time.Now()
defer func() {
finishTime := time.Now()
tc.recordTestStepUnlocked(fmt.Sprintf("Clean up resource group %s", resourceGroupName), startTime, finishTime)
tc.RecordTestStep(fmt.Sprintf("Clean up resource group %s", resourceGroupName), startTime, finishTime)
}()

errs := []error{}
resourceClientFactory, err := tc.GetARMResourcesClientFactory(ctx)
if err != nil {
return err
}

networkClientFactory, err := tc.GetARMNetworkClientFactory(ctx)
if err != nil {
return err
}

hcpClientFactory, err := tc.Get20240610ClientFactory(ctx)
if err != nil {
return err
}

ginkgo.GinkgoLogr.Info("deleting all hcp clusters in resource group", "resourceGroup", resourceGroupName)
if err := DeleteAllHCPClusters(ctx, hcpClient, resourceGroupName, timeout); err != nil {
if err := DeleteAllHCPClusters(ctx, hcpClientFactory.NewHcpOpenShiftClustersClient(), resourceGroupName, timeout); err != nil {
return fmt.Errorf("failed to cleanup resource group: %w", err)
}

managedResourceGroups, err := tc.findManagedResourceGroups(ctx, resourceGroupsClient, resourceGroupName)
managedResourceGroups, err := tc.findManagedResourceGroups(ctx, resourceGroupName)
if err != nil {
return fmt.Errorf("failed to search for managed resource groups: %w", err)
}
Expand All @@ -368,11 +378,11 @@ func (tc *perItOrDescribeTestContext) cleanupResourceGroup(ctx context.Context,
}

ginkgo.GinkgoLogr.Info("deleting resource group", "resourceGroup", resourceGroupName)
if err := DeleteResourceGroup(ctx, resourceGroupsClient, resourceGroupName, false, timeout); err != nil {
if err := DeleteResourceGroup(ctx, resourceClientFactory.NewResourceGroupsClient(), networkClientFactory, resourceGroupName, false, timeout); err != nil {
return fmt.Errorf("failed to cleanup resource group: %w", err)
}

return errors.Join(errs...)
return nil
}

// cleanupResourceGroupNoRP performs cleanup when the resource provider is not available.
Expand All @@ -381,7 +391,7 @@ func (tc *perItOrDescribeTestContext) cleanupResourceGroup(ctx context.Context,
// 1. discovers any "managed" resource groups whose ManagedBy references a resource in the parent
// resource group and deletes them (using 'force' to speed up VM/VMSS deletion).
// 2. deletes the parent resource group itself.
func (tc *perItOrDescribeTestContext) cleanupResourceGroupNoRP(ctx context.Context, resourceGroupsClient *armresources.ResourceGroupsClient, resourceGroupName string, timeout time.Duration) error {
func (tc *perItOrDescribeTestContext) cleanupResourceGroupNoRP(ctx context.Context, resourceGroupName string, timeout time.Duration) error {
startTime := time.Now()
defer func() {
finishTime := time.Now()
Expand All @@ -390,14 +400,24 @@ func (tc *perItOrDescribeTestContext) cleanupResourceGroupNoRP(ctx context.Conte

errs := []error{}

managedResourceGroups, err := tc.findManagedResourceGroups(ctx, resourceGroupsClient, resourceGroupName)
managedResourceGroups, err := tc.findManagedResourceGroups(ctx, resourceGroupName)
if err != nil {
return fmt.Errorf("failed to search for managed resource groups: %w", err)
}

resourceClientFactory, err := tc.GetARMResourcesClientFactory(ctx)
if err != nil {
return err
}

networkClientFactory, err := tc.GetARMNetworkClientFactory(ctx)
if err != nil {
return err
}

for _, managedRG := range managedResourceGroups {
ginkgo.GinkgoLogr.Info("deleting managed resource group", "resourceGroup", managedRG, "parentResourceGroup", resourceGroupName)
if err := DeleteResourceGroup(ctx, resourceGroupsClient, managedRG, true, timeout); err != nil {
if err := DeleteResourceGroup(ctx, resourceClientFactory.NewResourceGroupsClient(), networkClientFactory, managedRG, true, timeout); err != nil {
if isIgnorableResourceGroupCleanupError(err) {
ginkgo.GinkgoLogr.Info("ignoring not found resource group", "resourceGroup", managedRG)
} else {
Expand All @@ -407,7 +427,7 @@ func (tc *perItOrDescribeTestContext) cleanupResourceGroupNoRP(ctx context.Conte
}

ginkgo.GinkgoLogr.Info("deleting resource group", "resourceGroup", resourceGroupName)
if err := DeleteResourceGroup(ctx, resourceGroupsClient, resourceGroupName, false, timeout); err != nil {
if err := DeleteResourceGroup(ctx, resourceClientFactory.NewResourceGroupsClient(), networkClientFactory, resourceGroupName, false, timeout); err != nil {
return fmt.Errorf("failed to cleanup resource group: %w", err)
}

Expand All @@ -424,12 +444,12 @@ func (tc *perItOrDescribeTestContext) collectDebugInfoForResourceGroup(ctx conte
startTime := time.Now()
defer func() {
finishTime := time.Now()
tc.recordTestStepUnlocked(fmt.Sprintf("Collect debug info for resource group %s", resourceGroupName), startTime, finishTime)
tc.RecordTestStep(fmt.Sprintf("Collect debug info for resource group %s", resourceGroupName), startTime, finishTime)
}()

errs := []error{}

armResourceClient, err := tc.getARMResourcesClientFactoryUnlocked(ctx)
armResourceClient, err := tc.GetARMResourcesClientFactory(ctx)
if err != nil {
return fmt.Errorf("failed to get ARM resource client: %w", err)
}
Expand Down Expand Up @@ -556,6 +576,44 @@ func (tc *perItOrDescribeTestContext) getARMSubscriptionsClientFactoryUnlocked()
return tc.armSubscriptionsClientFactory, nil
}

func (tc *perItOrDescribeTestContext) GetARMNetworkClientFactory(ctx context.Context) (*armnetwork.ClientFactory, error) {
tc.contextLock.RLock()
if tc.armNetworkClientFactory != nil {
defer tc.contextLock.RUnlock()
return tc.armNetworkClientFactory, nil
}
tc.contextLock.RUnlock()

tc.contextLock.Lock()
defer tc.contextLock.Unlock()

return tc.getARMNetworkClientFactoryUnlocked(ctx)
}

func (tc *perItOrDescribeTestContext) getARMNetworkClientFactoryUnlocked(ctx context.Context) (*armnetwork.ClientFactory, error) {
if tc.armNetworkClientFactory != nil {
return tc.armNetworkClientFactory, nil
}

creds, err := tc.perBinaryInvocationTestContext.getAzureCredentials()
if err != nil {
return nil, err
}

// We already hold the lock, so we call the unlocked version
subscriptionID, err := tc.getSubscriptionIDUnlocked(ctx)
if err != nil {
return nil, err
}

clientFactory, err := armnetwork.NewClientFactory(subscriptionID, creds, tc.perBinaryInvocationTestContext.getClientFactoryOptions())
if err != nil {
return nil, err
}
tc.armNetworkClientFactory = clientFactory
return tc.armNetworkClientFactory, nil
}

func (tc *perItOrDescribeTestContext) GetARMResourcesClientFactory(ctx context.Context) (*armresources.ClientFactory, error) {
tc.contextLock.RLock()
if tc.armResourcesClientFactory != nil {
Expand Down
57 changes: 57 additions & 0 deletions test/util/framework/resourcegroup_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
)
Expand Down Expand Up @@ -122,6 +123,7 @@ func ListAllExpiredResourceGroups(
func DeleteResourceGroup(
ctx context.Context,
resourceGroupsClient *armresources.ResourceGroupsClient,
networkClientFactory *armnetwork.ClientFactory,
resourceGroupName string,
force bool,
timeout time.Duration,
Expand All @@ -130,6 +132,12 @@ func DeleteResourceGroup(
ctx, cancel := context.WithTimeoutCause(ctx, timeout, fmt.Errorf("timeout '%f' minutes exceeded during DeleteResourceGroup for resource group %s", timeout.Minutes(), resourceGroupName))
defer cancel()

// detach any NSGs from subnets to avoid blocking deletion of the RG
err := detachSubnetNSGs(ctx, networkClientFactory, resourceGroupName)
if err != nil {
return fmt.Errorf("failed to detach NSGs from subnets in resource group %s: %w", resourceGroupName, err)
}

var opts *armresources.ResourceGroupsClientBeginDeleteOptions
if force {
opts = &armresources.ResourceGroupsClientBeginDeleteOptions{
Expand Down Expand Up @@ -164,3 +172,52 @@ func DeleteResourceGroup(

return nil
}

// detach any NSGs from subnets to avoid blocking deletion of the RG
func detachSubnetNSGs(ctx context.Context, networkClientFactory *armnetwork.ClientFactory, resourceGroupName string) error {
vnetClient := networkClientFactory.NewVirtualNetworksClient()
subnetClient := networkClientFactory.NewSubnetsClient()

vnetPager := vnetClient.NewListPager(resourceGroupName, nil)
for vnetPager.More() {
vnetPage, err := vnetPager.NextPage(ctx)
if err != nil {
return fmt.Errorf("failed listing vnets in resource group %s: %w", resourceGroupName, err)
}

for _, vnet := range vnetPage.Value {
if vnet == nil || vnet.Properties == nil || vnet.Properties.Subnets == nil || vnet.Name == nil {
continue
}

subnetsPager := subnetClient.NewListPager(resourceGroupName, *vnet.Name, nil)
for subnetsPager.More() {
subnetPage, err := subnetsPager.NextPage(ctx)
if err != nil {
return fmt.Errorf("failed listing subnets in resource group %s: %w", resourceGroupName, err)
}

for _, subnet := range subnetPage.Value {
if subnet == nil || subnet.Name == nil || subnet.Properties == nil || subnet.Properties.NetworkSecurityGroup == nil || subnet.Properties.NetworkSecurityGroup.ID == nil {
continue
}

subnet.Properties.NetworkSecurityGroup = nil
poller, err := subnetClient.BeginCreateOrUpdate(ctx, resourceGroupName, *vnet.Name, *subnet.Name, *subnet, &armnetwork.SubnetsClientBeginCreateOrUpdateOptions{})
if err != nil {
return fmt.Errorf("failed detaching NSG from subnet %s in resource group %s: %w", *subnet.Name, resourceGroupName, err)
}

_, err = poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{
Frequency: StandardPollInterval,
})
if err != nil {
return fmt.Errorf("failed waiting for subnet %s in resource group %s to finish updating: %w", *subnet.Name, resourceGroupName, err)
}
}
}
}
}

return nil
}