From fe5aac7c364bb6c552aab7bfd245a9e851ad7c86 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 21 May 2025 11:41:32 +0200 Subject: [PATCH 001/321] fix: We need these secrets before running actual pipelines --- .../pkg/journey/handle_component.go | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 8e304a30e2..f7ef2bfcf2 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -302,6 +302,20 @@ func HandleComponent(ctx *PerComponentContext) error { return logging.Logger.Fail(60, "Component failed creation: %v", err) } + // Configure imagePullSecrets needed for component build task images + if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { + _, err = logging.Measure( + configurePipelineImagePullSecrets, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + ctx.ComponentName, + ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets, + ) + if err != nil { + return logging.Logger.Fail(61, "Failed to configure pipeline imagePullSecrets: %v", err) + } + } + var pullIface interface{} pullIface, err = logging.Measure( getPaCPullNumber, @@ -310,14 +324,14 @@ func HandleComponent(ctx *PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(61, "Component failed validation: %v", err) + return logging.Logger.Fail(62, "Component failed validation: %v", err) } // Get merge request number var ok bool ctx.MergeRequestNumber, ok = pullIface.(int) if !ok { - return logging.Logger.Fail(62, "Type assertion failed on pull: %+v", pullIface) + return logging.Logger.Fail(63, "Type assertion failed on pull: %+v", pullIface) } // If this is supposed to be a multi-arch build, we do not care about @@ -347,24 +361,10 @@ func HandleComponent(ctx *PerComponentContext) error { placeholders, ) if err != nil { - 
return logging.Logger.Fail(63, "Repo-templating workflow component cleanup failed: %v", err) + return logging.Logger.Fail(64, "Repo-templating workflow component cleanup failed: %v", err) } } - // Configure imagePullSecrets needed for component build task images - if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { - _, err = logging.Measure( - configurePipelineImagePullSecrets, - ctx.Framework, - ctx.ParentContext.ParentContext.Namespace, - ctx.ComponentName, - ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets, - ) - if err != nil { - return logging.Logger.Fail(64, "Failed to configure pipeline imagePullSecrets: %v", err) - } - } - return nil } From ee3531252188f8eb624470b210bbf49e15253f47 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 21 May 2025 12:38:31 +0200 Subject: [PATCH 002/321] fix: Passing multiple values need repeats of the option --- tests/load-tests/loadtest.go | 2 +- tests/load-tests/run-max-concurrency.sh | 2 +- tests/load-tests/run-stage-max-concurrency.sh | 2 +- tests/load-tests/run-stage.sh | 2 +- tests/load-tests/run.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 4bd40fa5f5..767bf44ab9 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -52,7 +52,7 @@ func init() { rootCmd.Flags().StringVar(&opts.JourneyDuration, "journey-duration", "1h", "repeat user journey until this timeout (either this or --journey-repeats)") rootCmd.Flags().BoolVar(&opts.PipelineMintmakerDisabled, "pipeline-mintmaker-disabled", true, "if you want to stop Mintmaker to be creating update PRs for your component (default in loadtest different from Konflux default)") rootCmd.Flags().BoolVar(&opts.PipelineRepoTemplating, "pipeline-repo-templating", false, "if we should use in repo template pipelines (merge PaC PR, template repo pipelines and ignore custom pipeline run, e.g. 
required for multi arch test)") - rootCmd.Flags().StringArrayVar(&opts.PipelineImagePullSecrets, "pipeline-image-pull-secrets", []string{}, "space separated secrets needed to pull task images") + rootCmd.Flags().StringArrayVar(&opts.PipelineImagePullSecrets, "pipeline-image-pull-secrets", []string{}, "secret needed to pull task images, can be used multiple times") rootCmd.Flags().StringVarP(&opts.OutputDir, "output-dir", "o", ".", "directory where output files such as load-tests.log or load-tests.json are stored") rootCmd.Flags().StringVar(&opts.BuildPipelineSelectorBundle, "build-pipeline-selector-bundle", "", "BuildPipelineSelector bundle to use when testing with build-definition PR") rootCmd.Flags().BoolVarP(&opts.LogInfo, "log-info", "v", false, "log messages with info level and above") diff --git a/tests/load-tests/run-max-concurrency.sh b/tests/load-tests/run-max-concurrency.sh index 84b9e236af..3c92480e16 100755 --- a/tests/load-tests/run-max-concurrency.sh +++ b/tests/load-tests/run-max-concurrency.sh @@ -52,7 +52,7 @@ load_test() { rm -rvf "$workdir/load-test.log" options="" - [[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && options="$options --pipeline-image-pull-secrets $PIPELINE_IMAGE_PULL_SECRETS" + [[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && for s in $PIPELINE_IMAGE_PULL_SECRETS; do options="$options --pipeline-image-pull-secrets $s"; done date -Ins --utc >started go run loadtest.go \ diff --git a/tests/load-tests/run-stage-max-concurrency.sh b/tests/load-tests/run-stage-max-concurrency.sh index 7d68bd2f16..36cc703e5c 100755 --- a/tests/load-tests/run-stage-max-concurrency.sh +++ b/tests/load-tests/run-stage-max-concurrency.sh @@ -50,7 +50,7 @@ load_test() { rm -rvf "$workdir/load-test.log" options="" - [[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && options="$options --pipeline-image-pull-secrets $PIPELINE_IMAGE_PULL_SECRETS" + [[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && for s in $PIPELINE_IMAGE_PULL_SECRETS; do options="$options 
--pipeline-image-pull-secrets $s"; done date -Ins --utc >started go run loadtest.go \ diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index 46f114039f..fff517a1d1 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -5,7 +5,7 @@ set -o errexit set -o pipefail options="" -[[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && options="$options --pipeline-image-pull-secrets $PIPELINE_IMAGE_PULL_SECRETS" +[[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && for s in $PIPELINE_IMAGE_PULL_SECRETS; do options="$options --pipeline-image-pull-secrets $s"; done date -Ins --utc >started go run loadtest.go \ diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index 6d59021f4f..b4e7fea113 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -61,7 +61,7 @@ fi ## Run the actual load test options="" -[[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && options="$options --pipeline-image-pull-secrets $PIPELINE_IMAGE_PULL_SECRETS" +[[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && for s in $PIPELINE_IMAGE_PULL_SECRETS; do options="$options --pipeline-image-pull-secrets $s"; done date -Ins --utc >started go run loadtest.go \ --applications-count "${APPLICATIONS_COUNT:-1}" \ From 658810eec193c44c43242ab6d0984ab4b6881e68 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 21 May 2025 14:07:51 +0200 Subject: [PATCH 003/321] feat: Remove forgotten debug print --- tests/load-tests/pkg/journey/handle_component.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index f7ef2bfcf2..ba073a233c 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -168,7 +168,6 @@ func configurePipelineImagePullSecrets(f *framework.Framework, namespace, compon component_sa := "build-pipeline-" + component for _, secret := range secrets { - println("-", secret) err := 
f.AsKubeAdmin.CommonController.LinkSecretToServiceAccount(namespace, secret, component_sa, true) if err != nil { return fmt.Errorf("Unable to add secret %s to service account %s: %v", secret, component_sa, err) From fedafe554d00311482b54a896c18d271ca81866b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 22 May 2025 08:05:08 +0200 Subject: [PATCH 004/321] feat: Add basic info about errors to main JSON file --- .../ci-scripts/stage/collect-results.sh | 5 +- tests/load-tests/errors.py | 100 ++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100755 tests/load-tests/errors.py diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index c961b88509..c0286a8e19 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -39,6 +39,9 @@ python3 -m pip install matplotlib echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" +echo "[$(date --utc -Ins)] Create summary JSON with errors" +./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-errors.json" + echo "[$(date --utc -Ins)] Counting PRs and TRs" ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" >"${ARTIFACT_DIR}/count-multiarch-taskruns.log" @@ -54,7 +57,7 @@ STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" status_data.py \ --status-data-file "${STATUS_DATA_FILE}" \ --set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \ - --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" "results.durations=${ARTIFACT_DIR}/get-taskruns-durations.json" + --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" 
"results.errors=${ARTIFACT_DIR}/load-test-errors.json" "results.durations=${ARTIFACT_DIR}/get-taskruns-durations.json" echo "[$(date --utc -Ins)] Adding monitoring data" mstarted="$( date -d "$( cat started )" --utc -Iseconds )" diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py new file mode 100755 index 0000000000..1822f6ce89 --- /dev/null +++ b/tests/load-tests/errors.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +import csv +import json +import re +import sys +import collections + + +# Column indexes in input data +COLUMN_WHEN = 0 +COLUMN_CODE = 1 +COLUMN_MESSAGE = 2 + +# Errors patterns we recognize (when newlines were removed) +ERRORS = { + "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", + "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", + "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", + "Couldnt get pipeline via http resolver from gitlab.cee": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found", + "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", + "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", + "Failed application creation when calling mapplication.kb.io 
webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", + "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", + "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", + "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", + "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", + "Post-test data collection failed": r"Failed to collect pipeline run JSONs", + "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", + "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", +} + + +def message_to_reason(msg: str) -> str | None: + """ + Classifies an error message using regular expressions and returns the error 
name. + + Args: + msg: The input error message string. + + Returns: + The name of the error if a pattern matches, otherwise string "UNKNOWN". + """ + msg = msg.replace("\n", " ") # Remove newlines + for error_name, pattern in ERRORS.items(): + if re.search(pattern, msg): + return error_name + print(f"Unknown error: {msg}") + return "UNKNOWN" + + +def main(): + input_file = sys.argv[1] + output_file = sys.argv[2] + + error_messages = [] # list of error messages + error_reasons = [] # list of textual error reasons + error_by_code = collections.defaultdict( + lambda: 0 + ) # key: numeric error code, value: number of such errors + error_by_reason = collections.defaultdict( + lambda: 0 + ) # key: textual error reason, value: number of such errors + + with open(input_file, "r") as fp: + csvreader = csv.reader(fp) + for row in csvreader: + if row == []: + continue + + code = row[COLUMN_CODE] + message = row[COLUMN_MESSAGE] + + reason = message_to_reason(message) + + error_messages.append(message) + error_reasons.append(reason) + error_by_code[code] += 1 + error_by_reason[reason] += 1 + + data = { + "error_by_code": error_by_code, + "error_by_reason": error_by_reason, + "error_reasons_simple": "; ".join(error_reasons), + "error_messages": error_messages, + } + + print(f"Errors detected: {len(error_messages)}") + print("Errors by reason:") + for k, v in error_by_reason.items(): + print(f" {v} x {k}") + + with open(output_file, "w") as fp: + json.dump(data, fp, indent=4) + print(f"Data dumped to {output_file}") + + +if __name__ == "__main__": + sys.exit(main()) From 72a9d0e274fd51cd9b325da9250d379030fb97bd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 22 May 2025 08:45:01 +0200 Subject: [PATCH 005/321] fix: When there is no error file, do not fail --- tests/load-tests/errors.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1822f6ce89..d1d4414f7f 100755 
--- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -63,21 +63,24 @@ def main(): lambda: 0 ) # key: textual error reason, value: number of such errors - with open(input_file, "r") as fp: - csvreader = csv.reader(fp) - for row in csvreader: - if row == []: - continue - - code = row[COLUMN_CODE] - message = row[COLUMN_MESSAGE] - - reason = message_to_reason(message) - - error_messages.append(message) - error_reasons.append(reason) - error_by_code[code] += 1 - error_by_reason[reason] += 1 + try: + with open(input_file, "r") as fp: + csvreader = csv.reader(fp) + for row in csvreader: + if row == []: + continue + + code = row[COLUMN_CODE] + message = row[COLUMN_MESSAGE] + + reason = message_to_reason(message) + + error_messages.append(message) + error_reasons.append(reason) + error_by_code[code] += 1 + error_by_reason[reason] += 1 + except FileNotFoundError: + print("No errors file found, good :-D") data = { "error_by_code": error_by_code, From 5a9d5070b2a9184959eb03921fa95224d471c979 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 22 May 2025 10:41:36 +0200 Subject: [PATCH 006/321] feat: Recognize new error from rpm build pipeline about image pull secrets --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d1d4414f7f..745dc23271 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -23,6 +23,7 @@ "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post 
.*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", + "Failed to link pipeline image pull secret to build service account": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", From 32093928e52c8f38a50d7acd4b81341944991ad7 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 29 Jul 2024 08:08:12 +0200 Subject: [PATCH 007/321] fix: Retry forking as it sometimes hangs, observed for runs with 100+ forks (cherry picked from commit ef92203b3e0c18fbdc9899f52e0bb3f7265ff76a) --- .../load-tests/pkg/journey/handle_repo_templating.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go 
b/tests/load-tests/pkg/journey/handle_repo_templating.go index 967e42164c..ea5adf5ad9 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -3,11 +3,13 @@ package journey import "fmt" import "strings" import "regexp" +import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import github "github.com/google/go-github/v44/github" +import utils "github.com/konflux-ci/e2e-tests/pkg/utils" var fileList = []string{"COMPONENT-pull-request.yaml", "COMPONENT-push.yaml"} @@ -123,7 +125,14 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s } // Create fork and make sure it appears - forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepository(sourceName, targetName) + err = utils.WaitUntilWithInterval(func() (done bool, err error) { + forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepository(sourceName, targetName) + if err != nil { + logging.Logger.Debug("Repo forking failed, trying again: %v", err) + return false, nil + } + return true, nil + }, time.Second * 20, time.Minute * 60) if err != nil { return "", err } From ca9465bf8e4a34e4ae4aa631efefe75f2e108bb1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 11 Nov 2024 14:33:10 +0100 Subject: [PATCH 008/321] feat: Forks should be quick (cherry picked from commit ec8420ccce6376c79983d83b49e62a261eead40d) --- pkg/clients/github/repositories.go | 2 +- tests/load-tests/pkg/journey/handle_repo_templating.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index 9c650eb294..98f525270c 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -203,7 +203,7 @@ func (g *Github) ForkRepository(sourceName, targetName string) (*github.Reposito return false, fmt.Errorf("Error forking 
%s/%s: %v", g.organization, sourceName, err) } return true, nil - }, time.Second * 10, time.Minute * 30) + }, time.Second * 2, time.Minute * 5) if err1 != nil { return nil, fmt.Errorf("Failed waiting for fork %s/%s: %v", g.organization, sourceName, err1) } diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index ea5adf5ad9..99b5e663be 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -132,7 +132,7 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s return false, nil } return true, nil - }, time.Second * 20, time.Minute * 60) + }, time.Second * 20, time.Minute * 10) if err != nil { return "", err } From 5712909f07159853f1213ea51f03e8d39f396bec Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 23 May 2025 09:36:21 +0200 Subject: [PATCH 009/321] feat: New error noticed in automation about bundles resolver gerring 429 from quay.io, 2 times in last 24 hours --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 745dc23271..ecb66b5414 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -19,6 +19,7 @@ "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get pipeline via http resolver from gitlab.cee": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found", + "Couldnt get task via buldles resolver from 
quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests", "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", From e04aed8e6477e4f4ce8177ee4101822ecf08f735 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 23 May 2025 10:52:55 +0200 Subject: [PATCH 010/321] feat: Also show numbers, thanks for the idea pmacik --- tests/load-tests/errors.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ecb66b5414..ddbd055bff 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -57,7 +57,6 @@ def main(): output_file = sys.argv[2] error_messages = [] # list of error messages - error_reasons = [] # list of textual error reasons error_by_code = collections.defaultdict( lambda: 0 ) # key: numeric error code, value: number of such errors @@ -78,7 +77,6 @@ def main(): reason = message_to_reason(message) error_messages.append(message) - error_reasons.append(reason) error_by_code[code] += 1 error_by_reason[reason] += 1 except FileNotFoundError: @@ -87,14 +85,14 @@ def main(): data = { "error_by_code": error_by_code, "error_by_reason": error_by_reason, - 
"error_reasons_simple": "; ".join(error_reasons), + "error_reasons_simple": "; ".join([f"{v}x {k}" for k, v in error_by_reason.items()]), "error_messages": error_messages, } print(f"Errors detected: {len(error_messages)}") print("Errors by reason:") for k, v in error_by_reason.items(): - print(f" {v} x {k}") + print(f" {v}x {k}") with open(output_file, "w") as fp: json.dump(data, fp, indent=4) From 42646d64db61dfaa8cc0c2e767ff9b2b56c135ea Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 26 May 2025 12:00:24 +0200 Subject: [PATCH 011/321] feat: List of labels currently managed by OPL shovel.py (with automatically generated names and such) --- .../ci-scripts/config/horreum-labels.sh | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100755 tests/load-tests/ci-scripts/config/horreum-labels.sh diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh new file mode 100755 index 0000000000..263ad54dea --- /dev/null +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -0,0 +1,153 @@ +#!/bin/bash + +set -eu -o pipefail + +# Here we are using 'shovel.py' utility from OPL: +# +# https://github.com/redhat-performance/opl/ +# +# Example of some commands are: +# +# shovel.py horreum --base-url https://horreum.corp.redhat.com/ --api-token "$HORREUM_API_TOKEN" schema-label-add --schema-uri "urn:rhtap-perf-team-load-test:1.0" --extractor-jsonpath "\$.xyz" --metrics --owner hybrid-cloud-experience-perfscale-team +# +# shovel.py horreum --base-url https://horreum.corp.redhat.com/ --api-token "$HORREUM_API_TOKEN" schema-label-list --schema-uri "urn:rhtap-perf-team-load-test:1.0" | grep xyz +# +# shovel.py horreum --base-url https://horreum.corp.redhat.com/ --api-token "$HORREUM_API_TOKEN" schema-label-add --schema-uri "urn:rhtap-perf-team-load-test:1.0" --extractor-jsonpath "\$.xyz" --metrics --owner hybrid-cloud-experience-perfscale-team --name something --update-by-id 999999 +# +# 
shovel.py horreum --base-url https://horreum.corp.redhat.com/ --api-token "$HORREUM_API_TOKEN" schema-label-delete --schema-uri "urn:rhtap-perf-team-load-test:1.0" --id 999999 +# +# But here we are using just one that updates (or adds if label with the name is missing) labels for given extractor JSON path expressions: + +function horreum_schema_label_present() { + local extractor="$1" + shovel.py \ + --verbose \ + horreum \ + --base-url https://horreum.corp.redhat.com/ \ + --api-token "$HORREUM_API_TOKEN" \ + schema-label-update \ + --schema-uri "urn:rhtap-perf-team-load-test:1.0" \ + --metrics \ + --owner hybrid-cloud-experience-perfscale-team \ + --update-by-name \ + --add-if-missing \ + --extractor-jsonpath "${extractor}" +} + +horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/buildah".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/buildah".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/buildah".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/buildah".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/buildah".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.duration.mean' +horreum_schema_label_present 
'$.results.durations.stats.taskruns."build/build-image-index".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clamav-scan".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clamav-scan".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clamav-scan".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clamav-scan".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/clamav-scan".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/coverity-availability-check".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/coverity-availability-check".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/coverity-availability-check".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/coverity-availability-check".passed.running.mean' 
+horreum_schema_label_present '$.results.durations.stats.taskruns."build/coverity-availability-check".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/deprecated-image-check".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/deprecated-image-check".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/deprecated-image-check".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/deprecated-image-check".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/deprecated-image-check".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.duration.mean' 
+horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-shell-check".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-shell-check".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-shell-check".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-shell-check".passed.running.mean' 
+horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-shell-check".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-snyk-check".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-snyk-check".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-snyk-check".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-snyk-check".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-snyk-check".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-unicode-check".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-unicode-check".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-unicode-check".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-unicode-check".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/sast-unicode-check".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/show-sbom".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/show-sbom".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/show-sbom".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/show-sbom".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/show-sbom".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/summary".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/summary".passed.duration.samples' 
+horreum_schema_label_present '$.results.durations.stats.taskruns."build/summary".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/summary".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/summary".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.scheduled.mean' +horreum_schema_label_present '$.results.errors.error_reasons_simple' +horreum_schema_label_present '$.results.measurements.createApplication.error_rate' +horreum_schema_label_present '$.results.measurements.createApplication.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.createComponent.error_rate' +horreum_schema_label_present '$.results.measurements.createComponent.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.error_rate' +horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.HandleUser.error_rate' +horreum_schema_label_present '$.results.measurements.HandleUser.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.KPI.errors' +horreum_schema_label_present '$.results.measurements.KPI.mean' +horreum_schema_label_present '$.results.measurements.validateApplication.error_rate' +horreum_schema_label_present '$.results.measurements.validateApplication.pass.duration.mean' +horreum_schema_label_present 
'$.results.measurements.validateIntegrationTestScenario.error_rate' +horreum_schema_label_present '$.results.measurements.validateIntegrationTestScenario.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.error_rate' +horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation.error_rate' +horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validatePipelineRunSignature.error_rate' +horreum_schema_label_present '$.results.measurements.validatePipelineRunSignature.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateSnapshotCreation.error_rate' +horreum_schema_label_present '$.results.measurements.validateSnapshotCreation.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateTestPipelineRunCondition.error_rate' +horreum_schema_label_present '$.results.measurements.validateTestPipelineRunCondition.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateTestPipelineRunCreation.error_rate' +horreum_schema_label_present '$.results.measurements.validateTestPipelineRunCreation.pass.duration.mean' From 3f8b5ebddff539c9f9c2b7c64a0ff3ac05dfb436 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 26 May 2025 15:56:30 +0200 Subject: [PATCH 012/321] feat(KONFLUX-8332): Add tasks for RPM build pipeline --- .../ci-scripts/config/horreum-labels.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index 263ad54dea..dbfd59d169 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -49,6 +49,16 @@ horreum_schema_label_present 
'$.results.durations.stats.taskruns."build/build-im horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.idle.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.running.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/build-image-index".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/calculate-deps".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/calculate-deps".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/calculate-deps".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/calculate-deps".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/calculate-deps".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/check-noarch".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/check-noarch".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/check-noarch".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/check-noarch".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/check-noarch".passed.scheduled.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.duration.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.duration.samples' horreum_schema_label_present '$.results.durations.stats.taskruns."build/clair-scan".passed.idle.mean' @@ -74,11 +84,26 @@ horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosyste horreum_schema_label_present 
'$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.idle.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.running.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/ecosystem-cert-preflight-checks".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/get-rpm-sources".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/get-rpm-sources".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/get-rpm-sources".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/get-rpm-sources".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/get-rpm-sources".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone-oci-ta".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone-oci-ta".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone-oci-ta".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone-oci-ta".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone-oci-ta".passed.scheduled.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.duration.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.duration.samples' horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.idle.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.running.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/git-clone".passed.scheduled.mean' 
+horreum_schema_label_present '$.results.durations.stats.taskruns."build/import-to-quay".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/import-to-quay".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/import-to-quay".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/import-to-quay".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/import-to-quay".passed.scheduled.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.duration.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.duration.samples' horreum_schema_label_present '$.results.durations.stats.taskruns."build/init".passed.idle.mean' @@ -89,6 +114,11 @@ horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-doc horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.idle.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.running.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/push-dockerfile".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpmbuild".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpmbuild".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpmbuild".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpmbuild".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpmbuild".passed.scheduled.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.duration.mean' horreum_schema_label_present 
'$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.duration.samples' horreum_schema_label_present '$.results.durations.stats.taskruns."build/rpms-signature-scan".passed.idle.mean' From e822ffabfa8956fa79d9df8542c1a91b770327a1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 06:29:23 +0200 Subject: [PATCH 013/321] feat(KONFLUX-8434): Do not wait thi long for snapshot and ITS --- tests/load-tests/pkg/journey/handle_test_run.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 1b5d71f87f..ffa59cc1f5 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -13,7 +13,7 @@ import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" func validateSnapshotCreation(f *framework.Framework, namespace, compName string) (string, error) { interval := time.Second * 20 - timeout := time.Minute * 30 + timeout := time.Minute * 5 var snap *appstudioApi.Snapshot // TODO It would be much better to watch this resource for a condition @@ -31,7 +31,7 @@ func validateSnapshotCreation(f *framework.Framework, namespace, compName string func validateTestPipelineRunCreation(f *framework.Framework, namespace, itsName, snapName string) error { interval := time.Second * 20 - timeout := time.Minute * 30 + timeout := time.Minute * 5 // TODO It would be much better to watch this resource for a condition err := utils.WaitUntilWithInterval(func() (done bool, err error) { @@ -48,7 +48,7 @@ func validateTestPipelineRunCreation(f *framework.Framework, namespace, itsName, func validateTestPipelineRunCondition(f *framework.Framework, namespace, itsName, snapName string) error { interval := time.Second * 20 - timeout := time.Minute * 60 + timeout := time.Minute * 10 var pr *pipeline.PipelineRun // TODO It would be much better to watch this resource for a condition From 
b3050371bee036c2b547169e400b664bea672c30 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 10:11:50 +0200 Subject: [PATCH 014/321] feat: Adding error about failed creating ITS --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ddbd055bff..32f18cc9ef 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -23,6 +23,7 @@ "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", + "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed 
to link pipeline image pull secret to build service account": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", From 02e0ac578a8f436c1bef4dd50cde9634066b6139 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 12:52:25 +0200 Subject: [PATCH 015/321] refactor: Make evaluate script bit more readable, fixing one edge case where on missing metric it would say KPI errors = 0 --- tests/load-tests/evaluate.py | 42 +++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 03d3f1d977..61ef8c4a5e 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -51,13 +51,6 @@ def count_stats(data): return { "samples": 0, } - elif len(data) == 1: - return { - "samples": 1, - "min": data[0], - "mean": data[0], - "max": data[0], - } else: return { "samples": len(data), @@ -113,10 +106,11 @@ def main(): stats_raw[m]["fail" if error else "pass"]["duration"].append(duration) stats_raw[m]["fail" if error else "pass"]["when"].append(when) - # print(f"Raw stats: {stats_raw}") + #print("Raw stats:") + #print(json.dumps(stats_raw, indent=4, default=lambda o: '<' + str(o) + '>')) stats = {} - kpi_sum = 0.0 + kpi_mean = 0.0 kpi_errors = 0 for m in METRICS: @@ -126,26 +120,34 @@ def main(): stats[m]["pass"]["when"] = count_stats_when(stats_raw[m]["pass"]["when"]) stats[m]["fail"]["when"] = count_stats_when(stats_raw[m]["fail"]["when"]) - if stats[m]["pass"]["duration"]["samples"] == 0: + if kpi_mean != -1: # If we had 0 measurements in some metric, that means not a single - # build made it through all steps, so kpi_sum metric does not make - # 
sense as it would only cover part of the journey - kpi_sum = -1 + # build made it through this step, so kpi_mean metric does not make + # sense as it would not cover this part of the journey + if stats[m]["pass"]["duration"]["samples"] == 0: + kpi_mean = -1 + else: + kpi_mean += stats[m]["pass"]["duration"]["mean"] + + if stats[m]["pass"]["duration"]["samples"] == 0: + if kpi_errors == 0: + kpi_errors += 1 else: - if kpi_sum != -1: - kpi_sum += stats[m]["pass"]["duration"]["mean"] + kpi_errors += stats[m]["fail"]["duration"]["samples"] - s = stats[m]["pass"]["duration"]["samples"] + stats[m]["fail"]["duration"]["samples"] - if s == 0: + runs = stats[m]["pass"]["duration"]["samples"] + stats[m]["fail"]["duration"]["samples"] + if runs == 0: stats[m]["error_rate"] = None else: - stats[m]["error_rate"] = stats[m]["fail"]["duration"]["samples"] / s - kpi_errors += stats[m]["fail"]["duration"]["samples"] + stats[m]["error_rate"] = stats[m]["fail"]["duration"]["samples"] / runs stats["KPI"] = {} - stats["KPI"]["mean"] = kpi_sum + stats["KPI"]["mean"] = kpi_mean stats["KPI"]["errors"] = kpi_errors + #print("Final stats:") + #print(json.dumps(stats, indent=4)) + print(f"KPI mean: {stats['KPI']['mean']}") print(f"KPI errors: {stats['KPI']['errors']}") From 1ac8fa69d564a349ce7fff8a4c8bf95d962c3216 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 14:45:56 +0200 Subject: [PATCH 016/321] feat(KONFLUX-8436): Also track taskrun data per MPC architecture/platform --- .../utility_scripts/get-taskruns-durations.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py index b7a7eeb40c..e687e6d394 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py +++ b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py @@ -153,6 +153,12 @@ def 
_populate_taskrun(self, tr): assert len(_tr_succeeded) == 1, f"TaskRun should have exactly one 'Succeeded' condition: {_tr_succeeded}" tr_result = _tr_succeeded[0]["status"] == "True" + tr_platform = None + if "params" in tr["spec"]: + for p in tr["spec"]["params"]: + if p["name"] == "PLATFORM": + tr_platform = p["value"] + tr_steps = {} for s in tr["status"]["steps"]: try: @@ -184,6 +190,7 @@ def _populate_taskrun(self, tr): "creation": tr_creation_time, "start": tr_start_time, "completion": tr_completion_time, + "platform": tr_platform, "steps": tr_steps, }) @@ -288,6 +295,7 @@ def doit(self): "creation": tr["creation"], "start": tr["start"], "completion": tr["completion"], + "platform": tr["platform"], "steps": tr["steps"], } @@ -305,6 +313,8 @@ def doit(self): }, "taskruns": { }, + "platformtaskruns": { + }, "steps": { }, } @@ -346,6 +356,7 @@ def doit(self): for tr_name, tr_data in pr_data["taskruns"].items(): tr_id = f"{pr_id}/{tr_data['task']}" + ptr_id = f"{pr_id}/{tr_data['task']}-{tr_data['platform']}" logging.debug(f"Working on TaskRun {tr_id}") if tr_id not in result["taskruns"]: @@ -380,6 +391,27 @@ def doit(self): result["taskruns"][tr_id][tr_result]["scheduled"].append(tr_scheduled) result["taskruns"][tr_id][tr_result]["idle"].append(tr_idle) + if tr_data['platform'] is not None: + if ptr_id not in result["platformtaskruns"]: + result["platformtaskruns"][ptr_id] = { + "passed": { + "duration": [], + "running": [], + "scheduled": [], + "idle": [], + }, + "failed": { + "duration": [], + "running": [], + "scheduled": [], + "idle": [], + }, + } + result["platformtaskruns"][ptr_id][tr_result]["duration"].append(tr_duration) + result["platformtaskruns"][ptr_id][tr_result]["running"].append(tr_running) + result["platformtaskruns"][ptr_id][tr_result]["scheduled"].append(tr_scheduled) + result["platformtaskruns"][ptr_id][tr_result]["idle"].append(tr_idle) + for s_name, s_data in tr_data["steps"].items(): s_id = f"{tr_id}/{s_name}" logging.debug(f"Working on 
Step {s_id}") @@ -400,7 +432,7 @@ def doit(self): result["steps"][s_id][s_result]["duration"].append(s_duration) # Compute statistical data - for e in ("pipelineruns", "taskruns", "steps"): + for e in ("pipelineruns", "taskruns", "platformtaskruns", "steps"): for my_id, my_data1 in result[e].items(): for my_result, my_data2 in my_data1.items(): for my_stat, my_data3 in my_data2.items(): From e5a5ebcf7feb97b379071c0a4ae1adb267dd5a13 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 21:45:27 +0200 Subject: [PATCH 017/321] style: Flake8 reported issues --- .../ci-scripts/utility_scripts/get-taskruns-durations.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py index e687e6d394..98ccf8086b 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py +++ b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import argparse -import collections import csv import datetime import json @@ -11,12 +10,9 @@ import sys import yaml import time -import operator import statistics import re -import tabulate - def str2date(date_str): if isinstance(date_str, datetime.datetime): @@ -32,6 +28,7 @@ def str2date(date_str): # Convert simplified date return datetime.datetime.fromisoformat(date_str) + class DateTimeDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): super().__init__(object_hook=self.object_hook, *args, **kwargs) @@ -48,6 +45,7 @@ def object_hook(self, o): ret[key] = value return ret + class Something: def __init__(self, data_dir, dump_json): self.data_pipelineruns = [] @@ -259,7 +257,6 @@ def _merge_time_interval(self, new, existing): logging.info(f"Interval {self._format_interval(new)} does not collide with any member, adding it") return existing + [new] - def doit(self): # Normalize data into the structure 
we will use and do some cross checks data = {} @@ -470,6 +467,7 @@ def doit(self): print("TaskRuns skipped:", self.tr_skips) print("Steps skipped:", self.step_skips) + def doit(args): something = Something( data_dir=args.data_dir, From 533e247fa6b587f9338a8d2e4cd32c6a71f3bf83 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 27 May 2025 21:47:18 +0200 Subject: [PATCH 018/321] feat(KONFLUX-8436): New labels for RPM build per-arch work --- .../ci-scripts/config/horreum-labels.sh | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index dbfd59d169..d1d0d3ae50 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -34,6 +34,46 @@ function horreum_schema_label_present() { --extractor-jsonpath "${extractor}" } +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/arm64".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/arm64".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/arm64".passed.idle.mean' +horreum_schema_label_present 
'$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/arm64".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/arm64".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/ppc64le".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/ppc64le".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/ppc64le".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/ppc64le".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/ppc64le".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/s390x".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/s390x".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/s390x".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/s390x".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/s390x".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/amd64".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/amd64".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/amd64".passed.idle.mean' +horreum_schema_label_present 
'$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/amd64".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/amd64".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/arm64".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/arm64".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/arm64".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/arm64".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/arm64".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/ppc64le".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/ppc64le".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/ppc64le".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/ppc64le".passed.running.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/ppc64le".passed.scheduled.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/s390x".passed.duration.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/s390x".passed.duration.samples' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/s390x".passed.idle.mean' +horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/s390x".passed.running.mean' 
+horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/rpmbuild-linux/s390x".passed.scheduled.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.duration.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.duration.samples' horreum_schema_label_present '$.results.durations.stats.taskruns."build/apply-tags".passed.idle.mean' From 74f196c2c888aa334eb570f3ca654cdb91f30d8c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 28 May 2025 09:27:04 +0200 Subject: [PATCH 019/321] fix: Redirect also stderr to the log --- tests/load-tests/ci-scripts/collect-results.sh | 6 +++--- tests/load-tests/ci-scripts/stage/collect-results.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index b657166683..c23bfced53 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -38,14 +38,14 @@ echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Counting PRs and TRs" -ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" >"${ARTIFACT_DIR}/count-multiarch-taskruns.log" +ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/count-multiarch-taskruns.log" echo "[$(date --utc -Ins)] Graphing PRs and TRs" -ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" >"${ARTIFACT_DIR}/show-pipelineruns.log" +ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" echo "[$(date --utc -Ins)] Computing duration of PRs, TRs and steps" 
-ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" >"${ARTIFACT_DIR}/get-taskruns-durations.log" +ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index c0286a8e19..552413c6d5 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -43,14 +43,14 @@ echo "[$(date --utc -Ins)] Create summary JSON with errors" ./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-errors.json" echo "[$(date --utc -Ins)] Counting PRs and TRs" -ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" >"${ARTIFACT_DIR}/count-multiarch-taskruns.log" +ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/count-multiarch-taskruns.log" echo "[$(date --utc -Ins)] Graphing PRs and TRs" -ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" >"${ARTIFACT_DIR}/show-pipelineruns.log" || true +ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" || true mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" || true echo "[$(date --utc -Ins)] Computing duration of PRs, TRs and steps" -ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" >"${ARTIFACT_DIR}/get-taskruns-durations.log" +ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json 
"${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" From e855eb93ad0200da8e9ba5f94fc8e9d3e276c714 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 28 May 2025 09:33:16 +0200 Subject: [PATCH 020/321] feat: Add importance of this error to make it easier to explain tr_skips value even when running without --debug or so --- .../ci-scripts/utility_scripts/get-taskruns-durations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py index 98ccf8086b..3b03ef7265 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py +++ b/tests/load-tests/ci-scripts/utility_scripts/get-taskruns-durations.py @@ -176,7 +176,7 @@ def _populate_taskrun(self, tr): self.step_skips += 1 except KeyError as e: - logging.info(f"TaskRun incomplete, skipping: {e}, {str(tr)[:200]}") + logging.warning(f"TaskRun incomplete, skipping: {e}, {str(tr)[:200]}") self.tr_skips += 1 return From 5e2c897cc9d24a43836d69aa1dc07762f0eecc58 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 28 May 2025 09:35:03 +0200 Subject: [PATCH 021/321] feat: Given we put output to file, let's get all the details --- tests/load-tests/ci-scripts/collect-results.sh | 2 +- tests/load-tests/ci-scripts/stage/collect-results.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index c23bfced53..5a78339041 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -45,7 +45,7 @@ ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>" mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" echo 
"[$(date --utc -Ins)] Computing duration of PRs, TRs and steps" -ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" +ci-scripts/utility_scripts/get-taskruns-durations.py --debug --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 552413c6d5..c80ce39e22 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -50,7 +50,7 @@ ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>" mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" || true echo "[$(date --utc -Ins)] Computing duration of PRs, TRs and steps" -ci-scripts/utility_scripts/get-taskruns-durations.py --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" +ci-scripts/utility_scripts/get-taskruns-durations.py --debug --data-dir "${ARTIFACT_DIR}" --dump-json "${ARTIFACT_DIR}/get-taskruns-durations.json" &>"${ARTIFACT_DIR}/get-taskruns-durations.log" echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" From e434a7bd57906526f8040993b6a04fb03b1d75b5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 28 May 2025 11:02:07 +0200 Subject: [PATCH 022/321] feat(KONFLUX-8220): Make it possible to fork repo via GitLab as well Generated-by: Gemini --- pkg/clients/gitlab/git.go | 44 +++++++++++++++++++ .../pkg/journey/handle_repo_templating.go | 11 ++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git 
a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index b9c63b27c1..ed01d9f1ff 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -252,3 +252,47 @@ func (gc *GitlabClient) GetCommitStatusConclusion(statusName, projectID, commitS return matchingStatus.Status } + +// DeleteRepositoryIfExists deletes a GitLab repository if it exists. +// It returns an error if the deletion fails for any reason other than the project not being found (404). +func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { + resp, err := gc.client.Projects.DeleteProject(projectID) + + if err != nil { + if resp != nil && resp.StatusCode == http.StatusNotFound { + fmt.Printf("Project %s not found, no need to delete.\n", projectID) + return nil // Project not found, consider it a successful "deletion" in this context + } + return fmt.Errorf("error deleting project %s: %w", projectID, err) + } + + if resp.StatusCode != http.StatusAccepted { + return fmt.Errorf("unexpected status code when deleting project %s: %d", projectID, resp.StatusCode) + } + + fmt.Printf("Project %s deleted successfully.\n", projectID) + return nil +} + +// ForkRepository forks a source GitLab repository to a target project. +// It returns the web URL of the newly forked repository and an error if the operation fails. 
+func (gc *GitlabClient) ForkRepository(sourceProjectID, targetNamespace string) (string, error) { + opts := &gitlab.ForkProjectOptions{ + Namespace: gitlab.Ptr(targetNamespace), // The target namespace (group or user) for the forked project + } + + forkedProject, resp, err := gc.client.Projects.ForkProject(sourceProjectID, opts) + if err != nil { + return "", fmt.Errorf("error forking project %s to namespace %s: %w", sourceProjectID, targetNamespace, err) + } + + if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusAccepted { + return "", fmt.Errorf("unexpected status code when forking project %s: %d", sourceProjectID, resp.StatusCode) + } + + if forkedProject == nil || forkedProject.WebURL == "" { + return "", fmt.Errorf("forked project object or its web URL is nil") + } + + return forkedProject.WebURL, nil +} diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 99b5e663be..73c1a1988a 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -112,9 +112,16 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s if strings.Contains(repoUrl, "gitlab.") { logging.Logger.Debug("Forking Gitlab repository %s", repoUrl) - logging.Logger.Warning("Forking Gitlab repository not implemented yet, this will only work with 1 concurrent user") // TODO + // Delete a repository + err = client.DeleteRepositoryIfExists(targetName) + if err != nil { + fmt.Printf("Failed to delete repository: %v\n", err) + } + + // Fork a repository + forkedRepoURL, err := client.ForkRepository(sourceName, targetName) - return repoUrl, nil + return forkedRepoURL, nil } else { logging.Logger.Debug("Forking Github repository %s", repoUrl) From 85c5511dd033dcd30b992ad92932e344d1f7484e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 28 May 2025 14:30:02 +0200 Subject: [PATCH 023/321] feat(KONFLUX-8220): 
Polish the code, biggest problem was forking to same namespace which turns out to need to set both new repo Name and Path --- pkg/clients/gitlab/git.go | 36 +++++++++++-------- .../pkg/journey/handle_repo_templating.go | 15 ++++---- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index ed01d9f1ff..8dd488974a 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -254,45 +254,53 @@ func (gc *GitlabClient) GetCommitStatusConclusion(statusName, projectID, commitS } // DeleteRepositoryIfExists deletes a GitLab repository if it exists. -// It returns an error if the deletion fails for any reason other than the project not being found (404). +// Returns an error if the deletion fails except for project not being found (404). func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { resp, err := gc.client.Projects.DeleteProject(projectID) if err != nil { if resp != nil && resp.StatusCode == http.StatusNotFound { - fmt.Printf("Project %s not found, no need to delete.\n", projectID) - return nil // Project not found, consider it a successful "deletion" in this context + return nil } - return fmt.Errorf("error deleting project %s: %w", projectID, err) + return fmt.Errorf("Error deleting project %s: %w", projectID, err) } if resp.StatusCode != http.StatusAccepted { - return fmt.Errorf("unexpected status code when deleting project %s: %d", projectID, resp.StatusCode) + return fmt.Errorf("Unexpected status code when deleting project %s: %d", projectID, resp.StatusCode) } - fmt.Printf("Project %s deleted successfully.\n", projectID) return nil } -// ForkRepository forks a source GitLab repository to a target project. -// It returns the web URL of the newly forked repository and an error if the operation fails. 
-func (gc *GitlabClient) ForkRepository(sourceProjectID, targetNamespace string) (string, error) { +// ForkRepository forks a source GitLab repository to a target repository. +// Returns the newly forked repository and an error if the operation fails. +func (gc *GitlabClient) ForkRepository(sourceProjectID, targetProjectID string) (*gitlab.Project, error) { + targetSplit := strings.Split(targetProjectID,"/") + if len(targetSplit) != 2 { + return nil, fmt.Errorf("Failed to parse target repo %s to namespace and repo name", targetProjectID) + } + + targetNamespace := targetSplit[0] + targetRepo := targetSplit[1] + opts := &gitlab.ForkProjectOptions{ - Namespace: gitlab.Ptr(targetNamespace), // The target namespace (group or user) for the forked project + Name: gitlab.Ptr(targetRepo), + NamespacePath: gitlab.Ptr(targetNamespace), + Path: gitlab.Ptr(targetRepo), } forkedProject, resp, err := gc.client.Projects.ForkProject(sourceProjectID, opts) if err != nil { - return "", fmt.Errorf("error forking project %s to namespace %s: %w", sourceProjectID, targetNamespace, err) + return nil, fmt.Errorf("Error forking project %s to namespace %s: %w", sourceProjectID, targetNamespace, err) } if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusAccepted { - return "", fmt.Errorf("unexpected status code when forking project %s: %d", sourceProjectID, resp.StatusCode) + return nil, fmt.Errorf("Unexpected status code when forking project %s: %d", sourceProjectID, resp.StatusCode) } if forkedProject == nil || forkedProject.WebURL == "" { - return "", fmt.Errorf("forked project object or its web URL is nil") + return nil, fmt.Errorf("Forked project object not complete: %v", forkedProject) } - return forkedProject.WebURL, nil + return forkedProject, nil } diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 73c1a1988a..6f65c3008a 100644 --- 
a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -112,16 +112,19 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s if strings.Contains(repoUrl, "gitlab.") { logging.Logger.Debug("Forking Gitlab repository %s", repoUrl) - // Delete a repository - err = client.DeleteRepositoryIfExists(targetName) + // Cleanup if it already exists + err = f.AsKubeAdmin.CommonController.Gitlab.DeleteRepositoryIfExists(targetName) if err != nil { - fmt.Printf("Failed to delete repository: %v\n", err) + return "", err } - // Fork a repository - forkedRepoURL, err := client.ForkRepository(sourceName, targetName) + // Create fork and make sure it appears + forkedRepoURL, err := f.AsKubeAdmin.CommonController.Gitlab.ForkRepository(sourceName, targetName) + if err != nil { + return "", err + } - return forkedRepoURL, nil + return forkedRepoURL.WebURL, nil } else { logging.Logger.Debug("Forking Github repository %s", repoUrl) From 4e6f53fec41049239084b8e56a5817904e4c0b15 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 07:53:05 +0200 Subject: [PATCH 024/321] fix: When test fails in forking phase, this fails with: "KeyError: 'HandleUser'" --- tests/load-tests/evaluate.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 61ef8c4a5e..657c4161e6 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -114,11 +114,12 @@ def main(): kpi_errors = 0 for m in METRICS: - stats[m] = {"pass": {}, "fail": {}} - stats[m]["pass"]["duration"] = count_stats(stats_raw[m]["pass"]["duration"]) - stats[m]["fail"]["duration"] = count_stats(stats_raw[m]["fail"]["duration"]) - stats[m]["pass"]["when"] = count_stats_when(stats_raw[m]["pass"]["when"]) - stats[m]["fail"]["when"] = count_stats_when(stats_raw[m]["fail"]["when"]) + stats[m] = {"pass": {"duration": {"samples": 0}, "when": 
{}}, "fail": {"duration": {"samples": 0}, "when": {}}} + if m in stats_raw: + stats[m]["pass"]["duration"] = count_stats(stats_raw[m]["pass"]["duration"]) + stats[m]["fail"]["duration"] = count_stats(stats_raw[m]["fail"]["duration"]) + stats[m]["pass"]["when"] = count_stats_when(stats_raw[m]["pass"]["when"]) + stats[m]["fail"]["when"] = count_stats_when(stats_raw[m]["fail"]["when"]) if kpi_mean != -1: # If we had 0 measurements in some metric, that means not a single From 2a1aed9be3fa354a04358165b8d949842d3de242 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 08:03:30 +0200 Subject: [PATCH 025/321] fix: When loadtest fails forking, it dies with 255, so make sure we get a change to collect whatever we have so far --- tests/load-tests/run-stage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index fff517a1d1..93b6889b9b 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -7,6 +7,7 @@ set -o pipefail options="" [[ -n "${PIPELINE_IMAGE_PULL_SECRETS:-}" ]] && for s in $PIPELINE_IMAGE_PULL_SECRETS; do options="$options --pipeline-image-pull-secrets $s"; done +trap "date -Ins --utc >ended" EXIT date -Ins --utc >started go run loadtest.go \ --applications-count "${APPLICATIONS_COUNT:-1}" \ @@ -30,4 +31,3 @@ go run loadtest.go \ --waitpipelines="${WAIT_PIPELINES:-true}" \ $options \ --stage -date -Ins --utc >ended From 0b03b7ed81e39c7ffb4091bdbe44baafdf554d09 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 08:34:10 +0200 Subject: [PATCH 026/321] fix: Make sure that in case of fatal error, we save the error to csv and also it does not make sense to condition fatal message esp. 
when it also does exit --- tests/load-tests/pkg/logging/logging.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/load-tests/pkg/logging/logging.go b/tests/load-tests/pkg/logging/logging.go index eeb07cd2cb..172df41a21 100644 --- a/tests/load-tests/pkg/logging/logging.go +++ b/tests/load-tests/pkg/logging/logging.go @@ -10,7 +10,6 @@ var DEBUG = 1 var INFO = 2 var WARNING = 3 var ERROR = 4 -var FATAL = 5 var Logger = logger{} @@ -55,9 +54,8 @@ func (l *logger) Error(msg string, params ...interface{}) { } func (l *logger) Fatal(msg string, params ...interface{}) { - if l.Level <= FATAL { - klog.Fatalf("FATAL "+msg, params...) - } + MeasurementsStop() + klog.Fatalf("FATAL "+msg, params...) } // Log test failure with error code to CSV file so we can compile a statistic later From 4e0b70ecd869b07396fe38719ffa5f4a345da09f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 08:39:23 +0200 Subject: [PATCH 027/321] feat: New error that happens when before forking we delete the target repo and will not wait for it to be deleted before forking to it --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 32f18cc9ef..86df5df199 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -30,6 +30,7 @@ "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", + "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com/.* 409 .*Project namespace name has already been taken, The project is still being deleted.*", 
"Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", } From 73cbdf8c6b6f1eacb693214bf5f7614a924a11a8 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 09:00:12 +0200 Subject: [PATCH 028/321] feat: When deleting the repo, make sure it was deleted before going on Assisted-by: Gemini --- pkg/clients/gitlab/git.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 8dd488974a..6e35e7cd50 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -9,6 +9,8 @@ import ( . "github.com/onsi/gomega" "github.com/xanzy/go-gitlab" + + utils "github.com/konflux-ci/e2e-tests/pkg/utils" ) // CreateBranch creates a new branch in a GitLab project with the given projectID and newBranchName @@ -269,7 +271,19 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { return fmt.Errorf("Unexpected status code when deleting project %s: %d", projectID, resp.StatusCode) } - return nil + err = utils.WaitUntilWithInterval(func() (done bool, err error) { + _, getResp, getErr := gc.client.Projects.GetProject(projectID, nil) + if getErr != nil { + if getResp != nil && getResp.StatusCode == http.StatusNotFound { + return true, nil + } else { + return false, getErr + } + } + return false, nil + }, time.Second * 10, time.Minute * 5) + + return err } // ForkRepository forks a source GitLab repository to a target repository. 
From e4d215767d249377585952b897fd36803046a7a0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 09:24:26 +0200 Subject: [PATCH 029/321] feat: Make sure forking of the repo finishes before we go ahead Assisted-by: Gemini --- pkg/clients/gitlab/git.go | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 6e35e7cd50..7a4084c5ce 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -312,8 +312,25 @@ func (gc *GitlabClient) ForkRepository(sourceProjectID, targetProjectID string) return nil, fmt.Errorf("Unexpected status code when forking project %s: %d", sourceProjectID, resp.StatusCode) } - if forkedProject == nil || forkedProject.WebURL == "" { - return nil, fmt.Errorf("Forked project object not complete: %v", forkedProject) + err = utils.WaitUntilWithInterval(func() (done bool, err error) { + var getErr error + + forkedProject, _, getErr = gc.client.Projects.GetProject(forkedProject.ID, nil) + if getErr != nil { + return false, fmt.Errorf("Error getting forked project status for %s (ID: %d): %w", forkedProject.Name, forkedProject.ID, getErr) + } + + if forkedProject.ImportStatus == "finished" { + return true, nil + } else if forkedProject.ImportStatus == "failed" || forkedProject.ImportStatus == "timeout" { + return false, fmt.Errorf("Forking of project %s (ID: %d) failed with import status: %s", forkedProject.Name, forkedProject.ID, forkedProject.ImportStatus) + } + + return false, nil + }, time.Second * 10, time.Minute * 10) + + if err != nil { + return nil, fmt.Errorf("Error waiting for project %s (ID: %d) fork to complete: %w", targetProjectID, forkedProject.ID, err) } return forkedProject, nil From 781665107eae04a5712d8fbd32b106eeb6bb22c2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 10:08:12 +0200 Subject: [PATCH 030/321] feat: Make thi interval similar to interval used when forking GitLab repos --- 
pkg/clients/github/repositories.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index 98f525270c..75ec7e9663 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -203,7 +203,7 @@ func (g *Github) ForkRepository(sourceName, targetName string) (*github.Reposito return false, fmt.Errorf("Error forking %s/%s: %v", g.organization, sourceName, err) } return true, nil - }, time.Second * 2, time.Minute * 5) + }, time.Second * 10, time.Minute * 5) if err1 != nil { return nil, fmt.Errorf("Failed waiting for fork %s/%s: %v", g.organization, sourceName, err1) } From 02c225722c31c954050aad91c37bcbc4141c0e50 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 10:22:44 +0200 Subject: [PATCH 031/321] feat: Looks like GitLab fails to fork with 'Project namespace name has already been taken, The project is still being deleted' even when target repo is already gone (returning 404) --- pkg/clients/gitlab/git.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 7a4084c5ce..63888c4821 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -289,6 +289,10 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { // ForkRepository forks a source GitLab repository to a target repository. // Returns the newly forked repository and an error if the operation fails. 
func (gc *GitlabClient) ForkRepository(sourceProjectID, targetProjectID string) (*gitlab.Project, error) { + var forkedProject *gitlab.Project + var resp *gitlab.Response + var err error + targetSplit := strings.Split(targetProjectID,"/") if len(targetSplit) != 2 { return nil, fmt.Errorf("Failed to parse target repo %s to namespace and repo name", targetProjectID) @@ -303,7 +307,14 @@ func (gc *GitlabClient) ForkRepository(sourceProjectID, targetProjectID string) Path: gitlab.Ptr(targetRepo), } - forkedProject, resp, err := gc.client.Projects.ForkProject(sourceProjectID, opts) + err = utils.WaitUntilWithInterval(func() (done bool, err error) { + forkedProject, resp, err = gc.client.Projects.ForkProject(sourceProjectID, opts) + if err != nil { + fmt.Printf("Failed to fork, trying again: %v\n", err) + return false, nil + } + return true, nil + }, time.Second * 10, time.Minute * 5) if err != nil { return nil, fmt.Errorf("Error forking project %s to namespace %s: %w", sourceProjectID, targetNamespace, err) } From fa6b48729871466dc81fbcbdfde895e325206325 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 29 May 2025 12:14:33 +0200 Subject: [PATCH 032/321] feat: New error that appeared when I mistakenly did not provided GITLAB_BOT_TOKEN and GITLAB_API_URL --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 86df5df199..8d7926f7be 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -30,7 +30,8 @@ "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", - "Repo forking failed as the target is still being 
deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com/.* 409 .*Project namespace name has already been taken, The project is still being deleted.*", + "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", + "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", } From 905aa6de0c27d46cfeca955e3a7f5927f2f90e55 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 30 May 2025 10:10:40 +0200 Subject: [PATCH 033/321] feat: Recently seen lots of 'Failed to configure pipeline imagePullSecrets: Unable to add secret ... to service account ...: serviceaccounts ... 
not found' errors, so adding explicit step to check component build SA presence --- .../ci-scripts/config/horreum-labels.sh | 2 ++ tests/load-tests/errors.py | 2 +- tests/load-tests/evaluate.py | 1 + .../pkg/journey/handle_component.go | 25 +++++++++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index d1d0d3ae50..4c88b87613 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -207,6 +207,8 @@ horreum_schema_label_present '$.results.measurements.KPI.errors' horreum_schema_label_present '$.results.measurements.KPI.mean' horreum_schema_label_present '$.results.measurements.validateApplication.error_rate' horreum_schema_label_present '$.results.measurements.validateApplication.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.error_rate' +horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateIntegrationTestScenario.error_rate' horreum_schema_label_present '$.results.measurements.validateIntegrationTestScenario.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.error_rate' diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8d7926f7be..8d467d70f3 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -25,7 +25,7 @@ "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service 
.*application-service-webhook-service", "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", - "Failed to link pipeline image pull secret to build service account": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", + "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 657c4161e6..0b96c61648 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -23,6 +23,7 @@ "validateApplication", 
"createIntegrationTestScenario", "createComponent", + "validateComponentBuildSA", "validatePipelineRunCreation", "validatePipelineRunCondition", "validatePipelineRunSignature", diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index ba073a233c..918b354f01 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -122,6 +122,20 @@ func createComponent(f *framework.Framework, namespace, name, repoUrl, repoRevis return nil } +func validateComponentBuildSA(f *framework.Framework, namespace, name string) error { + interval := time.Second * 10 + timeout := time.Minute * 5 + component_sa := "build-pipeline-" + name + + // TODO It would be much better to watch this resource instead querying it + err := utils.WaitUntilWithInterval(f.AsKubeDeveloper.CommonController.ServiceAccountPresent(component_sa, namespace), interval, timeout) + if err != nil { + return fmt.Errorf("Component build SA %s in namespace %s not created: %v", component_sa, namespace, err) + } + + return nil +} + func getPaCPullNumber(f *framework.Framework, namespace, name string) (int, error) { interval := time.Second * 20 timeout := time.Minute * 15 @@ -301,6 +315,17 @@ func HandleComponent(ctx *PerComponentContext) error { return logging.Logger.Fail(60, "Component failed creation: %v", err) } + // Validate component build service account created + _, err = logging.Measure( + validateComponentBuildSA, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + ctx.ComponentName, + ) + if err != nil { + return logging.Logger.Fail(65, "Component build SA failed creation: %v", err) + } + // Configure imagePullSecrets needed for component build task images if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { _, err = logging.Measure( From 27bfc46a1a7a91639b81f1ee5e36ad4cba24317b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sun, 1 Jun 2025 08:46:11 +0200 
Subject: [PATCH 034/321] feat: Add new error I have seen 4 times in yesterday runs, caused by: Failed to fork, trying again: POST https://gitlab.cee.redhat.com/api/v4/projects/jhutar/libecpg-test-fork/fork: 403 {message: You cannot perform write operations on a read-only instance} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8d467d70f3..d620b8b8c7 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -32,6 +32,7 @@ "Post-test data collection failed": r"Failed to collect pipeline run JSONs", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", + "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", } From 7148d81a65f11afabe05e7cc350066452a3e97e3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sun, 1 Jun 2025 08:51:45 +0200 Subject: [PATCH 035/321] feat: PipelineRun should appear in few secs/in a minute, so set timeout that does not make RPM build pipeline to timeout --- tests/load-tests/pkg/journey/handle_component.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 
918b354f01..dbab27549a 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -196,7 +196,7 @@ func listPipelineRunsWithTimeout(f *framework.Framework, namespace, appName, com var err error interval := time.Second * 20 - timeout := time.Minute * 60 + timeout := time.Minute * 30 err = utils.WaitUntilWithInterval(func() (done bool, err error) { prs, err = f.AsKubeDeveloper.HasController.GetComponentPipelineRunsWithType(compName, appName, namespace, "build", sha, "") From d3ca00c40ccf07bdac5963a5c5a5d99619e8a87d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sun, 1 Jun 2025 09:02:28 +0200 Subject: [PATCH 036/321] fix: Do not print error when deleting, if it says it is already gone --- pkg/clients/tekton/pipelineruns.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/clients/tekton/pipelineruns.go b/pkg/clients/tekton/pipelineruns.go index 58897a8bae..f85489c7cf 100644 --- a/pkg/clients/tekton/pipelineruns.go +++ b/pkg/clients/tekton/pipelineruns.go @@ -198,8 +198,12 @@ func (t *TektonController) DeletePipelineRunIgnoreFinalizers(ns, name string) er } if err := t.KubeRest().Delete(context.Background(), &pipelineRunCR); err != nil { - g.GinkgoWriter.Printf("unable to delete PipelineRun '%s' in '%s': %v\n", pipelineRunCR.Name, pipelineRunCR.Namespace, err) - return false, nil + if strings.HasSuffix(err.Error(), " not found") { + return true, nil + } else { + g.GinkgoWriter.Printf("unable to delete PipelineRun '%s' in '%s': %v\n", pipelineRunCR.Name, pipelineRunCR.Namespace, err) + return false, nil + } } return true, nil }) From 8fb70445c23d107608ca94778c204ce3e5a40685 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 2 Jun 2025 06:56:08 +0200 Subject: [PATCH 037/321] feat: Adding error message about component build creation timeout I have seen once today --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py 
b/tests/load-tests/errors.py index d620b8b8c7..3deae94dce 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -33,6 +33,7 @@ "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", + "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", } From 8c5e7a7198ef9950c34e982e5861bcb2847f8f27 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 6 Jun 2025 10:17:32 +0200 Subject: [PATCH 038/321] feat: Also collect artifact dir name to make it easier to find logs in workdir exporter --- .../ci-scripts/max-concurrency/cluster_read_config.yaml | 1 + tests/load-tests/ci-scripts/stage/cluster_read_config.yaml | 1 + tests/load-tests/cluster_read_config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml index 115592d7cf..15c2d86abd 100644 --- a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml +++ b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml @@ -108,6 +108,7 @@ # Interesting CI 
environment variables {% for var in [ + 'ARTIFACT_DIR', 'BUILD_ID', 'HOSTNAME', 'JOB_NAME', diff --git a/tests/load-tests/ci-scripts/stage/cluster_read_config.yaml b/tests/load-tests/ci-scripts/stage/cluster_read_config.yaml index 70973834cb..588b86dad4 100644 --- a/tests/load-tests/ci-scripts/stage/cluster_read_config.yaml +++ b/tests/load-tests/ci-scripts/stage/cluster_read_config.yaml @@ -96,6 +96,7 @@ # Interesting CI environment variables {% for var in [ + 'ARTIFACT_DIR', 'BUILD_ID', 'BUILD_TAG', 'BUILD_URL', diff --git a/tests/load-tests/cluster_read_config.yaml b/tests/load-tests/cluster_read_config.yaml index caa92898c6..0d300dfeb8 100644 --- a/tests/load-tests/cluster_read_config.yaml +++ b/tests/load-tests/cluster_read_config.yaml @@ -108,6 +108,7 @@ # Interesting CI environment variables {% for var in [ + 'ARTIFACT_DIR', 'BUILD_ID', 'HOSTNAME', 'JOB_NAME', From 5b5fdde0b1fb970c180c4c90dd72bdbb37cbce24 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 6 Jun 2025 10:21:36 +0200 Subject: [PATCH 039/321] feat: Labels useful to identify results in Jenkins and workdir exporter --- tests/load-tests/ci-scripts/config/horreum-labels.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index 4c88b87613..256410aa18 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -34,6 +34,8 @@ function horreum_schema_label_present() { --extractor-jsonpath "${extractor}" } +horreum_schema_label_present '$.metadata.env.ARTIFACT_DIR' +horreum_schema_label_present '$.metadata.env.BUILD_ID' horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.duration.mean' horreum_schema_label_present '$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.duration.samples' horreum_schema_label_present 
'$.results.durations.stats.platformtaskruns."build/calculate-deps-linux/amd64".passed.idle.mean' From 6695618d72e776568d5256d7ad129e86e21fe778 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 6 Jun 2025 12:15:15 +0200 Subject: [PATCH 040/321] feat(KONFLUX-8591): Migrate these jobs to Jenkins --- .github/workflows/loadtest-hourly.yaml | 89 -------------------------- 1 file changed, 89 deletions(-) delete mode 100644 .github/workflows/loadtest-hourly.yaml diff --git a/.github/workflows/loadtest-hourly.yaml b/.github/workflows/loadtest-hourly.yaml deleted file mode 100644 index 1e0e9638ce..0000000000 --- a/.github/workflows/loadtest-hourly.yaml +++ /dev/null @@ -1,89 +0,0 @@ -name: Prod Load Test hourly probe - -on: - schedule: - - cron: "15 * * * *" - workflow_dispatch: - -jobs: - load_test: - strategy: - fail-fast: false - matrix: - cluster: - - label: stone-prd-rh01 - repo: https://github.com/rhtap-perf-test/nodejs-devfile-sample1 - member_cluster_secret: MEMBER_CLUSTER_STONE_PRD_RH01 - ocp_prometheus_token_secret: OCP_PROMETHEUS_TOKEN_STONE_PRD_RH01 - users_secret: USERS_STONE_PRD_RH01 - should_fail: false - - label: stone-stg-rh01 - repo: https://github.com/rhtap-perf-test/nodejs-devfile-sample2 - member_cluster_secret: MEMBER_CLUSTER_STONE_STG_RH01 - ocp_prometheus_token_secret: OCP_PROMETHEUS_TOKEN_STONE_STG_RH01 - users_secret: USERS_STONE_STG_RH01 - should_fail: false - - label: kflux-prd-rh02 - repo: https://github.com/rhtap-perf-test/nodejs-devfile-sample3 - member_cluster_secret: MEMBER_CLUSTER_KFLUX_PRD_RH02 - ocp_prometheus_token_secret: OCP_PROMETHEUS_TOKEN_KFLUX_PRD_RH02 - users_secret: USERS_KFLUX_PRD_RH02 - should_fail: false - - runs-on: ubuntu-latest - timeout-minutes: 120 - # continue even if the job fails - continue-on-error: true - - # Make sure this action does not get scheduled by cron on e2e-tests forks - if: ${{ github.repository_owner == 'konflux-ci' || github.event_name != 'schedule' }} - - env: - ARTIFACT_DIR: ${{ github.workspace 
}}/tests/load-test/artifacts/ - - steps: - - - name: Checkout code - uses: actions/checkout@v3 - - - name: Set up jq - run: | - sudo apt-get update - sudo apt-get install -y jq - - - name: Prepare list of users - working-directory: ./tests/load-tests - env: - USERS: ${{ secrets[matrix.cluster.users_secret] }} - run: echo "$USERS" > users.json - - - name: Run Load Test - working-directory: ./tests/load-tests - env: - SCENARIO: COMPONENT_REPO=${{ matrix.cluster.repo }} CONCURRENCY=1 COMPONENT_DOCKERFILE_PATH=Dockerfile MY_GITHUB_ORG=rhtap-perf-test - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} - MY_GITHUB_ORG: "rhtap-perf-test" - run: | - export $SCENARIO - ./run-stage.sh - - - name: Collect results - working-directory: ./tests/load-tests - env: - MEMBER_CLUSTER: ${{ secrets[matrix.cluster.member_cluster_secret] }} - OCP_PROMETHEUS_TOKEN: ${{ secrets[matrix.cluster.ocp_prometheus_token_secret] }} - run: | - export $SCENARIO - export MEMBER_CLUSTER - export OCP_PROMETHEUS_TOKEN - ./ci-scripts/stage/collect-results.sh ${CONCURRENCY:-1} ${PWD} - - - name: List files in the artifact directory - run: ls -la ${{ env.ARTIFACT_DIR }} - - - name: Archive artifacts - uses: actions/upload-artifact@v4 - with: - name: rhtap-load-test-${{ matrix.cluster.label }} - path: ${{ env.ARTIFACT_DIR }} - include-hidden-files: true From c9d71aaf531b49356676d1b7f608381ca4ef25cd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 7 Jun 2025 08:02:50 +0200 Subject: [PATCH 041/321] feat: Noticed new error: Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3deae94dce..b8a7b13453 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -23,6 +23,7 @@ "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type 
git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", + "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", From decad29c88cf71a602c259e80ff126b2d4f6745d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 7 Jun 2025 08:07:15 +0200 Subject: [PATCH 
042/321] feat: Remove this script as it does not add value now --- .../load-tests/ci-scripts/collect-results.sh | 3 - .../ci-scripts/stage/collect-results.sh | 3 - .../count-multiarch-taskruns.py | 279 ------------------ 3 files changed, 285 deletions(-) delete mode 100755 tests/load-tests/ci-scripts/utility_scripts/count-multiarch-taskruns.py diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index 5a78339041..7bbfd0b925 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -37,9 +37,6 @@ python3 -m pip install matplotlib echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" -echo "[$(date --utc -Ins)] Counting PRs and TRs" -ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/count-multiarch-taskruns.log" - echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index c80ce39e22..6fadb2c8bb 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -42,9 +42,6 @@ echo "[$(date --utc -Ins)] Create summary JSON with timings" echo "[$(date --utc -Ins)] Create summary JSON with errors" ./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-errors.json" -echo "[$(date --utc -Ins)] Counting PRs and TRs" -ci-scripts/utility_scripts/count-multiarch-taskruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/count-multiarch-taskruns.log" - echo "[$(date --utc -Ins)] Graphing PRs and TRs" 
ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" || true mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" || true diff --git a/tests/load-tests/ci-scripts/utility_scripts/count-multiarch-taskruns.py b/tests/load-tests/ci-scripts/utility_scripts/count-multiarch-taskruns.py deleted file mode 100755 index 0b51c32325..0000000000 --- a/tests/load-tests/ci-scripts/utility_scripts/count-multiarch-taskruns.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python - -import argparse -import collections -import csv -import datetime -import json -import logging -import os -import os.path -import sys -import yaml -import time -import operator -import statistics -import re - -import tabulate - - -def str2date(date_str): - if isinstance(date_str, datetime.datetime): - return date_str - else: - try: - return datetime.datetime.fromisoformat(date_str) - except ValueError: # Python before 3.11 - # Convert "...Z" to "...+00:00" - date_str = date_str.replace("Z", "+00:00") - # Remove microseconds part - date_str = re.sub(r"(.*)(\.\d+)(\+.*)", r"\1\3", date_str) - # Convert simplified date - return datetime.datetime.fromisoformat(date_str) - -class DateTimeDecoder(json.JSONDecoder): - def __init__(self, *args, **kwargs): - super().__init__(object_hook=self.object_hook, *args, **kwargs) - - def object_hook(self, o): - ret = {} - for key, value in o.items(): - if isinstance(value, str): - try: - ret[key] = str2date(value) - except ValueError: - ret[key] = value - else: - ret[key] = value - return ret - -class Something: - def __init__(self, data_dir): - self.data_taskruns = [] - self.data_dir = data_dir - - self.tr_skips = 0 # how many TaskRuns we skipped - - self._populate(self.data_dir) - - def _load_json(self, path): - with open(path, "r") as fp: - return json.load(fp, cls=DateTimeDecoder) - - def _populate(self, data_dir): - for currentpath, folders, files in os.walk(data_dir): - for datafile in 
files: - datafile = os.path.join(currentpath, datafile) - - start = time.time() - if datafile.endswith(".yaml") or datafile.endswith(".yml"): - with open(datafile, "r") as fd: - data = yaml.safe_load(fd) - elif datafile.endswith(".json"): - try: - data = self._load_json(datafile) - except json.decoder.JSONDecodeError: - logging.warning(f"File {datafile} is malfrmed, skipping it") - continue - else: - continue - end = time.time() - logging.debug(f"Loaded {datafile} in {(end - start):.2f} seconds") - - if "kind" not in data: - logging.info(f"Skipping {datafile} as it does not contain kind") - continue - - if data["kind"] == "List": - if "items" not in data: - logging.info(f"Skipping {datafile} as it does not contain items") - continue - - for i in data["items"]: - self._populate_add_one(i) - else: - self._populate_add_one(data) - - print(f"We loaded {len(self.data_taskruns)} and skipped {self.tr_skips} TaskRuns") - - def _populate_add_one(self, something): - if "kind" not in something: - logging.info("Skipping item because it does not have kind") - return - - if something["kind"] == "TaskRun": - self._populate_taskrun(something) - else: - logging.debug(f"Skipping item because it has unexpected kind {something['kind']}") - return - - def _populate_taskrun(self, tr): - """Load TaskRun.""" - try: - tr_name = tr["metadata"]["name"] - except KeyError as e: - logging.info(f"TaskRun missing name, skipping: {e}, {str(tr)[:200]}") - self.tr_skips += 1 - return - - try: - tr_task = tr["metadata"]["labels"]["tekton.dev/pipelineTask"] - except KeyError as e: - logging.info( - f"TaskRun {tr_name} missing task, skipping: {e}" - ) - self.tr_skips += 1 - return - - try: - tr_conditions = tr["status"]["conditions"] - except KeyError as e: - logging.info(f"TaskRun {tr_name} missing conditions, skipping: {e}") - self.tr_skips += 1 - return - - tr_condition_ok = False - for c in tr_conditions: - if c["type"] == "Succeeded": - if c["status"] == "True": - tr_condition_ok = True - break - 
###if not tr_condition_ok: - ### logging.info(f"TaskRun {tr_name} in wrong condition, skipping: {c}") - ### self.tr_skips += 1 - ### return - - try: - tr_creationTimestamp = str2date(tr["metadata"]["creationTimestamp"]) - tr_completionTime = str2date(tr["status"]["completionTime"]) - tr_startTime = str2date(tr["status"]["startTime"]) - tr_namespace = tr["metadata"]["namespace"] - except KeyError as e: - logging.info(f"TaskRun {tr_name} missing some fields, skipping: {e}") - self.tr_skips += 1 - return - - self.data_taskruns.append( - { - "namespace": tr_namespace, - "name": tr_name, - "task": tr_task, - "condition": tr_condition_ok, - "pending_duration": (tr_startTime - tr_creationTimestamp).total_seconds(), - "running_duration": (tr_completionTime - tr_startTime).total_seconds(), - "duration": (tr_completionTime - tr_creationTimestamp).total_seconds(), - } - ) - - def _show_multi_arch_tasks(self): - # All data - table_header = [ - "namespace", - "name", - "task", - "duration", - "condition", - ] - table = [] - for tr in self.data_taskruns: - table.append([ - tr["namespace"], - tr["name"], - tr["task"], - tr["duration"], - tr["condition"], - ]) - table.sort(key=operator.itemgetter(3)) - print("\nTaskRuns breakdown:\n") - print(tabulate.tabulate(table, headers=table_header)) - self._dump_as_csv("taskruns-breakdown-all.csv", table, table_header) - - # Per task average - data = {} - for tr in self.data_taskruns: - if not tr["condition"]: - continue # skip failed tasks - if tr["task"] not in data: - data[tr["task"]] = { - "count": 0, - "times": [], - } - data[tr["task"]]["count"] += 1 - data[tr["task"]]["times"].append(tr["duration"]) - table_header = [ - "task", - "duration_avg_sec", - "duration_stdev", - "duration_samples", - ] - table = [] - for t, v in data.items(): - table.append([ - t, - sum(v["times"]) / v["count"] if v["count"] > 0 else None, - statistics.stdev(v["times"]) if len(v["times"]) >= 2 else None, - v["count"], - ]) - 
table.sort(key=operator.itemgetter(1)) - print("\nTaskRuns breakdown averages by task (only successfull):\n") - print(tabulate.tabulate(table, headers=table_header, floatfmt=".0f")) - self._dump_as_csv("taskruns-breakdown-averages.csv", table, table_header) - - def _dump_as_csv(self, name, table, table_header): - name_full = os.path.join(self.data_dir, name) - with open(name_full, "w") as fd: - writer = csv.writer(fd) - writer.writerow(table_header) - for row in table: - writer.writerow(row) - - def doit(self): - self._show_multi_arch_tasks() - -def doit(args): - something = Something( - data_dir=args.data_dir, - ) - return something.doit() - - -def main(): - parser = argparse.ArgumentParser( - description="Show PipelineRuns and TaskRuns", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--data-dir", - required=True, - help="Directory from where to load YAML data and where to put output SVG", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Show verbose output", - ) - parser.add_argument( - "-d", - "--debug", - action="store_true", - help="Show debug output", - ) - args = parser.parse_args() - - fmt = "%(asctime)s %(name)s %(levelname)s %(message)s" - if args.verbose: - logging.basicConfig(format=fmt, level=logging.INFO) - elif args.debug: - logging.basicConfig(format=fmt, level=logging.DEBUG) - else: - logging.basicConfig(format=fmt) - - logging.debug(f"Args: {args}") - - return doit(args) - - -if __name__ == "__main__": - sys.exit(main()) From 36f2bc2f6f7ad918337a55538d4f0a5aa9cdd466 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 7 Jun 2025 08:13:59 +0200 Subject: [PATCH 043/321] feat: Noticed new error: Integration test scenario failed validation: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index b8a7b13453..11aa9632be 100755 --- a/tests/load-tests/errors.py +++ 
b/tests/load-tests/errors.py @@ -37,6 +37,7 @@ "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", + "Timeout waiting for integration test scenario to finish": r"Integration test scenario failed validation: context deadline exceeded", } From 0e4d8b70efc10027e2b31e136d08d66983ebdd7c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 7 Jun 2025 08:19:40 +0200 Subject: [PATCH 044/321] feat: Also collect log of important part of results collecting script --- tests/load-tests/ci-scripts/collect-results.sh | 4 ++++ tests/load-tests/ci-scripts/stage/collect-results.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index 7bbfd0b925..bad3d859bd 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -16,6 +16,8 @@ ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts} mkdir -p ${ARTIFACT_DIR} pushd "${2:-./tests/load-tests}" +{ + echo "[$(date --utc -Ins)] Collecting artifacts" find . -maxdepth 1 -type f -name '*.log' -exec cp -vf {} "${ARTIFACT_DIR}" \; find . 
-maxdepth 1 -type f -name '*.csv' -exec cp -vf {} "${ARTIFACT_DIR}" \; @@ -145,4 +147,6 @@ fi #$tapa all "${pipelinerun_stub}.json" "${taskrun_stub}.json" "${pod_stub}.json" >"$tapa_tmp" #sort_csv "$tapa_tmp" "$tapa_all_csv" +} 2>&1 | tee "${ARTIFACT_DIR}/collect-results.log" + popd diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 6fadb2c8bb..4d98889060 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -18,6 +18,8 @@ BASE_URL=$(echo $MEMBER_CLUSTER | grep -oP 'https://api\.\K[^:]+') PROMETHEUS_HOST="thanos-querier-openshift-monitoring.apps.$BASE_URL" TOKEN=${OCP_PROMETHEUS_TOKEN} +{ + echo "[$(date --utc -Ins)] Collecting artifacts" find . -maxdepth 1 -type f -name '*.log' -exec cp -vf {} "${ARTIFACT_DIR}" \; find . -maxdepth 1 -type f -name '*.csv' -exec cp -vf {} "${ARTIFACT_DIR}" \; @@ -75,4 +77,6 @@ status_data.py \ deactivate +} 2>&1 | tee "${ARTIFACT_DIR}/collect-results.log" + popd From 87d9797719c4c8e7bfbfe8d05380338f3c8229ba Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 7 Jun 2025 16:01:14 +0200 Subject: [PATCH 045/321] feat: New error: Test Pipeline Run failed run: context deadline exceeded --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 11aa9632be..b3b3d8f9f5 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -37,7 +37,8 @@ "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than 
global timeout of .*", - "Timeout waiting for integration test scenario to finish": r"Integration test scenario failed validation: context deadline exceeded", + "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", + "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", } From e41e67099333e426e039ab6b7444769099388421 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 9 Jun 2025 11:11:22 +0200 Subject: [PATCH 046/321] feat: New error: gitlab.cee.redhat.com Token is expired. You can either do re-authorization or token refresh --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index b3b3d8f9f5..a4a8b2655e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -29,6 +29,7 @@ "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", + "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", From dc4c5162cd4bca2b895933d92bf275347fb457a4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 9 Jun 2025 13:08:08 +0200 Subject: [PATCH 047/321] feat: New error: Test Pipeline Run failed creation: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a4a8b2655e..ee634ef819 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -39,6 +39,7 @@ "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", + "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", } From a6691f24d46183b9f5e24a334c21b109feb6aae4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 9 Jun 2025 21:10:43 +0200 Subject: [PATCH 048/321] feat: Attempt to avoid: log.SetLogger(...) was never called; logs will not be displayed Sometimes (like 40% of runs?) I see this error: [controller-runtime] log.SetLogger(...) was never called; logs will not be displayed. 
Detected at: > goroutine 199 [running]: > runtime/debug.Stack() > /usr/lib/golang/src/runtime/debug/stack.go:26 +0x5e > sigs.k8s.io/controller-runtime/pkg/log.eventuallyFulfillRoot() > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.17.5/pkg/log/log.go:60 +0xcd > sigs.k8s.io/controller-runtime/pkg/log.(*delegatingLogSink).WithName(0xc0005c2100, {0x2550046, 0x14}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.17.5/pkg/log/deleg.go:147 +0x3e > github.com/go-logr/logr.Logger.WithName({{0x28e6250, 0xc0005c2100}, 0x0}, {0x2550046?, 0x21?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/github.com/go-logr/logr@v1.4.1/logr.go:345 +0x36 > sigs.k8s.io/controller-runtime/pkg/client.newClient(0x0?, {0x0, 0xc0002fb110, {0x0, 0x0}, 0x0, {0x0, 0x0}, 0x0}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.17.5/pkg/client/client.go:129 +0xf1 > sigs.k8s.io/controller-runtime/pkg/client.New(0xffffffffffffffff?, {0x0, 0xc0002fb110, {0x0, 0x0}, 0x0, {0x0, 0x0}, 0x0}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.17.5/pkg/client/client.go:110 +0x7d > github.com/konflux-ci/e2e-tests/pkg/clients/kubernetes.CreateAPIProxyClient.func1({0x5f5e100?, 0xc000486550?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/clients/kubernetes/client.go:208 +0x78 > k8s.io/apimachinery/pkg/util/wait.loopConditionUntilContext.func2(0xc000bd2d30?, {0x28df3a0?, 0xc000486540?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/k8s.io/apimachinery@v0.29.4/pkg/util/wait/loop.go:87 +0x52 > 
k8s.io/apimachinery/pkg/util/wait.loopConditionUntilContext({0x28df3a0, 0xc000486540}, {0x28d3f50, 0xc0006a6040}, 0x0, 0x0, 0xc000bd2e58) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/k8s.io/apimachinery@v0.29.4/pkg/util/wait/loop.go:88 +0x237 > k8s.io/apimachinery/pkg/util/wait.PollUntilContextTimeout({0x28df248?, 0x3d86a60?}, 0x5f5e100, 0xc0003b22c0?, 0x0, 0xc000bd2e58) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/k8s.io/apimachinery@v0.29.4/pkg/util/wait/poll.go:48 +0xa5 > github.com/konflux-ci/e2e-tests/pkg/clients/kubernetes.CreateAPIProxyClient({0xc00021cb00, 0x543}, {0xc0003b22c0, 0x39}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/clients/kubernetes/client.go:207 +0x12e > github.com/konflux-ci/e2e-tests/pkg/clients/kubernetes.NewDevSandboxProxyClient({0xc00048bdf0, 0x6}, 0x40?, 0x1?, {{0xc0003b22c0, 0x39}, {0xc00048be00, 0x6}, {0xc00021cb00, 0x543}}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/clients/kubernetes/client.go:148 +0x391 > github.com/konflux-ci/e2e-tests/pkg/framework.newFrameworkWithTimeout.func1() > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/framework/framework.go:95 +0x53 > github.com/avast/retry-go/v4.Do(0xc000bd3418, {0xc000e47410, 0x1, 0xc0001619b0?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/go/pkg/mod/github.com/avast/retry-go/v4@v4.3.3/retry.go:132 +0x529 > github.com/konflux-ci/e2e-tests/pkg/framework.newFrameworkWithTimeout({0xc00048bdf0, 0x6}, 0x34630b8a000, {0xc000bc2480, 0x1, 0xc000bc2480?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/framework/framework.go:93 +0x2ef > github.com/konflux-ci/e2e-tests/pkg/framework.NewFrameworkWithTimeout({0xc00048bdf0, 0x6}, 0x34630b8a000, {0xc000bc2480, 0x1, 0x1}) > 
/home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/pkg/framework/framework.go:175 +0x156 > github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/journey.HandleNewFrameworkForComp(0xc0006a87d0) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/pkg/journey/handle_users.go:46 +0x146 > reflect.Value.call({0x200f800?, 0x2693840?, 0x0?}, {0x2533b99, 0x4}, {0xc000c81938, 0x1, 0x2ed2799?}) > /usr/lib/golang/src/reflect/value.go:584 +0xca6 > reflect.Value.Call({0x200f800?, 0x2693840?, 0xc000e2ce00?}, {0xc000c81938?, 0x1?, 0x51f8a4?}) > /usr/lib/golang/src/reflect/value.go:368 +0xb9 > github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging.Measure({0x200f800?, 0x2693840?}, {0xc000e47f98, 0x1, 0xc0004d1fb8?}) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/pkg/logging/time_and_log.go:211 +0x550 > main.perComponentThread(0xc0006a87d0) > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/loadtest.go:270 +0xa5 > created by github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/journey.PerComponentSetup in goroutine 118 > /home/jenkins/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/pkg/journey/journey.go:172 +0x65 Other than this warning, it does not seem to affect functionality of the load test. 
--- tests/load-tests/loadtest.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 767bf44ab9..618edbd7a1 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -9,6 +9,8 @@ import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" import cobra "github.com/spf13/cobra" import klog "k8s.io/klog/v2" +import klogr "k8s.io/klog/v2/klogr" +import ctrl "sigs.k8s.io/controller-runtime" //import "os" //import "context" @@ -63,6 +65,15 @@ func init() { func main() { var err error + // Setup logging + klog.InitFlags(nil) + defer klog.Flush() + // Set the controller-runtime logger to use klogr. + // This makes controller-runtime logs go through klog. + // Hopefully will help us to avoid these errors: + // [controller-runtime] log.SetLogger(...) was never called; logs will not be displayed. + ctrl.SetLogger(klogr.New()) + // Setup argument parser err = rootCmd.Execute() if err != nil { From 8e93f0ef719639df7f44dd7e0740085b2696cd10 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 10 Jun 2025 17:14:41 +0200 Subject: [PATCH 049/321] feat: New error: Component failed creation: Unable to create the Component jhutar-1-app-lxzzy-comp-0: Internal error occurred: failed calling webhook "mcomponent.kb.io": failed to call webhook: Post "https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component?timeout=10s": no endpoints available for service "application-service-webhook-service" --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ee634ef819..42abea0848 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -24,6 +24,7 @@ "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* 
is not found", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", + "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to 
service account .*: serviceaccounts .* not found", From e60bf1eb452198b3f48e6f6dc59e389b97a411d8 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 10 Jun 2025 17:19:28 +0200 Subject: [PATCH 050/321] feat: New error: Repo forking failed: Error deleting project jhutar/nodejs-devfile-sample4-jhutar: DELETE https://gitlab.cee.redhat.com/api/v4/projects/jhutar/nodejs-devfile-sample4-jhutar: 401 {message: 401 Unauthorized} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 42abea0848..80a43dd4f2 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -33,6 +33,7 @@ "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. You can either do re-authorization or token refresh", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", + "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) 
due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", From 6e6ce49fc6191f57c6f33a04665737f7c5cfa7fb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 11 Jun 2025 10:16:49 +0200 Subject: [PATCH 051/321] feat: Also measure waiting for initial PaC PR to appear --- tests/load-tests/ci-scripts/config/horreum-labels.sh | 2 ++ tests/load-tests/evaluate.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index 256410aa18..aa20d23889 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -201,6 +201,8 @@ horreum_schema_label_present '$.results.measurements.createApplication.error_rat horreum_schema_label_present '$.results.measurements.createApplication.pass.duration.mean' horreum_schema_label_present '$.results.measurements.createComponent.error_rate' horreum_schema_label_present '$.results.measurements.createComponent.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.getPaCPullNumber.error_rate' +horreum_schema_label_present '$.results.measurements.getPaCPullNumber.pass.duration.mean' horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.error_rate' horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.pass.duration.mean' horreum_schema_label_present '$.results.measurements.HandleUser.error_rate' diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 0b96c61648..3b27bbed50 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -23,6 +23,7 @@ "validateApplication", "createIntegrationTestScenario", "createComponent", + "getPaCPullNumber", "validateComponentBuildSA", 
"validatePipelineRunCreation", "validatePipelineRunCondition", From e96937249c604a89b222a0f58023110cc9b5df21 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 11 Jun 2025 11:20:57 +0200 Subject: [PATCH 052/321] feat(KONFLUX-8756): Make it possible to fork from other org and apply pipeline templates on top of fork --- pkg/clients/github/repositories.go | 35 +++-- tests/load-tests/loadtest.go | 2 + .../pkg/journey/handle_component.go | 6 +- .../pkg/journey/handle_repo_templating.go | 122 ++++++++++++------ tests/load-tests/pkg/options/options.go | 74 ++++++----- tests/load-tests/run-stage.sh | 2 + tests/load-tests/run.sh | 2 + 7 files changed, 159 insertions(+), 84 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index 75ec7e9663..bd6c8d7832 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -171,7 +171,7 @@ func (g *Github) DeleteRepositoryIfExists(name string) error { return nil } -func (g *Github) ForkRepository(sourceName, targetName string) (*github.Repository, error) { +func (g *Github) forkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName, targetName string) (*github.Repository, error) { var fork *github.Repository var resp *github.Response var repo *github.Repository @@ -179,11 +179,11 @@ func (g *Github) ForkRepository(sourceName, targetName string) (*github.Reposito ctx := context.Background() forkOptions := &github.RepositoryCreateForkOptions{ - Organization: g.organization, + Organization: targetOrgName, } err1 := utils.WaitUntilWithInterval(func() (done bool, err error) { - fork, resp, err = g.client.Repositories.CreateFork(ctx, g.organization, sourceName, forkOptions) + fork, resp, err = g.client.Repositories.CreateFork(ctx, sourceOrgName, sourceName, forkOptions) if err != nil { if _, ok := err.(*github.AcceptedError); ok && resp.StatusCode == 202 { // This meens forking is happening asynchronously @@ -200,25 +200,25 @@ func (g *Github) 
ForkRepository(sourceName, targetName string) (*github.Reposito fmt.Printf("Warning, got 500: %s", resp.Body) return false, nil } - return false, fmt.Errorf("Error forking %s/%s: %v", g.organization, sourceName, err) + return false, fmt.Errorf("Error forking %s/%s: %v", sourceOrgName, sourceName, err) } return true, nil }, time.Second * 10, time.Minute * 5) if err1 != nil { - return nil, fmt.Errorf("Failed waiting for fork %s/%s: %v", g.organization, sourceName, err1) + return nil, fmt.Errorf("Failed waiting for fork %s/%s: %v", sourceOrgName, sourceName, err1) } err2 := utils.WaitUntilWithInterval(func() (done bool, err error) { // Using this to detect repo is created and populated with content // https://stackoverflow.com/questions/33666838/determine-if-a-fork-is-ready - _, _, err = g.client.Repositories.ListCommits(ctx, g.organization, fork.GetName(), &github.CommitsListOptions{}) + _, _, err = g.client.Repositories.ListCommits(ctx, targetOrgName, fork.GetName(), &github.CommitsListOptions{}) if err != nil { return false, nil } return true, nil }, time.Second * 10, time.Minute * 10) if err2 != nil { - return nil, fmt.Errorf("Failed waiting for commits %s/%s: %v", g.organization, sourceName, err2) + return nil, fmt.Errorf("Failed waiting for commits %s/%s: %v", targetOrgName, fork.GetName(), err2) } editedRepo := &github.Repository{ @@ -226,20 +226,35 @@ func (g *Github) ForkRepository(sourceName, targetName string) (*github.Reposito } err3 := utils.WaitUntilWithInterval(func() (done bool, err error) { - repo, resp, err = g.client.Repositories.Edit(ctx, g.organization, fork.GetName(), editedRepo) + repo, resp, err = g.client.Repositories.Edit(ctx, targetOrgName, fork.GetName(), editedRepo) if err != nil { if resp.StatusCode == 422 { // This started to happen recently. Docs says 422 is "Validation failed, or the endpoint has been spammed." so we need to be patient. 
// Error we are getting: "422 Validation Failed [{Resource:Repository Field:name Code:custom Message:name a repository operation is already in progress}]" return false, nil } - return false, fmt.Errorf("Error renaming %s/%s to %s: %v\n", g.organization, fork.GetName(), targetName, err) + return false, fmt.Errorf("Error renaming %s/%s to %s: %v\n", targetOrgName, fork.GetName(), targetName, err) } return true, nil }, time.Second * 10, time.Minute * 10) if err3 != nil { - return nil, fmt.Errorf("Failed waiting for renaming %s/%s: %v", g.organization, targetName, err3) + return nil, fmt.Errorf("Failed waiting for renaming %s/%s: %v", targetOrgName, targetName, err3) } return repo, nil } + +// Fork repository in our organization +func (g *Github) ForkRepository(sourceName, targetName string) (*github.Repository, error) { + return g.forkRepositoryWithOrgs(g.organization, sourceName, g.organization, targetName) +} + +// Fork repository from our organization to another org +func (g *Github) ForkRepositoryToOrg(sourceName, targetName, targetOrgName string) (*github.Repository, error) { + return g.forkRepositoryWithOrgs(g.organization, sourceName, targetOrgName, targetName) +} + +// Fork repository from another organization to our org +func (g *Github) ForkRepositoryFromOrg(sourceName, targetName, sourceOrgName string) (*github.Repository, error) { + return g.forkRepositoryWithOrgs(sourceOrgName, sourceName, g.organization, targetName) +} diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 618edbd7a1..23112afd81 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -54,6 +54,8 @@ func init() { rootCmd.Flags().StringVar(&opts.JourneyDuration, "journey-duration", "1h", "repeat user journey until this timeout (either this or --journey-repeats)") rootCmd.Flags().BoolVar(&opts.PipelineMintmakerDisabled, "pipeline-mintmaker-disabled", true, "if you want to stop Mintmaker to be creating update PRs for your component (default in 
loadtest different from Konflux default)") rootCmd.Flags().BoolVar(&opts.PipelineRepoTemplating, "pipeline-repo-templating", false, "if we should use in repo template pipelines (merge PaC PR, template repo pipelines and ignore custom pipeline run, e.g. required for multi arch test)") + rootCmd.Flags().StringVar(&opts.PipelineRepoTemplatingSource, "pipeline-repo-templating-source", "", "when templating, take template source files from this repository (\"\" means we will get source files from current repo)") + rootCmd.Flags().StringVar(&opts.PipelineRepoTemplatingSourceDir, "pipeline-repo-templating-source-dir", "", "when templating from additional repository, take template source files from this directory (\"\" means default \".template/\" will be used)") rootCmd.Flags().StringArrayVar(&opts.PipelineImagePullSecrets, "pipeline-image-pull-secrets", []string{}, "secret needed to pull task images, can be used multiple times") rootCmd.Flags().StringVarP(&opts.OutputDir, "output-dir", "o", ".", "directory where output files such as load-tests.log or load-tests.json are stored") rootCmd.Flags().StringVar(&opts.BuildPipelineSelectorBundle, "build-pipeline-selector-bundle", "", "BuildPipelineSelector bundle to use when testing with build-definition PR") diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index dbab27549a..0c74d385c5 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -238,7 +238,7 @@ func listAndDeletePipelineRunsWithTimeout(f *framework.Framework, namespace, app } // This handles post-component creation tasks for multi-arch PaC workflow -func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, appName, compName, repoUrl, repoRev string, mergeReqNum int, placeholders *map[string]string) error { +func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, appName, compName, repoUrl, repoRev, 
sourceRepo, sourceRepoDir string, mergeReqNum int, placeholders *map[string]string) error { var repoName string var err error @@ -272,7 +272,7 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap logging.Logger.Debug("Repo-templating workflow: Cleaned up (second cleanup) for %s/%s/%s", namespace, appName, compName) // Template our multi-arch PaC files - shaMap, err := templateFiles(f, repoUrl, repoRev, placeholders) + shaMap, err := templateFiles(f, repoUrl, repoRev, sourceRepo, sourceRepoDir, placeholders) if err != nil { return fmt.Errorf("Error templating PaC files: %v", err) } @@ -381,6 +381,8 @@ func HandleComponent(ctx *PerComponentContext) error { ctx.ComponentName, ctx.ParentContext.ParentContext.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision, + ctx.ParentContext.ParentContext.Opts.PipelineRepoTemplatingSource, + ctx.ParentContext.ParentContext.Opts.PipelineRepoTemplatingSourceDir, ctx.MergeRequestNumber, placeholders, ) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 6f65c3008a..8288265b06 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -36,48 +36,91 @@ func getRepoNameFromRepoUrl(repoUrl string) (string, error) { } } -// Template file from '.template/...' 
to '.tekton/...', expanding placeholders (even in file name) using Github API -// Returns SHA of the commit -func templateRepoFileGithub(f *framework.Framework, repoName, repoRevision, fileName string, placeholders *map[string]string) (string, error) { - var fileResponse *github.RepositoryContent +// Parse repo organization out of repo url +func getRepoOrgFromRepoUrl(repoUrl string) (string, error) { + // Answer taken from https://stackoverflow.com/questions/7124778/how-can-i-match-anything-up-until-this-sequence-of-characters-in-a-regular-exp + // Tested with these input data: + // repoUrl: https://github.com/abc/nodejs-devfile-sample.git/, match[1]: abc + // repoUrl: https://github.com/abc/nodejs-devfile-sample.git, match[1]: abc + // repoUrl: https://github.com/abc/nodejs-devfile-sample/, match[1]: abc + // repoUrl: https://github.com/abc/nodejs-devfile-sample, match[1]: abc + // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: abc + var regex *regexp.Regexp + regex = regexp.MustCompile(`[^/]+://[^/]+/(.*)/.*$`) + match := regex.FindStringSubmatch(repoUrl) + if match != nil { + return match[1], nil + } else { + return "", fmt.Errorf("Failed to parse repo org out of url %s", repoUrl) + } +} + +// Get file content from repository, no matter if on GitLab or GitHub +func getRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileName string) (string, error) { var fileContent string - var repoContentResponse *github.RepositoryContentResponse - var err error - fileResponse, err = f.AsKubeAdmin.CommonController.Github.GetFile(repoName, ".template/" + fileName, repoRevision) + repoName, err := getRepoNameFromRepoUrl(repoUrl) if err != nil { return "", err } - fileContent, err = fileResponse.GetContent() - if err != nil { - return "", err - } + if strings.Contains(repoUrl, "gitlab.") { + fileContent, err = f.AsKubeAdmin.CommonController.Gitlab.GetFile(repoName, fileName, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to 
get file %s from repo %s revision %s: %v", fileName, repoName, repoRevision, err) + } + } else { + fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoName, repoRevision, err) + } - for key, value := range *placeholders { - fileContent = strings.ReplaceAll(fileContent, key, value) - fileName = strings.ReplaceAll(fileName, key, value) + fileContent, err = fileResponse.GetContent() + if err != nil { + return "", err + } } - fileResponse, err = f.AsKubeAdmin.CommonController.Github.GetFile(repoName, ".tekton/" + fileName, repoRevision) + return fileContent, nil +} + +// Update file content in repository, no matter if on GitLab or GitHub +func updateRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileName, fileContent string) (string, error) { + var commitSha string + + repoName, err := getRepoNameFromRepoUrl(repoUrl) if err != nil { return "", err } - repoContentResponse, err = f.AsKubeAdmin.CommonController.Github.UpdateFile(repoName, ".tekton/" + fileName, fileContent, repoRevision, *fileResponse.SHA) - if err != nil { - return "", err + if strings.Contains(repoUrl, "gitlab.") { + commitSha, err = f.AsKubeAdmin.CommonController.Gitlab.UpdateFile(repoName, fileName, fileContent, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to update file %s in repo %s revision %s: %v", fileName, repoName, repoRevision, err) + } + } else { + fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoName, repoRevision, err) + } + + repoContentResponse, err := f.AsKubeAdmin.CommonController.Github.UpdateFile(repoName, fileName, fileContent, repoRevision, *fileResponse.SHA) + if err != nil { + return "", fmt.Errorf("Failed to update file %s in repo 
%s revision %s: %v", fileName, repoName, repoRevision, err) + } + + commitSha = *repoContentResponse.Commit.SHA } - return *repoContentResponse.Commit.SHA, nil + return commitSha, nil } -// Template file from '.template/...' to '.tekton/...', expanding placeholders (even in file name) using Gitlab API +// Template file from source repo and dir to '.tekton/...' in component repo, expanding placeholders (even in file name), no matter if on GitLab or GitHub // Returns SHA of the commit -func templateRepoFileGitlab(f *framework.Framework, repoName, repoRevision, fileName string, placeholders *map[string]string) (string, error) { - fileContent, err := f.AsKubeAdmin.CommonController.Gitlab.GetFile(repoName, ".template/" + fileName, repoRevision) +func templateRepoFile(f *framework.Framework, repoUrl, repoRevision, sourceRepo, sourceRepoDir, fileName string, placeholders *map[string]string) (string, error) { + fileContent, err := getRepoFileContent(f, sourceRepo, "main", sourceRepoDir + fileName) if err != nil { - return "", fmt.Errorf("Failed to get file: %v", err) + return "", err } for key, value := range *placeholders { @@ -85,13 +128,12 @@ func templateRepoFileGitlab(f *framework.Framework, repoName, repoRevision, file fileName = strings.ReplaceAll(fileName, key, value) } - commitID, err := f.AsKubeAdmin.CommonController.Gitlab.UpdateFile(repoName, ".tekton/" + fileName, fileContent, repoRevision) + commitSha, err := updateRepoFileContent(f, repoUrl, repoRevision, ".tekton/" + fileName, fileContent) if err != nil { - return "", fmt.Errorf("Failed to update file: %v", err) + return "", err } - logging.Logger.Info("Templated file %s with commit %s", fileName, commitID) - return commitID, nil + return commitSha, nil } // Fork repository and return forked repo URL @@ -99,14 +141,19 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s // For PaC testing, let's template repo and return forked repo name var forkRepo *github.Repository var 
sourceName string + var sourceOrgName string var targetName string var err error - // Parse just repo name out of input repo url and construct target repo name + // Parse just repo name and org out of input repo url and construct target repo name sourceName, err = getRepoNameFromRepoUrl(repoUrl) if err != nil { return "", err } + sourceOrgName, err = getRepoOrgFromRepoUrl(repoUrl) + if err != nil { + return "", err + } targetName = fmt.Sprintf("%s-%s", sourceName, username) if strings.Contains(repoUrl, "gitlab.") { @@ -136,7 +183,7 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s // Create fork and make sure it appears err = utils.WaitUntilWithInterval(func() (done bool, err error) { - forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepository(sourceName, targetName) + forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepositoryFromOrg(sourceName, targetName, sourceOrgName) if err != nil { logging.Logger.Debug("Repo forking failed, trying again: %v", err) return false, nil @@ -152,26 +199,15 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s } // Template PaC files -func templateFiles(f *framework.Framework, repoUrl, repoRevision string, placeholders *map[string]string) (*map[string]string, error) { - var sha string - - // Get repo name from repo url - repoName, err := getRepoNameFromRepoUrl(repoUrl) - if err != nil { - return nil, err - } - +func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, sourceRepoDir string, placeholders *map[string]string) (*map[string]string, error) { // Template files we care about shaMap := &map[string]string{} for _, file := range fileList { - if strings.Contains(repoUrl, "gitlab.") { - sha, err = templateRepoFileGitlab(f, repoName, repoRevision, file, placeholders) - } else { - sha, err = templateRepoFileGithub(f, repoName, repoRevision, file, placeholders) - } + sha, err := templateRepoFile(f, repoUrl, repoRevision, 
sourceRepo, sourceRepoDir, file, placeholders) if err != nil { return nil, err } + logging.Logger.Debug("Templated file %s with commit %s", file, sha) (*shaMap)[file] = sha } diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index 61b54da0de..3e55951cfc 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -4,38 +4,41 @@ import "encoding/json" import "fmt" import "os" import "time" +import "strings" // Struct to hold command line options type Opts struct { - ApplicationsCount int - BuildPipelineSelectorBundle string - ComponentContainerContext string - ComponentContainerFile string - ComponentRepoRevision string - ComponentRepoUrl string - ComponentsCount int - Concurrency int - FailFast bool - JourneyDuration string - JourneyRepeats int - JourneyUntil time.Time - LogDebug bool - LogTrace bool - LogInfo bool - OutputDir string - PipelineMintmakerDisabled bool - PipelineRepoTemplating bool - PipelineImagePullSecrets []string - Purge bool - PurgeOnly bool - QuayRepo string - Stage bool - TestScenarioGitURL string - TestScenarioPathInRepo string - TestScenarioRevision string - UsernamePrefix string - WaitIntegrationTestsPipelines bool - WaitPipelines bool + ApplicationsCount int + BuildPipelineSelectorBundle string + ComponentContainerContext string + ComponentContainerFile string + ComponentRepoRevision string + ComponentRepoUrl string + ComponentsCount int + Concurrency int + FailFast bool + JourneyDuration string + JourneyRepeats int + JourneyUntil time.Time + LogDebug bool + LogTrace bool + LogInfo bool + OutputDir string + PipelineMintmakerDisabled bool + PipelineRepoTemplating bool + PipelineRepoTemplatingSource string + PipelineRepoTemplatingSourceDir string + PipelineImagePullSecrets []string + Purge bool + PurgeOnly bool + QuayRepo string + Stage bool + TestScenarioGitURL string + TestScenarioPathInRepo string + TestScenarioRevision string + UsernamePrefix string + 
WaitIntegrationTestsPipelines bool + WaitPipelines bool } // Pre-process load-test options before running the test @@ -52,6 +55,19 @@ func (o *Opts) ProcessOptions() error { o.Purge = true } + // If we are templating, set default values for relevant options if empty + if o.PipelineRepoTemplating { + if o.PipelineRepoTemplatingSource == "" { + o.PipelineRepoTemplatingSource = o.ComponentRepoUrl + } + if o.PipelineRepoTemplatingSourceDir == "" { + o.PipelineRepoTemplatingSourceDir = ".template/" + } + if strings.HasSuffix(o.PipelineRepoTemplatingSourceDir, "/") != true { + o.PipelineRepoTemplatingSourceDir = o.PipelineRepoTemplatingSourceDir + "/" + } + } + // Convert options struct to pretty JSON jsonOptions, err2 := json.MarshalIndent(o, "", " ") if err2 != nil { diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index 93b6889b9b..4172db7099 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -20,6 +20,8 @@ go run loadtest.go \ --journey-repeats "${JOURNEY_REPEATS:-1}" \ --log-"${LOGGING_LEVEL:-info}" \ --pipeline-repo-templating="${PIPELINE_REPO_TEMPLATING:-false}" \ + --pipeline-repo-templating-source="${PIPELINE_REPO_TEMPLATING_SOURCE:-}" \ + --pipeline-repo-templating-source-dir="${PIPELINE_REPO_TEMPLATING_SOURCE_DIR:-}" \ --output-dir "${OUTPUT_DIR:-.}" \ --purge="${PURGE:-true}" \ --quay-repo "${QUAY_REPO:-redhat-user-workloads-stage}" \ diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index b4e7fea113..e31f85ee47 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -76,6 +76,8 @@ go run loadtest.go \ --journey-repeats "${JOURNEY_REPEATS:-1}" \ --log-"${LOGGING_LEVEL:-info}" \ --pipeline-repo-templating="${PIPELINE_REPO_TEMPLATING:-false}" \ + --pipeline-repo-templating-source="${PIPELINE_REPO_TEMPLATING_SOURCE:-}" \ + --pipeline-repo-templating-source-dir="${PIPELINE_REPO_TEMPLATING_SOURCE_DIR:-}" \ --output-dir "${OUTPUT_DIR:-.}" \ --purge="${PURGE:-true}" \ --quay-repo 
"${QUAY_REPO:-stonesoup_perfscale}" \ From a7aeff233bc5389d39335a16e233dd223cc6075a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 11 Jun 2025 11:44:19 +0200 Subject: [PATCH 053/321] feat(KONFLUX-8333): Make it possible to skip creating ITS --- tests/load-tests/loadtest.go | 2 +- .../handle_integration_test_scenarios.go | 5 +++ .../load-tests/pkg/journey/handle_test_run.go | 42 ++++++++++--------- tests/load-tests/run-stage.sh | 6 +-- tests/load-tests/run.sh | 6 +-- 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 23112afd81..2fa5e00d7f 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -43,7 +43,7 @@ func init() { rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on stage") rootCmd.Flags().BoolVarP(&opts.Purge, "purge", "p", false, "purge all users or resources (on stage) after test is done") rootCmd.Flags().BoolVarP(&opts.PurgeOnly, "purge-only", "u", false, "do not run test, only purge resources (this implies --purge)") - rootCmd.Flags().StringVar(&opts.TestScenarioGitURL, "test-scenario-git-url", "https://github.com/konflux-ci/integration-examples.git", "test scenario GIT URL") + rootCmd.Flags().StringVar(&opts.TestScenarioGitURL, "test-scenario-git-url", "https://github.com/konflux-ci/integration-examples.git", "test scenario GIT URL (set to \"\" to disable creating these)") rootCmd.Flags().StringVar(&opts.TestScenarioRevision, "test-scenario-revision", "main", "test scenario GIT URL repo revision to use") rootCmd.Flags().StringVar(&opts.TestScenarioPathInRepo, "test-scenario-path-in-repo", "pipelines/integration_resolver_pipeline_pass.yaml", "test scenario path in GIT repo") rootCmd.Flags().BoolVarP(&opts.WaitPipelines, "waitpipelines", "w", false, "if you want to wait for pipelines to finish") diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go 
b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index 11a93276fb..c3713f87a7 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -19,6 +19,11 @@ func createIntegrationTestScenario(f *framework.Framework, namespace, name, appN } func HandleIntegrationTestScenario(ctx *PerApplicationContext) error { + if ctx.ParentContext.Opts.TestScenarioGitURL == "" { + logging.Logger.Debug("Integration Test Scenario GIT not provided, not creating it") + return nil + } + var err error name := fmt.Sprintf("%s-its-%s", ctx.ParentContext.Username, util.GenerateRandomString(5)) diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index ffa59cc1f5..4ddea62b19 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -106,26 +106,30 @@ func HandleTest(ctx *PerComponentContext) error { return logging.Logger.Fail(81, "Snapshot name type assertion failed") } - _, err = logging.Measure( - validateTestPipelineRunCreation, - ctx.Framework, - ctx.ParentContext.ParentContext.Namespace, - ctx.ParentContext.IntegrationTestScenarioName, - ctx.SnapshotName, - ) - if err != nil { - return logging.Logger.Fail(82, "Test Pipeline Run failed creation: %v", err) - } + if ctx.ParentContext.ParentContext.Opts.TestScenarioGitURL == "" { + logging.Logger.Debug("Integration Test Scenario GIT not provided, not waiting for it") + } else { + _, err = logging.Measure( + validateTestPipelineRunCreation, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + ctx.ParentContext.IntegrationTestScenarioName, + ctx.SnapshotName, + ) + if err != nil { + return logging.Logger.Fail(82, "Test Pipeline Run failed creation: %v", err) + } - _, err = logging.Measure( - validateTestPipelineRunCondition, - ctx.Framework, - ctx.ParentContext.ParentContext.Namespace, - 
ctx.ParentContext.IntegrationTestScenarioName, - ctx.SnapshotName, - ) - if err != nil { - return logging.Logger.Fail(83, "Test Pipeline Run failed run: %v", err) + _, err = logging.Measure( + validateTestPipelineRunCondition, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + ctx.ParentContext.IntegrationTestScenarioName, + ctx.SnapshotName, + ) + if err != nil { + return logging.Logger.Fail(83, "Test Pipeline Run failed run: %v", err) + } } return nil diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index 4172db7099..51b1c947da 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -25,9 +25,9 @@ go run loadtest.go \ --output-dir "${OUTPUT_DIR:-.}" \ --purge="${PURGE:-true}" \ --quay-repo "${QUAY_REPO:-redhat-user-workloads-stage}" \ - --test-scenario-git-url "${TEST_SCENARIO_GIT_URL:-https://github.com/konflux-ci/integration-examples.git}" \ - --test-scenario-path-in-repo "${TEST_SCENARIO_PATH_IN_REPO:-pipelines/integration_resolver_pipeline_pass.yaml}" \ - --test-scenario-revision "${TEST_SCENARIO_REVISION:-main}" \ + --test-scenario-git-url "${TEST_SCENARIO_GIT_URL-https://github.com/konflux-ci/integration-examples.git}" \ + --test-scenario-path-in-repo "${TEST_SCENARIO_PATH_IN_REPO-pipelines/integration_resolver_pipeline_pass.yaml}" \ + --test-scenario-revision "${TEST_SCENARIO_REVISION-main}" \ --username "${USER_PREFIX:-undef}" \ --waitintegrationtestspipelines="${WAIT_INTEGRATION_TESTS:-true}" \ --waitpipelines="${WAIT_PIPELINES:-true}" \ diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index e31f85ee47..2bc8d1838c 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -81,9 +81,9 @@ go run loadtest.go \ --output-dir "${OUTPUT_DIR:-.}" \ --purge="${PURGE:-true}" \ --quay-repo "${QUAY_REPO:-stonesoup_perfscale}" \ - --test-scenario-git-url "${TEST_SCENARIO_GIT_URL:-https://github.com/konflux-ci/integration-examples.git}" \ - --test-scenario-path-in-repo 
"${TEST_SCENARIO_PATH_IN_REPO:-pipelines/integration_resolver_pipeline_pass.yaml}" \ - --test-scenario-revision "${TEST_SCENARIO_REVISION:-main}" \ + --test-scenario-git-url "${TEST_SCENARIO_GIT_URL-https://github.com/konflux-ci/integration-examples.git}" \ + --test-scenario-path-in-repo "${TEST_SCENARIO_PATH_IN_REPO-pipelines/integration_resolver_pipeline_pass.yaml}" \ + --test-scenario-revision "${TEST_SCENARIO_REVISION-main}" \ --username "$USER_PREFIX" \ --waitintegrationtestspipelines="${WAIT_INTEGRATION_TESTS:-true}" \ --waitpipelines="${WAIT_PIPELINES:-true}" \ From c3d2cd47f8283c35acd0222571754902be27413a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 12 Jun 2025 16:56:43 +0200 Subject: [PATCH 054/321] feat(KONFLUX-8779): WIP --- pkg/clients/github/repositories.go | 8 +-- pkg/clients/gitlab/git.go | 21 +++----- .../pkg/journey/handle_component.go | 10 ++-- .../pkg/journey/handle_repo_templating.go | 49 +++++++++++++------ 4 files changed, 52 insertions(+), 36 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index bd6c8d7832..e6b754a1bb 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -171,7 +171,7 @@ func (g *Github) DeleteRepositoryIfExists(name string) error { return nil } -func (g *Github) forkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName, targetName string) (*github.Repository, error) { +func (g *Github) ForkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName, targetName string) (*github.Repository, error) { var fork *github.Repository var resp *github.Response var repo *github.Repository @@ -246,15 +246,15 @@ func (g *Github) forkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName // Fork repository in our organization func (g *Github) ForkRepository(sourceName, targetName string) (*github.Repository, error) { - return g.forkRepositoryWithOrgs(g.organization, sourceName, g.organization, targetName) + return 
g.ForkRepositoryWithOrgs(g.organization, sourceName, g.organization, targetName) } // For repozitory from our organization to another org func (g *Github) ForkRepositoryToOrg(sourceName, targetName, targetOrgName string) (*github.Repository, error) { - return g.forkRepositoryWithOrgs(g.organization, sourceName, targetOrgName, targetName) + return g.ForkRepositoryWithOrgs(g.organization, sourceName, targetOrgName, targetName) } // Fork repository from another organization to our org func (g *Github) ForkRepositoryFromOrg(sourceName, targetName, sourceOrgName string) (*github.Repository, error) { - return g.forkRepositoryWithOrgs(sourceOrgName, sourceName, g.organization, targetName) + return g.ForkRepositoryWithOrgs(sourceOrgName, sourceName, g.organization, targetName) } diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 63888c4821..6f6086ebf6 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -288,35 +288,30 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { // ForkRepository forks a source GitLab repository to a target repository. // Returns the newly forked repository and an error if the operation fails. 
-func (gc *GitlabClient) ForkRepository(sourceProjectID, targetProjectID string) (*gitlab.Project, error) { +func (gc *GitlabClient) ForkRepository(sourceOrgName, sourceName, targetOrgName, targetName string) (*gitlab.Project, error) { var forkedProject *gitlab.Project var resp *gitlab.Response var err error - targetSplit := strings.Split(targetProjectID,"/") - if len(targetSplit) != 2 { - return nil, fmt.Errorf("Failed to parse target repo %s to namespace and repo name", targetProjectID) - } - - targetNamespace := targetSplit[0] - targetRepo := targetSplit[1] + sourceProjectID := sourceOrgName + "/" + sourceName + targetProjectID := targetOrgName + "/" + targetName opts := &gitlab.ForkProjectOptions{ - Name: gitlab.Ptr(targetRepo), - NamespacePath: gitlab.Ptr(targetNamespace), - Path: gitlab.Ptr(targetRepo), + Name: gitlab.Ptr(targetName), + NamespacePath: gitlab.Ptr(targetOrgName), + Path: gitlab.Ptr(targetName), } err = utils.WaitUntilWithInterval(func() (done bool, err error) { forkedProject, resp, err = gc.client.Projects.ForkProject(sourceProjectID, opts) if err != nil { - fmt.Printf("Failed to fork, trying again: %v\n", err) + fmt.Printf("Failed to fork %s, trying again: %v\n", sourceProjectID, err) return false, nil } return true, nil }, time.Second * 10, time.Minute * 5) if err != nil { - return nil, fmt.Errorf("Error forking project %s to namespace %s: %w", sourceProjectID, targetNamespace, err) + return nil, fmt.Errorf("Error forking project %s to %s: %w", sourceProjectID, targetProjectID, err) } if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusAccepted { diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 0c74d385c5..706d868f60 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -239,7 +239,7 @@ func listAndDeletePipelineRunsWithTimeout(f *framework.Framework, namespace, app // This handles 
post-component creation tasks for multi-arch PaC workflow func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, appName, compName, repoUrl, repoRev, sourceRepo, sourceRepoDir string, mergeReqNum int, placeholders *map[string]string) error { - var repoName string + var repoId string var err error // Delete on-pull-request default pipeline run @@ -250,19 +250,19 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap logging.Logger.Debug("Repo-templating workflow: Cleaned up (first cleanup) for %s/%s/%s", namespace, appName, compName) // Merge default PaC pipelines PR - repoName, err = getRepoNameFromRepoUrl(repoUrl) + repoId, err = getRepoIdFromRepoUrl(repoUrl) if err != nil { return fmt.Errorf("Failed parsing repo name: %v", err) } if strings.Contains(repoUrl, "gitlab.") { - _, err = f.AsKubeAdmin.CommonController.Gitlab.AcceptMergeRequest(repoName, mergeReqNum) + _, err = f.AsKubeAdmin.CommonController.Gitlab.AcceptMergeRequest(repoId, mergeReqNum) } else { - _, err = f.AsKubeAdmin.CommonController.Github.MergePullRequest(repoName, mergeReqNum) + _, err = f.AsKubeAdmin.CommonController.Github.MergePullRequest(repoId, mergeReqNum) } if err != nil { return fmt.Errorf("Merging %d failed: %v", mergeReqNum, err) } - logging.Logger.Debug("Repo-templating workflow: Merged PR %d in %s", mergeReqNum, repoName) + logging.Logger.Debug("Repo-templating workflow: Merged PR %d in %s", mergeReqNum, repoId) // Delete all pipeline runs as we do not care about these err = listAndDeletePipelineRunsWithTimeout(f, namespace, appName, compName, "", 1) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 8288265b06..5b46b583e9 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -21,13 +21,10 @@ func getRepoNameFromRepoUrl(repoUrl string) (string, error) { // repoUrl: 
https://github.com/abc/nodejs-devfile-sample.git, match[1]: nodejs-devfile-sample // repoUrl: https://github.com/abc/nodejs-devfile-sample/, match[1]: nodejs-devfile-sample // repoUrl: https://github.com/abc/nodejs-devfile-sample, match[1]: nodejs-devfile-sample - // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: abc/nodejs-devfile-sample + // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: nodejs-devfile-sample + // repoUrl: https://gitlab.example.com/abc/def/nodejs-devfile-sample, match[1]: nodejs-devfile-sample var regex *regexp.Regexp - if strings.Contains(repoUrl, "gitlab.") { - regex = regexp.MustCompile(`/([^/]+/[^/]+?)(.git)?/?$`) - } else { - regex = regexp.MustCompile(`/([^/]+?)(.git)?/?$`) - } + regex = regexp.MustCompile(`/([^/]+?)(.git)?/?$`) match := regex.FindStringSubmatch(repoUrl) if match != nil { return match[1], nil @@ -45,8 +42,9 @@ func getRepoOrgFromRepoUrl(repoUrl string) (string, error) { // repoUrl: https://github.com/abc/nodejs-devfile-sample/, match[1]: abc // repoUrl: https://github.com/abc/nodejs-devfile-sample, match[1]: abc // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: abc + // repoUrl: https://gitlab.example.com/abc/def/nodejs-devfile-sample, match[1]: abc/def var regex *regexp.Regexp - regex = regexp.MustCompile(`[^/]+://[^/]+/(.*)/.*$`) + regex = regexp.MustCompile(`^[^/]+://[^/]+/(.*)/.+(.git)?/?$`) match := regex.FindStringSubmatch(repoUrl) if match != nil { return match[1], nil @@ -55,6 +53,19 @@ func getRepoOrgFromRepoUrl(repoUrl string) (string, error) { } } +// Parse repo ID (/) out of repo url +func getRepoIdFromRepoUrl(repoUrl string) (string, error) { + repoOrgName, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil { + return "", err + } + repoName, err := getRepoNameFromRepoUrl(repoUrl) + if err != nil { + return "", err + } + return repoOrgName + "/" + repoName, nil +} + // Get file content from repository, no matter if on GitLab or 
GitHub func getRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileName string) (string, error) { var fileContent string @@ -63,11 +74,15 @@ func getRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileName if err != nil { return "", err } + repoOrgName, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil { + return "", err + } if strings.Contains(repoUrl, "gitlab.") { - fileContent, err = f.AsKubeAdmin.CommonController.Gitlab.GetFile(repoName, fileName, repoRevision) + fileContent, err = f.AsKubeAdmin.CommonController.Gitlab.GetFile(repoOrgName + "/" + repoName, fileName, repoRevision) if err != nil { - return "", fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoName, repoRevision, err) + return "", fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoOrgName + "/" + repoName, repoRevision, err) } } else { fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) @@ -92,11 +107,15 @@ func updateRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileNa if err != nil { return "", err } + repoOrgName, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil { + return "", err + } if strings.Contains(repoUrl, "gitlab.") { - commitSha, err = f.AsKubeAdmin.CommonController.Gitlab.UpdateFile(repoName, fileName, fileContent, repoRevision) + commitSha, err = f.AsKubeAdmin.CommonController.Gitlab.UpdateFile(repoOrgName + "/" + repoName, fileName, fileContent, repoRevision) if err != nil { - return "", fmt.Errorf("Failed to update file %s in repo %s revision %s: %v", fileName, repoName, repoRevision, err) + return "", fmt.Errorf("Failed to update file %s in repo %s revision %s: %v", fileName, repoOrgName + "/" + repoName, repoRevision, err) } } else { fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) @@ -137,7 +156,7 @@ func templateRepoFile(f *framework.Framework, repoUrl, repoRevision, 
sourceRepo, } // Fork repository and return forked repo URL -func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (string, error) { +func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username, targetOrgName string) (string, error) { // For PaC testing, let's template repo and return forked repo name var forkRepo *github.Repository var sourceName string @@ -154,19 +173,20 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username string) (s if err != nil { return "", err } + targetName = fmt.Sprintf("%s-%s", sourceName, username) if strings.Contains(repoUrl, "gitlab.") { logging.Logger.Debug("Forking Gitlab repository %s", repoUrl) // Cleanup if it already exists - err = f.AsKubeAdmin.CommonController.Gitlab.DeleteRepositoryIfExists(targetName) + err = f.AsKubeAdmin.CommonController.Gitlab.DeleteRepositoryIfExists(targetOrgName + "/" + targetName) if err != nil { return "", err } // Create fork and make sure it appears - forkedRepoURL, err := f.AsKubeAdmin.CommonController.Gitlab.ForkRepository(sourceName, targetName) + forkedRepoURL, err := f.AsKubeAdmin.CommonController.Gitlab.ForkRepository(sourceOrgName, sourceName, targetOrgName, targetName) if err != nil { return "", err } @@ -222,6 +242,7 @@ func HandleRepoForking(ctx *MainContext) error { ctx.Opts.ComponentRepoUrl, ctx.Opts.ComponentRepoRevision, ctx.Username, + "jhutar", // FIXME FIXME FIXME ) if err != nil { return logging.Logger.Fail(80, "Repo forking failed: %v", err) From 15aedf8cfb9c2ec2cf70c70b0f6fbc9627b998d6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 08:08:53 +0200 Subject: [PATCH 055/321] feat: New error: Build Pipeline Run failed creation: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 80a43dd4f2..584463ca44 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -40,6 +40,7 @@ "Timeout 
getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", + "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", From 7d029700c5c1814cf13b32bbbce7b57a8fff285b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 09:19:48 +0200 Subject: [PATCH 056/321] feat: Add more debug statements to make it clearer on what phase we are in --- tests/load-tests/pkg/journey/handle_pipeline.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index 3e96afac46..dcbea9b1c0 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -113,7 +113,7 @@ func HandlePipelineRun(ctx *PerComponentContext) error { var err error - logging.Logger.Debug("Creating build pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( validatePipelineRunCreation, @@ -126,6 +126,8 @@ func 
HandlePipelineRun(ctx *PerComponentContext) error { return logging.Logger.Fail(70, "Build Pipeline Run failed creation: %v", err) } + logging.Logger.Debug("Build pipeline run for component %s in namespace %s created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + _, err = logging.Measure( validatePipelineRunCondition, ctx.Framework, @@ -137,6 +139,8 @@ func HandlePipelineRun(ctx *PerComponentContext) error { return logging.Logger.Fail(71, "Build Pipeline Run failed run: %v", err) } + logging.Logger.Debug("Build pipeline run for component %s in namespace %s succeeded", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + _, err = logging.Measure( validatePipelineRunSignature, ctx.Framework, @@ -148,5 +152,7 @@ func HandlePipelineRun(ctx *PerComponentContext) error { return logging.Logger.Fail(72, "Build Pipeline Run failed signing: %v", err) } + logging.Logger.Info("Build pipeline run for component %s in namespace %s OK", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + return nil } From 9aa6ab8d723286e895a2bb50f9333dbedb88ef86 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 09:41:27 +0200 Subject: [PATCH 057/321] fix: When ITS was disabled at a run time, do not consider it when checking completeness of journeys and counting KPI numbers --- .../load-tests/ci-scripts/collect-results.sh | 2 +- .../ci-scripts/stage/collect-results.sh | 2 +- tests/load-tests/evaluate.py | 25 ++++++++++++++++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index bad3d859bd..c43f95df52 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -37,7 +37,7 @@ python3 -m pip install matplotlib } &>"${ARTIFACT_DIR}/monitoring-setup.log" echo "[$(date --utc -Ins)] Create summary JSON with timings" -./evaluate.py "${ARTIFACT_DIR}/load-test-timings.csv" 
"${ARTIFACT_DIR}/load-test-timings.json" +./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 4d98889060..194cf512db 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -39,7 +39,7 @@ python3 -m pip install matplotlib } &>"${ARTIFACT_DIR}/monitoring-setup.log" echo "[$(date --utc -Ins)] Create summary JSON with timings" -./evaluate.py "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" +./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Create summary JSON with errors" ./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-errors.json" diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 3b27bbed50..0292748494 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -33,6 +33,14 @@ "validateTestPipelineRunCondition", ] +# These metrics will be ignored if ITS was skipped +METRICS_ITS = [ + "createIntegrationTestScenario", + "validateIntegrationTestScenario", + "validateTestPipelineRunCreation", + "validateTestPipelineRunCondition", +] + def str2date(date_str): if isinstance(date_str, datetime.datetime): @@ -81,8 +89,19 @@ def count_stats_when(data): def main(): - input_file = sys.argv[1] - output_file = sys.argv[2] + options_file = sys.argv[1] + input_file = sys.argv[2] + output_file = sys.argv[3] + + # Load test options + with open(options_file, "r") as fp: + options = json.load(fp) + + # Determine what 
metrics we need to skip based on options + METRICS_to_skip = [] + if options["TestScenarioGitURL"] == "": + print("NOTE: Ignoring ITS related metrics because they were disabled at test run") + METRICS_to_skip += METRICS_ITS stats_raw = {} @@ -115,7 +134,7 @@ def main(): kpi_mean = 0.0 kpi_errors = 0 - for m in METRICS: + for m in [m for m in METRICS if m not in METRICS_to_skip]: stats[m] = {"pass": {"duration": {"samples": 0}, "when": {}}, "fail": {"duration": {"samples": 0}, "when": {}}} if m in stats_raw: stats[m]["pass"]["duration"] = count_stats(stats_raw[m]["pass"]["duration"]) From 53606558555db916548f524b536baf5a222432ce Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 11:30:42 +0200 Subject: [PATCH 058/321] feat: Also store errors when KPI mean is -1 / no journey finished --- .../load-tests/ci-scripts/collect-results.sh | 5 ++- .../ci-scripts/stage/collect-results.sh | 2 +- tests/load-tests/errors.py | 32 ++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index c43f95df52..d3b9c8eddd 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -39,6 +39,9 @@ python3 -m pip install matplotlib echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" +echo "[$(date --utc -Ins)] Create summary JSON with errors" +./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" "${ARTIFACT_DIR}/load-test-errors.json" + echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" mv "${ARTIFACT_DIR}/output.svg" "${ARTIFACT_DIR}/show-pipelines.svg" @@ -51,7 +54,7 @@ 
STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" status_data.py \ --status-data-file "${STATUS_DATA_FILE}" \ --set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \ - --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" "results.durations=${ARTIFACT_DIR}/get-taskruns-durations.json" + --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" "results.errors=${ARTIFACT_DIR}/load-test-errors.json" "results.durations=${ARTIFACT_DIR}/get-taskruns-durations.json" echo "[$(date --utc -Ins)] Adding monitoring data" mstarted="$( date -d "$( cat started )" --utc -Iseconds )" diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 194cf512db..9cd2479622 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -42,7 +42,7 @@ echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Create summary JSON with errors" -./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-errors.json" +./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" "${ARTIFACT_DIR}/load-test-errors.json" echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" || true diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 584463ca44..371475728a 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -65,9 +65,19 @@ def message_to_reason(msg: str) -> str | None: return "UNKNOWN" +def 
add_reason(error_messages, error_by_code, error_by_reason, message, reason="", code=0): + if reason == "": + reason = message + print("Added", message, reason, code) + error_messages.append(message) + error_by_code[code] += 1 + error_by_reason[reason] += 1 + + def main(): input_file = sys.argv[1] - output_file = sys.argv[2] + timings_file = sys.argv[2] + output_file = sys.argv[3] error_messages = [] # list of error messages error_by_code = collections.defaultdict( @@ -89,12 +99,26 @@ def main(): reason = message_to_reason(message) - error_messages.append(message) - error_by_code[code] += 1 - error_by_reason[reason] += 1 + add_reason(error_messages, error_by_code, error_by_reason, message, reason, code) except FileNotFoundError: print("No errors file found, good :-D") + timings = {} + try: + with open(timings_file, "r") as fp: + timings = json.load(fp) + except FileNotFoundError: + print("No timings file found, strange :-/") + error_messages.append("No timings file found") + add_reason(error_messages, error_by_code, error_by_reason, "No timings file found") + + try: + if timings["KPI"]["mean"] == -1: + add_reason(error_messages, error_by_code, error_by_reason, "No test run finished") + except KeyError: + print("No KPI metrics in timings data, strange :-(") + add_reason(error_messages, error_by_code, error_by_reason, "No KPI metrics in timings data") + data = { "error_by_code": error_by_code, "error_by_reason": error_by_reason, From e3ae6ed854a60745f0c367c3faad7e83ede3c36a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 11:58:38 +0200 Subject: [PATCH 059/321] test: Adding tests for getRepoNameFromRepoUrl, getRepoOrgFromRepoUrl and getRepoIdFromRepoUrl --- tests/load-tests/pkg/journey/journey_test.go | 92 ++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 tests/load-tests/pkg/journey/journey_test.go diff --git a/tests/load-tests/pkg/journey/journey_test.go b/tests/load-tests/pkg/journey/journey_test.go new file mode 100644 index 
0000000000..41178244ef --- /dev/null +++ b/tests/load-tests/pkg/journey/journey_test.go @@ -0,0 +1,92 @@ +package journey + +import "testing" + +// Test basic input and output combinations for getRepoNameFromRepoUrl. +func Test_getRepoNameFromRepoUrl(t *testing.T) { + repoName := "nodejs-devfile-sample" + repoUrls := []string{ + "https://github.com/abc/nodejs-devfile-sample.git/", + "https://github.com/abc/nodejs-devfile-sample.git", + "https://github.com/abc/nodejs-devfile-sample/", + "https://github.com/abc/nodejs-devfile-sample", + "https://gitlab.example.com/abc/nodejs-devfile-sample", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git/", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample/", + } + for _, repoUrl := range repoUrls { + out, err := getRepoNameFromRepoUrl(repoUrl) + if err != nil || out != repoName { + t.Errorf("Failed getting '%s' from '%s': %v", repoName, repoUrl, err) + } + } +} + +// Test basic input and output combinations for getRepoOrgFromRepoUrl. 
+func Test_getRepoOrgFromRepoUrl(t *testing.T) { + repoName := "abc" + repoUrls := []string{ + "https://github.com/abc/nodejs-devfile-sample.git/", + "https://github.com/abc/nodejs-devfile-sample.git", + "https://github.com/abc/nodejs-devfile-sample/", + "https://github.com/abc/nodejs-devfile-sample", + "https://gitlab.example.com/abc/nodejs-devfile-sample", + "https://gitlab.example.com/abc/nodejs-devfile-sample.git", + "https://gitlab.example.com/abc/nodejs-devfile-sample.git/", + "https://gitlab.example.com/abc/nodejs-devfile-sample/", + } + for _, repoUrl := range repoUrls { + out, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil || out != repoName { + t.Errorf("Failed getting '%s' from '%s': %v", repoName, repoUrl, err) + } + } + + repoName = "abc/def" + repoUrls = []string{ + "https://gitlab.example.com/abc/def/nodejs-devfile-sample", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git/", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample/", + } + for _, repoUrl := range repoUrls { + out, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil || out != repoName { + t.Errorf("Failed getting '%s' from '%s': %v", repoName, repoUrl, err) + } + } +} + +// Test various input and output combinations for getRepoIdFromRepoUrl. 
+func Test_getRepoIdFromRepoUrl(t *testing.T) { + repoName := "abc/nodejs-devfile-sample" + repoUrls := []string{ + "https://github.com/abc/nodejs-devfile-sample.git/", + "https://github.com/abc/nodejs-devfile-sample.git", + "https://github.com/abc/nodejs-devfile-sample/", + "https://github.com/abc/nodejs-devfile-sample", + "https://gitlab.example.com/abc/nodejs-devfile-sample", + } + for _, repoUrl := range repoUrls { + out, err := getRepoIdFromRepoUrl(repoUrl) + if err != nil || out != repoName { + t.Errorf("Failed getting '%s' from '%s': %v", repoName, repoUrl, err) + } + } + + repoName = "abc/def/nodejs-devfile-sample" + repoUrls = []string{ + "https://gitlab.example.com/abc/def/nodejs-devfile-sample", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample.git/", + "https://gitlab.example.com/abc/def/nodejs-devfile-sample/", + } + for _, repoUrl := range repoUrls { + out, err := getRepoIdFromRepoUrl(repoUrl) + if err != nil || out != repoName { + t.Errorf("Failed getting '%s' from '%s': %v", repoName, repoUrl, err) + } + } +} From d3a7fa17f6e802aad2a5c08ef17b51b059283257 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 12:51:42 +0200 Subject: [PATCH 060/321] feat: Make target forking organization/namespace configurable --- tests/load-tests/loadtest.go | 1 + .../load-tests/pkg/journey/handle_component.go | 17 ++++++++++------- .../pkg/journey/handle_repo_templating.go | 8 +++----- tests/load-tests/pkg/options/options.go | 9 +++++++++ tests/load-tests/run-stage.sh | 1 + tests/load-tests/run.sh | 1 + 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 2fa5e00d7f..4b83f09cee 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -38,6 +38,7 @@ func init() { rootCmd.Flags().StringVar(&opts.ComponentRepoRevision, "component-repo-revision", "main", "the component repo revision, git 
branch") rootCmd.Flags().StringVar(&opts.ComponentContainerFile, "component-repo-container-file", "Dockerfile", "the component repo container file to build") rootCmd.Flags().StringVar(&opts.ComponentContainerContext, "component-repo-container-context", "/", "the context for image build") + rootCmd.Flags().StringVar(&opts.ForkTarget, "fork-target", "", "the target namespace (GitLab) or organization (GitHub) to fork component repository to (if empty, will use MY_GITHUB_ORG env variable)") rootCmd.Flags().StringVar(&opts.QuayRepo, "quay-repo", "redhat-user-workloads-stage", "the target quay repo for PaC templated image pushes") rootCmd.Flags().StringVar(&opts.UsernamePrefix, "username", "testuser", "the prefix used for usersignup names") rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on stage") diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 706d868f60..81dc80e5bb 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -239,7 +239,6 @@ func listAndDeletePipelineRunsWithTimeout(f *framework.Framework, namespace, app // This handles post-component creation tasks for multi-arch PaC workflow func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, appName, compName, repoUrl, repoRev, sourceRepo, sourceRepoDir string, mergeReqNum int, placeholders *map[string]string) error { - var repoId string var err error // Delete on-pull-request default pipeline run @@ -250,19 +249,23 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap logging.Logger.Debug("Repo-templating workflow: Cleaned up (first cleanup) for %s/%s/%s", namespace, appName, compName) // Merge default PaC pipelines PR - repoId, err = getRepoIdFromRepoUrl(repoUrl) - if err != nil { - return fmt.Errorf("Failed parsing repo name: %v", err) - } if strings.Contains(repoUrl, "gitlab.") { + repoId, 
err := getRepoIdFromRepoUrl(repoUrl) + if err != nil { + return fmt.Errorf("Failed parsing repo org/name: %v", err) + } _, err = f.AsKubeAdmin.CommonController.Gitlab.AcceptMergeRequest(repoId, mergeReqNum) } else { - _, err = f.AsKubeAdmin.CommonController.Github.MergePullRequest(repoId, mergeReqNum) + repoName, err := getRepoIdFromRepoUrl(repoUrl) + if err != nil { + return fmt.Errorf("Failed parsing repo name: %v", err) + } + _, err = f.AsKubeAdmin.CommonController.Github.MergePullRequest(repoName, mergeReqNum) } if err != nil { return fmt.Errorf("Merging %d failed: %v", mergeReqNum, err) } - logging.Logger.Debug("Repo-templating workflow: Merged PR %d in %s", mergeReqNum, repoId) + logging.Logger.Debug("Repo-templating workflow: Merged PR %d in %s", mergeReqNum, repoUrl) // Delete all pipeline runs as we do not care about these err = listAndDeletePipelineRunsWithTimeout(f, namespace, appName, compName, "", 1) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 5b46b583e9..64a8c06266 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -177,8 +177,6 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username, targetOrg targetName = fmt.Sprintf("%s-%s", sourceName, username) if strings.Contains(repoUrl, "gitlab.") { - logging.Logger.Debug("Forking Gitlab repository %s", repoUrl) - // Cleanup if it already exists err = f.AsKubeAdmin.CommonController.Gitlab.DeleteRepositoryIfExists(targetOrgName + "/" + targetName) if err != nil { @@ -193,8 +191,6 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username, targetOrg return forkedRepoURL.WebURL, nil } else { - logging.Logger.Debug("Forking Github repository %s", repoUrl) - // Cleanup if it already exists err = f.AsKubeAdmin.CommonController.Github.DeleteRepositoryIfExists(targetName) if err != nil { @@ -242,12 +238,14 @@ func 
HandleRepoForking(ctx *MainContext) error { ctx.Opts.ComponentRepoUrl, ctx.Opts.ComponentRepoRevision, ctx.Username, - "jhutar", // FIXME FIXME FIXME + ctx.Opts.ForkTarget, ) if err != nil { return logging.Logger.Fail(80, "Repo forking failed: %v", err) } + logging.Logger.Info("Forked %s to %s", ctx.Opts.ComponentRepoUrl, forkUrl) + ctx.ComponentRepoUrl = forkUrl return nil diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index 3e55951cfc..246103cdc2 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -17,6 +17,7 @@ type Opts struct { ComponentsCount int Concurrency int FailFast bool + ForkTarget string JourneyDuration string JourneyRepeats int JourneyUntil time.Time @@ -68,6 +69,14 @@ func (o *Opts) ProcessOptions() error { } } + // If forking target directory was empty, use MY_GITHUB_ORG env variable + if o.ForkTarget == "" { + o.ForkTarget = os.Getenv("MY_GITHUB_ORG") + if o.ForkTarget == "" { + return fmt.Errorf("Was not able to get fork target") + } + } + // Convert options struct to pretty JSON jsonOptions, err2 := json.MarshalIndent(o, "", " ") if err2 != nil { diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index 51b1c947da..f11d9b501d 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -16,6 +16,7 @@ go run loadtest.go \ --component-repo-revision "${COMPONENT_REPO_REVISION:-main}" \ --components-count "${COMPONENTS_COUNT:-1}" \ --concurrency "${CONCURRENCY:-1}" \ + --fork-target "${FORK_TARGET:-}" \ --journey-duration "${JOURNEY_DURATION:-1h}" \ --journey-repeats "${JOURNEY_REPEATS:-1}" \ --log-"${LOGGING_LEVEL:-info}" \ diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index 2bc8d1838c..428d29c75b 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -72,6 +72,7 @@ go run loadtest.go \ --component-repo-revision "${COMPONENT_REPO_REVISION:-main}" \ --components-count 
"${COMPONENTS_COUNT:-1}" \ --concurrency "${CONCURRENCY:-1}" \ + --fork-target "${FORK_TARGET:-}" \ --journey-duration "${JOURNEY_DURATION:-1h}" \ --journey-repeats "${JOURNEY_REPEATS:-1}" \ --log-"${LOGGING_LEVEL:-info}" \ From 1831534b4d236872a31eb3859c52dfb5eefa5e7b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 13:17:29 +0200 Subject: [PATCH 061/321] debug: Show MR URL --- tests/load-tests/pkg/journey/handle_component.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 81dc80e5bb..86a08acfce 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -73,6 +73,7 @@ func getPaCPull(annotations map[string]string) (string, error) { // Get "merge-url" if data, ok = pac["merge-url"].(string); ok { + logging.Logger.Debug("Found PaC merge request URL: %s", data) return data, nil } else { return "", fmt.Errorf("Failed parsing state: %s", buildStatusValue) From 95ba5c4aa944a1e4819d5f859357b87773891bfd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 13:37:01 +0200 Subject: [PATCH 062/321] feat: Only add a note about 'No test run finished' if there is no any other error there already --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 371475728a..b7ee9e3806 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -114,7 +114,8 @@ def main(): try: if timings["KPI"]["mean"] == -1: - add_reason(error_messages, error_by_code, error_by_reason, "No test run finished") + if len(error_messages) == 0: + add_reason(error_messages, error_by_code, error_by_reason, "No test run finished") except KeyError: print("No KPI metrics in timings data, strange :-(") add_reason(error_messages, error_by_code, error_by_reason, "No KPI metrics in timings data") From 
2f823a7606d2cd1eeea9f43360f0414f1cf2b22f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 13 Jun 2025 13:50:29 +0200 Subject: [PATCH 063/321] fix: Really get just a repo name, not org/name --- tests/load-tests/pkg/journey/handle_component.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 86a08acfce..d2b1782c1f 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -257,7 +257,7 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap } _, err = f.AsKubeAdmin.CommonController.Gitlab.AcceptMergeRequest(repoId, mergeReqNum) } else { - repoName, err := getRepoIdFromRepoUrl(repoUrl) + repoName, err := getRepoNameFromRepoUrl(repoUrl) if err != nil { return fmt.Errorf("Failed parsing repo name: %v", err) } From 1e098353f74c859e5837b0ecf9533f46b3f8cbd8 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Sat, 14 Jun 2025 13:10:46 +0200 Subject: [PATCH 064/321] feat: Allow configuring fork repo name --- tests/load-tests/loadtest.go | 2 +- .../pkg/journey/handle_repo_templating.go | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 4b83f09cee..a3e8286f40 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -40,7 +40,7 @@ func init() { rootCmd.Flags().StringVar(&opts.ComponentContainerContext, "component-repo-container-context", "/", "the context for image build") rootCmd.Flags().StringVar(&opts.ForkTarget, "fork-target", "", "the target namespace (GitLab) or organization (GitHub) to fork component repository to (if empty, will use MY_GITHUB_ORG env variable)") rootCmd.Flags().StringVar(&opts.QuayRepo, "quay-repo", "redhat-user-workloads-stage", "the target quay repo for PaC templated image pushes") - 
rootCmd.Flags().StringVar(&opts.UsernamePrefix, "username", "testuser", "the prefix used for usersignup names") + rootCmd.Flags().StringVar(&opts.UsernamePrefix, "username", "testuser", "identifier used for prefix of usersignup names and as suffix when forking repo") rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on stage") rootCmd.Flags().BoolVarP(&opts.Purge, "purge", "p", false, "purge all users or resources (on stage) after test is done") rootCmd.Flags().BoolVarP(&opts.PurgeOnly, "purge-only", "u", false, "do not run test, only purge resources (this implies --purge)") diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 64a8c06266..c0d030186a 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -156,7 +156,7 @@ func templateRepoFile(f *framework.Framework, repoUrl, repoRevision, sourceRepo, } // Fork repository and return forked repo URL -func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username, targetOrgName string) (string, error) { +func ForkRepo(f *framework.Framework, repoUrl, repoRevision, suffix, targetOrgName string) (string, error) { // For PaC testing, let's template repo and return forked repo name var forkRepo *github.Repository var sourceName string @@ -174,7 +174,7 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, username, targetOrg return "", err } - targetName = fmt.Sprintf("%s-%s", sourceName, username) + targetName = fmt.Sprintf("%s-%s", sourceName, suffix) if strings.Contains(repoUrl, "gitlab.") { // Cleanup if it already exists @@ -231,13 +231,19 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so } func HandleRepoForking(ctx *MainContext) error { - logging.Logger.Debug("Forking repository %s for user %s", ctx.Opts.ComponentRepoUrl, ctx.Username) + var suffix string + if ctx.Opts.Stage { 
+ suffix = ctx.Opts.UsernamePrefix + "-" + ctx.Username + } else { + suffix = ctx.Username + } + logging.Logger.Debug("Forking repository %s with suffix %s to %s", ctx.Opts.ComponentRepoUrl, suffix, ctx.Opts.ForkTarget) forkUrl, err := ForkRepo( ctx.Framework, ctx.Opts.ComponentRepoUrl, ctx.Opts.ComponentRepoRevision, - ctx.Username, + suffix, ctx.Opts.ForkTarget, ) if err != nil { From ed052dca97e593d96d5cc0da5db9244fea8deebe Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 17 Jun 2025 14:23:31 +0200 Subject: [PATCH 065/321] feat: New error: Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index b7ee9e3806..ad26bd05e7 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -15,6 +15,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { + "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", From 4ae007da7589378df8b7a01a5247b51be9833791 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 17 Jun 2025 15:53:40 +0200 
Subject: [PATCH 066/321] feat: Need to regenerate on stone-stg-rh01 and updated for new UI --- .../utility_scripts/playwright-update-tokens.py | 7 ++++--- .../ci-scripts/utility_scripts/playwright_lib.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py index afd146414c..51cce3ffe0 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py @@ -2,7 +2,7 @@ # Docs: # This script uses credentials (username and password) from users.json -# to login to console.dev.redhat.com and generate new offline token. It +# to login to console.redhat.com and generate new offline token. It # saves updated content to users-new.json. # # Setup: @@ -55,8 +55,9 @@ def workload(user): playwright_lib.form_login(page, username, password) # Go to OpenShift Token page - page.goto("https://console.dev.redhat.com/openshift/token") - page.wait_for_url("https://console.dev.redhat.com/openshift/token**") + page.goto("https://console.redhat.com/openshift/token") + page.locator('//a[@href="/openshift/token"]').click() + page.wait_for_url("https://console.redhat.com/openshift/token**") page.wait_for_selector('//h2[text()="Connect with offline tokens"]') # Wait for token diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py b/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py index 354cfee9d8..2aa9ae362f 100644 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py @@ -6,15 +6,15 @@ def goto_login_and_accept_cookies(page): """Open a login page and accept cookies dialog""" - page.goto("https://console.dev.redhat.com") + page.goto("https://console.redhat.com") page.wait_for_url("https://sso.redhat.com/**") - # 
Accept cookies - cookies_iframe = page.frame_locator('iframe[name="trustarc_cm"]') - cookies_button = cookies_iframe.get_by_role( - "button", name="Agree and proceed with standard settings" - ) - cookies_button.click() + #### Accept cookies + ###cookies_iframe = page.frame_locator('iframe[name="trustarc_cm"]') + ###cookies_button = cookies_iframe.get_by_role( + ### "button", name="Agree and proceed with standard settings" + ###) + ###cookies_button.click() def form_login(page, username, password): @@ -30,5 +30,5 @@ def form_login(page, username, password): input_pass.wait_for(state="visible") input_pass.fill(password) page.locator('//button[@id="rh-password-verification-submit-button"]').click() - page.wait_for_url("https://console.dev.redhat.com/**") + page.wait_for_url("https://console.redhat.com/**") page.wait_for_selector('//h2[text()="Welcome to your Hybrid Cloud Console."]') From 76d9022498e1a7feb49825b3e3c10a0e12c7298a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 17 Jun 2025 15:57:59 +0200 Subject: [PATCH 067/321] fix: Make sure we process all users (not 0) when allow list is empty --- .../ci-scripts/utility_scripts/playwright-update-tokens.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py index 51cce3ffe0..0e304015e8 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py @@ -105,7 +105,8 @@ def main(): users_allowlist = [] # keep empty to allow all for user in users: - if users_allowlist is not [] and user["username"] not in users_allowlist: + if users_allowlist != [] and user["username"] not in users_allowlist: + print(f"Skipping user {user['username']} as it is not in allow list") continue result_queue = multiprocessing.Queue() process = 
multiprocessing.Process(target=process_it, args=(result_queue, user)) From 49f4781312066ae66f330f2e70ca5aad58d2c16d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 18 Jun 2025 09:50:27 +0200 Subject: [PATCH 068/321] feat: New error: Repo forking failed: Error checking repository rhtap-perf-test/nodejs-devfile-sample2-undef-jhutar: GET https://api.github.com/repos/rhtap-perf-test/nodejs-devfile-sample2-undef-jhutar: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists. [] --- .../ci-scripts/utility_scripts/playwright-update-tokens.py | 2 +- tests/load-tests/errors.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py index 0e304015e8..6a52287fd4 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py @@ -33,7 +33,7 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__))) import playwright_lib -PLAYWRIGHT_HEADLESS = False +PLAYWRIGHT_HEADLESS = True PLAYWRIGHT_VIDEO_DIR = "videos/" diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ad26bd05e7..a5b92f95a8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -36,6 +36,7 @@ "Post-test data collection failed": r"Failed to collect pipeline run JSONs", "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", + "Repo forking failed because 
gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", From cea0a2bf7c194ba7ce519fb96d927c0c846704fb Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Wed, 18 Jun 2025 08:06:29 -0400 Subject: [PATCH 069/321] feat: adding enhacements for playwright update token script and lib (cherry picked from commit c47ba8839627630596b553c12acc8556d8ff362e) --- .../playwright-update-tokens.py | 103 ++++++++++-------- .../utility_scripts/playwright_lib.py | 16 ++- 2 files changed, 66 insertions(+), 53 deletions(-) diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py index 6a52287fd4..bde60aba8d 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright-update-tokens.py @@ -29,6 +29,7 @@ import queue import os.path import sys +import traceback sys.path.append(os.path.dirname(os.path.realpath(__file__))) import playwright_lib @@ -38,55 +39,62 @@ def workload(user): - username = user["username"].replace("-", "_") - password = user["password"] - - with playwright.sync_api.sync_playwright() as p: - browser = p.chromium.launch( - headless=PLAYWRIGHT_HEADLESS, - ) - context = browser.new_context( - 
record_video_dir=PLAYWRIGHT_VIDEO_DIR, - ) - page = context.new_page() - - playwright_lib.goto_login_and_accept_cookies(page) - - playwright_lib.form_login(page, username, password) - - # Go to OpenShift Token page - page.goto("https://console.redhat.com/openshift/token") - page.locator('//a[@href="/openshift/token"]').click() - page.wait_for_url("https://console.redhat.com/openshift/token**") - page.wait_for_selector('//h2[text()="Connect with offline tokens"]') - - # Wait for token - button_token = page.locator('//button[text()="Load token"]') - if button_token.is_visible(): - button_token.click() - attempt = 1 - attempt_max = 100 - while True: - input_token = page.locator( - '//input[@aria-label="Copyable token" and not(contains(@value, "ocm login "))]' - ) - input_token_value = input_token.get_attribute("value") - # Token value is populated assynchronously, so call it ready once - # it is longer than string "" or "null" - if len(input_token_value) > 10: - break - if attempt > attempt_max: - input_token_value = "Failed" - break - attempt += 1 - time.sleep(1) - print(f"Token for user {username}: {input_token_value}") + try: + username = user["username"].replace("-", "_") + password = user["password"] - page.close() - browser.close() + with playwright.sync_api.sync_playwright() as p: + browser = p.chromium.launch( + headless=PLAYWRIGHT_HEADLESS, + ) + context = browser.new_context( + record_video_dir=PLAYWRIGHT_VIDEO_DIR, + ) + page = context.new_page() + + playwright_lib.goto_login_and_accept_cookies(page) + + playwright_lib.form_login(page, username, password) + + # Go to OpenShift Token page + page.goto("https://console.redhat.com/openshift/token") + page.wait_for_url("https://console.redhat.com/openshift/token**") + + # Confirm I want to load a token + page.locator('a:has-text("use API tokens to authenticate")').click() + + # Wait for token + button_token = page.locator('//button[text()="Load token"]') + if button_token.is_visible(): + button_token.click() + 
attempt = 1 + attempt_max = 100 + while True: + input_token = page.locator( + '//input[@aria-label="Copyable token" and not(contains(@value, "ocm login "))]' + ) + input_token_value = input_token.get_attribute("value") + # Token value is populated assynchronously, so call it ready once + # it is longer than string "" or "null" + if len(input_token_value) > 10: + break + if attempt > attempt_max: + input_token_value = "Failed" + break + attempt += 1 + time.sleep(1) + print(f"Token for user {username}: {input_token_value}") + + page.close() + browser.close() + + user["token"] = input_token_value + return user - user["token"] = input_token_value - return user + except Exception as e: + print(f"[ERROR] Failed while processing {user['username']}") + traceback.print_exc() + raise def process_it(output_queue, user): @@ -108,6 +116,7 @@ def main(): if users_allowlist != [] and user["username"] not in users_allowlist: print(f"Skipping user {user['username']} as it is not in allow list") continue + result_queue = multiprocessing.Queue() process = multiprocessing.Process(target=process_it, args=(result_queue, user)) process.start() diff --git a/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py b/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py index 2aa9ae362f..ab8630fc8b 100644 --- a/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py +++ b/tests/load-tests/ci-scripts/utility_scripts/playwright_lib.py @@ -9,12 +9,15 @@ def goto_login_and_accept_cookies(page): page.goto("https://console.redhat.com") page.wait_for_url("https://sso.redhat.com/**") - #### Accept cookies - ###cookies_iframe = page.frame_locator('iframe[name="trustarc_cm"]') - ###cookies_button = cookies_iframe.get_by_role( - ### "button", name="Agree and proceed with standard settings" - ###) - ###cookies_button.click() + # Accept cookies + cookies_iframe = page.frame_locator('iframe[name="trustarc_cm"]') + cookies_button = cookies_iframe.get_by_role( + "button", name="Agree and 
proceed with standard settings" + ) + if cookies_button.is_visible(): + cookies_button.click() + else: + print("Cookies button not found or already clicked.") def form_login(page, username, password): @@ -32,3 +35,4 @@ def form_login(page, username, password): page.locator('//button[@id="rh-password-verification-submit-button"]').click() page.wait_for_url("https://console.redhat.com/**") page.wait_for_selector('//h2[text()="Welcome to your Hybrid Cloud Console."]') + From 00b5536301b425fa3ead2c2c217b12a6cbbb4dd4 Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Fri, 20 Jun 2025 15:50:03 -0400 Subject: [PATCH 070/321] feat: Adding build pipeline run failure --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a5b92f95a8..1eef55708f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -46,6 +46,7 @@ "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", + "Build Pipeline Run failed" : r" PipelineRun for component .* in namespace .* failed" } From 203d9a5189a5740ad8e9f5c86b49bc0863e50236 Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Fri, 20 Jun 2025 15:59:34 -0400 Subject: [PATCH 071/321] feat: Build Pipeline Run failed run: PipelineRun for component jhutar-app-kdqbb-comp-0 in namespace jhutar-tenant failed --- tests/load-tests/errors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1eef55708f..85b156a8f2 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -16,6 +16,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { "Application 
creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", + "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component [\w-]+ in namespace [\w-]+ failed: .* Reason:Cancelled .* Message:PipelineRun .* was cancelled", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", @@ -23,11 +24,11 @@ "Couldnt get task via buldles resolver from quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests", "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", + "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post 
.*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", - "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service 
.*integration-service-webhook-service", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", @@ -46,7 +47,6 @@ "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", - "Build Pipeline Run failed" : r" PipelineRun for component .* in namespace .* failed" } From a91cf147fad14ad2f30b506a7110c0b1ef3d6f76 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 10:55:31 +0200 Subject: [PATCH 072/321] feat: Avoid confusion when MY_GITHUB_ORG is not set to same as --fork-target option --- tests/load-tests/pkg/journey/handle_repo_templating.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index c0d030186a..dafa6b231c 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -199,7 +199,7 @@ func ForkRepo(f *framework.Framework, repoUrl, repoRevision, suffix, targetOrgNa // Create fork and make sure it appears err = 
utils.WaitUntilWithInterval(func() (done bool, err error) { - forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepositoryFromOrg(sourceName, targetName, sourceOrgName) + forkRepo, err = f.AsKubeAdmin.CommonController.Github.ForkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName, targetName) if err != nil { logging.Logger.Debug("Repo forking failed, trying again: %v", err) return false, nil From f2c95e894c66977eccf745c77a2cee9ff55164d3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 25 Jun 2025 10:03:39 +0200 Subject: [PATCH 073/321] feat(KONFLUX-8856): Add more progress log messages when waiting for ITS run --- tests/load-tests/pkg/journey/handle_test_run.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 4ddea62b19..92a95b6bc1 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -12,6 +12,8 @@ import utils "github.com/konflux-ci/e2e-tests/pkg/utils" import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" func validateSnapshotCreation(f *framework.Framework, namespace, compName string) (string, error) { + logging.Logger.Debug("Waiting for snapshot for component %s in namespace %s to be created", compName, namespace) + interval := time.Second * 20 timeout := time.Minute * 5 var snap *appstudioApi.Snapshot @@ -30,6 +32,8 @@ func validateSnapshotCreation(f *framework.Framework, namespace, compName string } func validateTestPipelineRunCreation(f *framework.Framework, namespace, itsName, snapName string) error { + logging.Logger.Debug("Waiting for test pipeline run for ITS %s and snapshot %s in namespace %s to be created", itsName, snapName, namespace) + interval := time.Second * 20 timeout := time.Minute * 5 @@ -47,6 +51,8 @@ func validateTestPipelineRunCreation(f *framework.Framework, namespace, itsName, } func 
validateTestPipelineRunCondition(f *framework.Framework, namespace, itsName, snapName string) error { + logging.Logger.Debug("Waiting for test pipeline run for ITS %s and snapshot %s in namespace %s to finish", itsName, snapName, namespace) + interval := time.Second * 20 timeout := time.Minute * 10 var pr *pipeline.PipelineRun @@ -90,8 +96,6 @@ func HandleTest(ctx *PerComponentContext) error { var err error var ok bool - logging.Logger.Debug("Creating test pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) - result1, err1 := logging.Measure( validateSnapshotCreation, ctx.Framework, @@ -132,5 +136,7 @@ func HandleTest(ctx *PerComponentContext) error { } } + logging.Logger.Info("Integration Test Scenario for componet %s in namespace %s OK", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + return nil } From af211a0895f2fbc719408b89e9fb75a9781d1330 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 25 Jun 2025 10:11:59 +0200 Subject: [PATCH 074/321] fix(KONFLUX-8856): Avoid touching 'snap' if it was not loaded: panic: runtime error: invalid memory address or nil pointer dereference --- tests/load-tests/pkg/journey/handle_test_run.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 92a95b6bc1..f79fca26f3 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -28,6 +28,10 @@ func validateSnapshotCreation(f *framework.Framework, namespace, compName string return true, nil }, interval, timeout) + if err != nil { + return "", err + } + return snap.Name, err } From 0a3da45ec2f8952f41b3dd9d409260f3e017450f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 27 Jun 2025 09:15:35 +0200 Subject: [PATCH 075/321] feat(KONFLUX-8919): Add a way to add more details to 'Pipeline failed' reason --- .../ci-scripts/stage/collect-results.sh | 2 
+- tests/load-tests/errors.py | 134 +++++++++++++++++- 2 files changed, 132 insertions(+), 4 deletions(-) diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 9cd2479622..2fb1a44cfd 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -42,7 +42,7 @@ echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Create summary JSON with errors" -./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" "${ARTIFACT_DIR}/load-test-errors.json" +./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" "${ARTIFACT_DIR}/load-test-errors.json" "${ARTIFACT_DIR}/collected-data/" echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" || true diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 85b156a8f2..f2bf87f9ed 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -6,6 +6,8 @@ import re import sys import collections +import os +import time # Column indexes in input data @@ -49,8 +51,16 @@ "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", } +FAILED_PLR_ERRORS = { + "SKIP": r"Skipping step because a previous step failed", + "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", + "RPM build failed: bool cannot be defined via typedef": r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", + "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", +} + -def message_to_reason(msg: str) -> str | None: +def message_to_reason(reasons_and_errors: dict, msg: str) -> str | None: """ Classifies an error message using regular expressions and returns the error name. @@ -61,7 +71,7 @@ def message_to_reason(msg: str) -> str | None: The name of the error if a pattern matches, otherwise string "UNKNOWN". """ msg = msg.replace("\n", " ") # Remove newlines - for error_name, pattern in ERRORS.items(): + for error_name, pattern in reasons_and_errors.items(): if re.search(pattern, msg): return error_name print(f"Unknown error: {msg}") @@ -77,10 +87,124 @@ def add_reason(error_messages, error_by_code, error_by_reason, message, reason=" error_by_reason[reason] += 1 +def load(datafile): + if datafile.endswith(".yaml") or datafile.endswith(".yml"): + try: + with open(datafile, "r") as fd: + data = yaml.safe_load(fd) + except json.decoder.JSONDecodeError: + raise Exception(f"File {datafile} is malfrmed YAML, skipping it") + elif datafile.endswith(".json"): + try: + with open(datafile, "r") as fp: + data = json.load(fp) + except json.decoder.JSONDecodeError: + raise Exception(f"File {datafile} is malfrmed JSON, skipping it") + else: + raise Exception("Unknown data file format") + + return data + + +def find_first_failed_build_plr(data_dir): + """ This function is intended for jobs where we only run one concurrent + builds, so no more than one can 
failed: our load test probes. + + This is executed when test hits "Pipeline failed" error and this is + first step to identify task that failed so we can identify error in + the pod log. + + It goes through given data directory (probably "collected-data/") and + loads all files named "collected-pipelinerun-*" and checks that PLR is + a "build" PLR and it is failed one. + """ + + for currentpath, folders, files in os.walk(data_dir): + for datafile in files: + if not datafile.startswith("collected-pipelinerun-"): + continue + + datafile = os.path.join(currentpath, datafile) + data = load(datafile) + + # Skip PLRs that are not "build" PLRs + try: + if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != "build": + continue + except KeyError: + continue + + # Skip PLRs that did not failed + try: + succeeded = True + for c in data["status"]["conditions"]: + if c["type"] == "Succeeded": + if c["status"] == "False": + succeeded = False + break + if succeeded: + continue + except KeyError: + continue + + return data + +def find_trs(plr): + try: + for tr in plr["status"]["childReferences"]: + yield tr["name"] + except KeyError: + return + +def find_failed_containers(data_dir, ns, tr_name): + datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") + data = load(datafile) + + try: + pod_name = data["status"]["podName"] + for sr in data["status"]["steps"]: + if sr["terminated"]["exitCode"] != 0: + yield (pod_name, sr["container"]) + except KeyError: + return + +def load_container_log(data_dir, ns, pod_name, cont_name): + datafile = os.path.join(data_dir, ns, "1", "pod-" + pod_name + "-" + cont_name + ".log") + print(f"Checking errors in {datafile}") + with open(datafile, "r") as fd: + return fd.read() + +def investigate_failed_plr(dump_dir): + try: + reasons = [] + + plr = find_first_failed_build_plr(dump_dir) + if plr == None: + return ["SORRY PLR not found"] + + plr_ns = plr["metadata"]["namespace"] + + for tr_name in 
find_trs(plr): + for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): + log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) + reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) + + if reason == "SKIP": + continue + + reasons.append(reason) + + reasons = list(set(reasons)) # get unique reasons only + reasons.sort() # sort reasons + return reasons + except Exception as e: + return ["SORRY " + str(e)] + def main(): input_file = sys.argv[1] timings_file = sys.argv[2] output_file = sys.argv[3] + dump_dir = sys.argv[4] error_messages = [] # list of error messages error_by_code = collections.defaultdict( @@ -100,7 +224,11 @@ def main(): code = row[COLUMN_CODE] message = row[COLUMN_MESSAGE] - reason = message_to_reason(message) + reason = message_to_reason(ERRORS, message) + + if reason == "Pipeline failed": + reasons2 = investigate_failed_plr(dump_dir) + reason = reason + ": " + ", ".join(reasons2) add_reason(error_messages, error_by_code, error_by_reason, message, reason, code) except FileNotFoundError: From 0d3c9022802678f7ac06c99c6b764ca0167ac131 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 20 Jun 2025 15:46:15 +0200 Subject: [PATCH 076/321] feat(KONFLUX-8544): Setup releases for the application --- tests/load-tests/loadtest.go | 20 ++ .../pkg/journey/handle_releases_run.go | 89 ++++++++ .../pkg/journey/handle_releases_setup.go | 203 ++++++++++++++++++ tests/load-tests/pkg/options/options.go | 12 +- tests/load-tests/run-stage.sh | 6 + 5 files changed, 327 insertions(+), 3 deletions(-) create mode 100644 tests/load-tests/pkg/journey/handle_releases_run.go create mode 100644 tests/load-tests/pkg/journey/handle_releases_setup.go diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index a3e8286f40..d1be37deb4 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -47,8 +47,14 @@ func init() { rootCmd.Flags().StringVar(&opts.TestScenarioGitURL, "test-scenario-git-url", 
"https://github.com/konflux-ci/integration-examples.git", "test scenario GIT URL (set to \"\" to disable creating these)") rootCmd.Flags().StringVar(&opts.TestScenarioRevision, "test-scenario-revision", "main", "test scenario GIT URL repo revision to use") rootCmd.Flags().StringVar(&opts.TestScenarioPathInRepo, "test-scenario-path-in-repo", "pipelines/integration_resolver_pipeline_pass.yaml", "test scenario path in GIT repo") + rootCmd.Flags().StringVar(&opts.ReleasePolicy, "release-policy", "", "enterprise contract policy name to use, e.g. \"tmp-onboard-policy\" (keep empty to skip release testing)") + rootCmd.Flags().StringVar(&opts.ReleasePipelineUrl, "release-pipeline-url", "https://github.com/konflux-ci/release-service-catalog.git", "release pipeline URL suitable for git resolver") + rootCmd.Flags().StringVar(&opts.ReleasePipelineRevision, "release-pipeline-revision", "production", "release pipeline repo branch suitable for git resolver") + rootCmd.Flags().StringVar(&opts.ReleasePipelinePath, "release-pipeline-path", "pipelines/managed/e2e/e2e.yaml", "release pipeline file path suitable for git resolver") + rootCmd.Flags().StringVar(&opts.ReleasePipelineServiceAccount, "release-pipeline-service-account", "release-serviceaccount", "service account to use for release pipeline") rootCmd.Flags().BoolVarP(&opts.WaitPipelines, "waitpipelines", "w", false, "if you want to wait for pipelines to finish") rootCmd.Flags().BoolVarP(&opts.WaitIntegrationTestsPipelines, "waitintegrationtestspipelines", "i", false, "if you want to wait for IntegrationTests (Integration Test Scenario) pipelines to finish") + rootCmd.Flags().BoolVarP(&opts.WaitRelease, "waitrelease", "r", false, "if you want to wait for Release to finish") rootCmd.Flags().BoolVar(&opts.FailFast, "fail-fast", false, "if you want the test to fail fast at first failure") rootCmd.Flags().IntVarP(&opts.Concurrency, "concurrency", "c", 1, "number of concurrent threads to execute") 
rootCmd.Flags().IntVar(&opts.JourneyRepeats, "journey-repeats", 1, "number of times to repeat user journey (either this or --journey-duration)") @@ -260,12 +266,26 @@ func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { return } + // Create release plan and release plan admission + _, err = logging.Measure(journey.HandleReleaseSetup, perApplicationCtx) + if err != nil { + logging.Logger.Error("Thread failed: %v", err) + return + } + // Start given number of `perComponentThread()` threads using `journey.PerComponentSetup()` and wait for them to finish _, err = logging.Measure(journey.PerComponentSetup, perComponentThread, perApplicationCtx) if err != nil { logging.Logger.Fatal("Per component threads setup failed: %v", err) } + //// Wait for release to finish + //_, err = logging.Measure(journey.HandleReleaseRun, perApplicationCtx) + //if err != nil { + // logging.Logger.Error("Thread failed: %v", err) + // return + //} + } // Single component journey (there can be multiple parallel comps per app) diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go new file mode 100644 index 0000000000..a0388edec1 --- /dev/null +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -0,0 +1,89 @@ +package journey + +//import "fmt" +//import "strings" +//import "time" +// +//import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +// +//import framework "github.com/konflux-ci/e2e-tests/pkg/framework" +//import utils "github.com/konflux-ci/e2e-tests/pkg/utils" +//import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" +// +// +//// Wait for Release CR to be created +//func validateReleaseCreation(f *framework.Framework, namespace...) error { +//} +// +// +//// Wait for release pipeline run to be created +//func validateReleasePipelineRunCreation(f *framework.Framework, namespace...) 
error { +//} +// +// +//// Wait for release pipeline run to succeed +//func validateReleasePipelineRunCondition(f *framework.Framework, namespace...) error { +//} +// +// +//// Wait for Release CR to have a succeeding status +//func validateReleaseCondition(f *framework.Framework, namespace...) error { +//} +// +// +//func HandleReleaseRun(ctx *PerApplicationContext) error { +// if ctx.ParentContext.Opts.ReleasePolicy == "" || !ctx.ParentContext.Opts.WaitRelease { +// logging.Logger.Info("Skipping wait for releases because policy was not provided or waiting for releases was disabled") +// return nil +// } +// +// var err error +// +// validateReleaseCreation +// validateReleasePipelineRunCreation +// validateReleasePipelineRunCondition +// validateReleaseCondition +// +// logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) +// +// _, err = logging.Measure( +// validatePipelineRunCreation, +// ctx.Framework, +// ctx.ParentContext.ParentContext.Namespace, +// ctx.ParentContext.ApplicationName, +// ctx.ComponentName, +// ) +// if err != nil { +// return logging.Logger.Fail(70, "Build Pipeline Run failed creation: %v", err) +// } +// +// logging.Logger.Debug("Build pipeline run for component %s in namespace %s created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) +// +// _, err = logging.Measure( +// validatePipelineRunCondition, +// ctx.Framework, +// ctx.ParentContext.ParentContext.Namespace, +// ctx.ParentContext.ApplicationName, +// ctx.ComponentName, +// ) +// if err != nil { +// return logging.Logger.Fail(71, "Build Pipeline Run failed run: %v", err) +// } +// +// logging.Logger.Debug("Build pipeline run for component %s in namespace %s succeeded", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) +// +// _, err = logging.Measure( +// validatePipelineRunSignature, +// ctx.Framework, +// ctx.ParentContext.ParentContext.Namespace, +// 
ctx.ParentContext.ApplicationName, +// ctx.ComponentName, +// ) +// if err != nil { +// return logging.Logger.Fail(72, "Build Pipeline Run failed signing: %v", err) +// } +// +// logging.Logger.Info("Build pipeline run for component %s in namespace %s OK", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) +// +// return nil +//} diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go new file mode 100644 index 0000000000..cd21c72887 --- /dev/null +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -0,0 +1,203 @@ +package journey + +import "fmt" +import "time" + +import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + +import framework "github.com/konflux-ci/e2e-tests/pkg/framework" +import meta "k8s.io/apimachinery/pkg/api/meta" +import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +import releaseApi "github.com/konflux-ci/release-service/api/v1alpha1" +import tektonutils "github.com/konflux-ci/release-service/tekton/utils" +import utils "github.com/konflux-ci/e2e-tests/pkg/utils" + + +// Create ReleasePlan CR +func createReleasePlan(f *framework.Framework, namespace, appName string) (string, error) { + name := appName + "-rp" + logging.Logger.Debug("Creating release plan %s in namespace %s", name, namespace) + + _, err := f.AsKubeDeveloper.ReleaseController.CreateReleasePlan(name, namespace, appName, namespace, "true", nil, nil, nil) + if err != nil { + return "", fmt.Errorf("Unable to create the ReleasePlan %s in %s: %v", name, namespace, err) + } + + return name, nil +} + + +// Create ReleasePlanAdmission CR +// Assumes enterprise contract policy and service account with required permissions is already there +func createReleasePlanAdmission(f *framework.Framework, namespace, appName, policyName, releasePipelineSAName, releasePipelineUrl, releasePipelineRevision, releasePipelinePath string) (string, error) { + name := appName + "-rpa" + 
logging.Logger.Debug("Creating release plan admission %s in namespace %s with policy %s and pipeline SA %s", name, namespace, policyName, releasePipelineSAName) + + pipeline := &tektonutils.PipelineRef{ + Resolver: "git", + Params: []tektonutils.Param{ + {Name: "url", Value: releasePipelineUrl}, + {Name: "revision", Value: releasePipelineRevision}, + {Name: "pathInRepo", Value: releasePipelinePath}, + }, + } + // CreateReleasePlanAdmission(name, namespace, environment, origin, policy, serviceAccountName string, applications []string, autoRelease bool, pipelineRef *tektonutils.PipelineRef, data *runtime.RawExtension) + _, err := f.AsKubeDeveloper.ReleaseController.CreateReleasePlanAdmission(name, namespace, "", namespace, policyName, releasePipelineSAName, []string{appName}, true, pipeline, nil) + if err != nil { + return "", fmt.Errorf("Unable to create the ReleasePlanAdmission %s in %s: %v", name, namespace, err) + } + + return name, nil +} + + +// Wait for ReleasePlan CR to be created and to have status "Matched" +func validateReleasePlan(f *framework.Framework, namespace, name string) error { + logging.Logger.Debug("Validating release plan %s in namespace %s", name, namespace) + + interval := time.Second * 10 + timeout := time.Minute * 5 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(name, namespace) + if err != nil { + fmt.Printf("Unable to get ReleasePlan %s in %s: %v", name, namespace, err) + return false, nil + } + + condition := meta.FindStatusCondition(releasePlan.Status.Conditions, releaseApi.MatchedConditionType.String()) + if condition == nil { + fmt.Printf("MatchedConditon of %s is still not set", releasePlan.Name) + return false, nil + } + // it may need a period of time for the ReleasePlanCR to be reconciled + if condition.Status == metav1.ConditionFalse { + fmt.Printf("MatchedConditon of %s has not reconciled yet", releasePlan.Name) + return false, nil + 
} + if condition.Status != metav1.ConditionTrue { + fmt.Printf("MatchedConditon of %s is not true yet", releasePlan.Name) + return false, nil + } + if condition.Reason == releaseApi.MatchedReason.String() { + return true, nil + } + + return false, fmt.Errorf("MatchedConditon of %s incorrect: %v", releasePlan.Name, condition) + }, interval, timeout) + + return err +} + + +// Wait for ReleasePlanAdmission CR to be created and to have status "Matched" +func validateReleasePlanAdmission(f *framework.Framework, namespace, name string) error { + logging.Logger.Debug("Validating release plan admission %s in namespace %s", name, namespace) + + interval := time.Second * 10 + timeout := time.Minute * 5 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(name, namespace) + if err != nil { + fmt.Printf("Unable to get ReleasePlanAdmission %s in %s: %v", name, namespace, err) + return false, nil + } + + condition := meta.FindStatusCondition(releasePlanAdmission.Status.Conditions, releaseApi.MatchedConditionType.String()) + if condition == nil { + fmt.Printf("MatchedConditon of %s is still not set", releasePlanAdmission.Name) + return false, nil + } + // it may need a period of time for the ReleasePlanCR to be reconciled + if condition.Status == metav1.ConditionFalse { + fmt.Printf("MatchedConditon of %s has not reconciled yet", releasePlanAdmission.Name) + return false, nil + } + if condition.Status != metav1.ConditionTrue { + fmt.Printf("MatchedConditon of %s is not true yet", releasePlanAdmission.Name) + return false, nil + } + if condition.Reason == releaseApi.MatchedReason.String() { + return true, nil + } + + return false, fmt.Errorf("MatchedConditon of %s incorrect: %v", releasePlanAdmission.Name, condition) + }, interval, timeout) + + return err +} + + +func HandleReleaseSetup(ctx *PerApplicationContext) error { + if ctx.ParentContext.Opts.ReleasePolicy == "" { + 
logging.Logger.Info("Skipping setting up releases because policy was not provided") + return nil + } + + var releasePlanName string + var releasePlanAdmissionName string + var iface interface{} + var ok bool + var err error + + iface, err = logging.Measure( + createReleasePlan, + ctx.Framework, + ctx.ParentContext.Namespace, + ctx.ApplicationName, + ) + if err != nil { + return logging.Logger.Fail(91, "Release Plan failed creation: %v", err) + } + + releasePlanName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(92, "Type assertion failed on release plan name: %+v", iface) + } + + iface, err = logging.Measure( + createReleasePlanAdmission, + ctx.Framework, + ctx.ParentContext.Namespace, + ctx.ApplicationName, + ctx.ParentContext.Opts.ReleasePolicy, + ctx.ParentContext.Opts.ReleasePipelineServiceAccount, + ctx.ParentContext.Opts.ReleasePipelineUrl, + ctx.ParentContext.Opts.ReleasePipelineRevision, + ctx.ParentContext.Opts.ReleasePipelinePath, + ) + if err != nil { + return logging.Logger.Fail(93, "Release Plan Admission failed creation: %v", err) + } + + releasePlanAdmissionName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(94, "Type assertion failed on release plan admission name: %+v", iface) + } + + iface, err = logging.Measure( + validateReleasePlan, + ctx.Framework, + ctx.ParentContext.Namespace, + releasePlanName, + ) + if err != nil { + return logging.Logger.Fail(95, "Release Plan failed validation: %v", err) + } + + iface, err = logging.Measure( + validateReleasePlanAdmission, + ctx.Framework, + ctx.ParentContext.Namespace, + releasePlanAdmissionName, + ) + if err != nil { + return logging.Logger.Fail(96, "Release Plan Admission failed validation: %v", err) + } + + + logging.Logger.Info("Configured release %s & %s for application %s in namespace %s", releasePlanName, releasePlanAdmissionName, ctx.ApplicationName, ctx.ParentContext.Namespace) + + return nil +} diff --git a/tests/load-tests/pkg/options/options.go 
b/tests/load-tests/pkg/options/options.go index 246103cdc2..e072180af1 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -22,17 +22,22 @@ type Opts struct { JourneyRepeats int JourneyUntil time.Time LogDebug bool - LogTrace bool LogInfo bool + LogTrace bool OutputDir string + PipelineImagePullSecrets []string PipelineMintmakerDisabled bool PipelineRepoTemplating bool - PipelineRepoTemplatingSource string PipelineRepoTemplatingSourceDir string - PipelineImagePullSecrets []string + PipelineRepoTemplatingSource string Purge bool PurgeOnly bool QuayRepo string + ReleasePipelinePath string + ReleasePipelineRevision string + ReleasePipelineServiceAccount string + ReleasePipelineUrl string + ReleasePolicy string Stage bool TestScenarioGitURL string TestScenarioPathInRepo string @@ -40,6 +45,7 @@ type Opts struct { UsernamePrefix string WaitIntegrationTestsPipelines bool WaitPipelines bool + WaitRelease bool } // Pre-process load-test options before running the test diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index f11d9b501d..51c04d05fd 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -29,8 +29,14 @@ go run loadtest.go \ --test-scenario-git-url "${TEST_SCENARIO_GIT_URL-https://github.com/konflux-ci/integration-examples.git}" \ --test-scenario-path-in-repo "${TEST_SCENARIO_PATH_IN_REPO-pipelines/integration_resolver_pipeline_pass.yaml}" \ --test-scenario-revision "${TEST_SCENARIO_REVISION-main}" \ + --release-policy "${RELEASE_POLICY-}" \ + --release-pipeline-url "${RELEASE_PIPELINE_URL:-https://github.com/konflux-ci/release-service-catalog.git}" \ + --release-pipeline-revision "${RELEASE_PIPELINE_REVISION:-production}" \ + --release-pipeline-path "${RELEASE_PIPELINE_PATH:-pipelines/managed/e2e/e2e.yaml}" \ + --release-pipeline-service-account "${RELEASE_PIPELINE_SERVICE_ACCOUNT:-release-serviceaccount}" \ --username "${USER_PREFIX:-undef}" \ 
--waitintegrationtestspipelines="${WAIT_INTEGRATION_TESTS:-true}" \ --waitpipelines="${WAIT_PIPELINES:-true}" \ + --waitrelease="${WAIT_RELEASE:-true}" \ $options \ --stage From 94449c6397af8b3940e5775652395d6f79db47dd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 13:59:39 +0200 Subject: [PATCH 077/321] feat(KONFLUX-8544): Measure releases for the application --- tests/load-tests/loadtest.go | 14 +- .../pkg/journey/handle_releases_run.go | 280 ++++++++++++------ .../pkg/journey/handle_releases_setup.go | 16 +- 3 files changed, 208 insertions(+), 102 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index d1be37deb4..6ae381ced2 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -279,13 +279,6 @@ func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { logging.Logger.Fatal("Per component threads setup failed: %v", err) } - //// Wait for release to finish - //_, err = logging.Measure(journey.HandleReleaseRun, perApplicationCtx) - //if err != nil { - // logging.Logger.Error("Thread failed: %v", err) - // return - //} - } // Single component journey (there can be multiple parallel comps per app) @@ -327,4 +320,11 @@ func perComponentThread(perComponentCtx *journey.PerComponentContext) { logging.Logger.Error("Per component thread failed: %v", err) return } + + // Wait for release to finish + _, err = logging.Measure(journey.HandleReleaseRun, perComponentCtx) + if err != nil { + logging.Logger.Error("Thread failed: %v", err) + return + } } diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index a0388edec1..47c149634f 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -1,89 +1,195 @@ package journey -//import "fmt" -//import "strings" -//import "time" -// -//import logging 
"github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" -// -//import framework "github.com/konflux-ci/e2e-tests/pkg/framework" -//import utils "github.com/konflux-ci/e2e-tests/pkg/utils" -//import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" -// -// -//// Wait for Release CR to be created -//func validateReleaseCreation(f *framework.Framework, namespace...) error { -//} -// -// -//// Wait for release pipeline run to be created -//func validateReleasePipelineRunCreation(f *framework.Framework, namespace...) error { -//} -// -// -//// Wait for release pipeline run to succeed -//func validateReleasePipelineRunCondition(f *framework.Framework, namespace...) error { -//} -// -// -//// Wait for Release CR to have a succeeding status -//func validateReleaseCondition(f *framework.Framework, namespace...) error { -//} -// -// -//func HandleReleaseRun(ctx *PerApplicationContext) error { -// if ctx.ParentContext.Opts.ReleasePolicy == "" || !ctx.ParentContext.Opts.WaitRelease { -// logging.Logger.Info("Skipping wait for releases because policy was not provided or waiting for releases was disabled") -// return nil -// } -// -// var err error -// -// validateReleaseCreation -// validateReleasePipelineRunCreation -// validateReleasePipelineRunCondition -// validateReleaseCondition -// -// logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) -// -// _, err = logging.Measure( -// validatePipelineRunCreation, -// ctx.Framework, -// ctx.ParentContext.ParentContext.Namespace, -// ctx.ParentContext.ApplicationName, -// ctx.ComponentName, -// ) -// if err != nil { -// return logging.Logger.Fail(70, "Build Pipeline Run failed creation: %v", err) -// } -// -// logging.Logger.Debug("Build pipeline run for component %s in namespace %s created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) -// -// _, err = logging.Measure( -// validatePipelineRunCondition, 
-// ctx.Framework, -// ctx.ParentContext.ParentContext.Namespace, -// ctx.ParentContext.ApplicationName, -// ctx.ComponentName, -// ) -// if err != nil { -// return logging.Logger.Fail(71, "Build Pipeline Run failed run: %v", err) -// } -// -// logging.Logger.Debug("Build pipeline run for component %s in namespace %s succeeded", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) -// -// _, err = logging.Measure( -// validatePipelineRunSignature, -// ctx.Framework, -// ctx.ParentContext.ParentContext.Namespace, -// ctx.ParentContext.ApplicationName, -// ctx.ComponentName, -// ) -// if err != nil { -// return logging.Logger.Fail(72, "Build Pipeline Run failed signing: %v", err) -// } -// -// logging.Logger.Info("Build pipeline run for component %s in namespace %s OK", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) -// -// return nil -//} +import "fmt" +import "strings" +import "time" + +import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + +import framework "github.com/konflux-ci/e2e-tests/pkg/framework" +import utils "github.com/konflux-ci/e2e-tests/pkg/utils" + + +// Wait for Release CR to be created +func validateReleaseCreation(f *framework.Framework, namespace, snapshotName string) (string, error) { + logging.Logger.Debug("Waiting for release for snapshot %s in namespace %s to be created", snapshotName, namespace) + + var releaseName string + + interval := time.Second * 10 + timeout := time.Minute * 5 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + release, err := f.AsKubeDeveloper.ReleaseController.GetRelease("", snapshotName, namespace) + if err != nil { + fmt.Printf("Can not get release for snapshot %s in namespace %s: %v", snapshotName, namespace, err) + return false, nil + } + + releaseName = release.Name + + return true, nil + }, interval, timeout) + + return releaseName, err +} + + +// Wait for release pipeline run to be created +func validateReleasePipelineRunCreation(f 
*framework.Framework, namespace, releaseName string) error { + logging.Logger.Debug("Waiting for release pipeline for release %s in namespace %s to be created", releaseName, namespace) + + interval := time.Second * 10 + timeout := time.Minute * 5 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + _, err = f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) + if err != nil { + fmt.Printf("Pipelinerun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) + return true, nil + } + + return true, nil + }, interval, timeout) + + return err +} + + +// Wait for release pipeline run to succeed +func validateReleasePipelineRunCondition(f *framework.Framework, namespace, releaseName string) error { + logging.Logger.Debug("Waiting for release pipeline for release %s in namespace %s to finish", releaseName, namespace) + + interval := time.Second * 10 + timeout := time.Minute * 10 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + pipelineRun, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) + if err != nil { + fmt.Printf("PipelineRun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) + return true, nil + } + + // Check if there are some conditions + if len(pipelineRun.Status.Conditions) == 0 { + fmt.Printf("PipelineRun %s in namespace %s lacks status conditions\n", pipelineRun.GetName(), pipelineRun.GetNamespace()) + return false, nil + } + + // Check right condition status + for _, condition := range pipelineRun.Status.Conditions { + if (strings.HasPrefix(string(condition.Type), "Error") || strings.HasSuffix(string(condition.Type), "Error")) && condition.Status == "True" { + return false, fmt.Errorf("PipelineRun %s in namespace %s is in error state: %+v", pipelineRun.GetName(), pipelineRun.GetNamespace(), condition) + } + if condition.Type == "Succeeded" && condition.Status 
== "False" { + return false, fmt.Errorf("PipelineRun %s in namespace %s failed: %+v", pipelineRun.GetName(), pipelineRun.GetNamespace(), condition) + } + if condition.Type == "Succeeded" && condition.Status == "True" { + return true, nil + } + } + + return false, nil + }, interval, timeout) + + return err +} + + +// Wait for Release CR to have a succeeding status +func validateReleaseCondition(f *framework.Framework, namespace, releaseName string) error { + logging.Logger.Debug("Waiting for release %s in namespace %s to finish", releaseName, namespace) + + interval := time.Second * 10 + timeout := time.Minute * 5 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + release, err := f.AsKubeDeveloper.ReleaseController.GetRelease(releaseName, "", namespace) + if err != nil { + fmt.Printf("Can not get release %s in namespace %s: %v", releaseName, namespace, err) + return false, nil + } + + // Check if there are some conditions + if len(release.Status.Conditions) == 0 { + fmt.Printf("Release %s in namespace %s lacks status conditions\n", releaseName, namespace) + return false, nil + } + + // Check right condition status + for _, condition := range release.Status.Conditions { + if condition.Type == "Released" && condition.Status == "False" { + return false, fmt.Errorf("Release %s in namespace %s failed: %+v", releaseName, namespace, condition) + } + if condition.Type == "Released" && condition.Status == "True" { + return true, nil + } + } + + return false, nil + }, interval, timeout) + + return err +} + + +func HandleReleaseRun(ctx *PerComponentContext) error { + if ctx.ParentContext.ParentContext.Opts.ReleasePolicy == "" || !ctx.ParentContext.ParentContext.Opts.WaitRelease { + logging.Logger.Info("Skipping waiting for releases because policy was not provided or waiting was disabled") + return nil + } + + var releaseName string + var iface interface{} + var ok bool + var err error + + iface, err = logging.Measure( + validateReleaseCreation, + 
ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + ctx.SnapshotName, + ) + if err != nil { + return logging.Logger.Fail(90, "Release failed creation: %v", err) + } + + releaseName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(91, "Type assertion failed on release name: %+v", iface) + } + + _, err = logging.Measure( + validateReleasePipelineRunCreation, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + releaseName, + ) + if err != nil { + return logging.Logger.Fail(92, "Release pipeline run failed creation: %v", err) + } + + _, err = logging.Measure( + validateReleasePipelineRunCondition, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + releaseName, + ) + if err != nil { + return logging.Logger.Fail(93, "Release pipeline run failed: %v", err) + } + + _, err = logging.Measure( + validateReleaseCondition, + ctx.Framework, + ctx.ParentContext.ParentContext.Namespace, + releaseName, + ) + if err != nil { + return logging.Logger.Fail(94, "Release failed: %v", err) + } + + logging.Logger.Info("Release %s in namespace %s succeeded", releaseName, ctx.ParentContext.ParentContext.Namespace) + + return nil +} diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index cd21c72887..1c8778f9cf 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -61,22 +61,22 @@ func validateReleasePlan(f *framework.Framework, namespace, name string) error { err := utils.WaitUntilWithInterval(func() (done bool, err error) { releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(name, namespace) if err != nil { - fmt.Printf("Unable to get ReleasePlan %s in %s: %v", name, namespace, err) + fmt.Printf("Unable to get ReleasePlan %s in %s: %v\n", name, namespace, err) return false, nil } condition := meta.FindStatusCondition(releasePlan.Status.Conditions, 
releaseApi.MatchedConditionType.String()) if condition == nil { - fmt.Printf("MatchedConditon of %s is still not set", releasePlan.Name) + fmt.Printf("MatchedConditon of %s is still not set\n", releasePlan.Name) return false, nil } // it may need a period of time for the ReleasePlanCR to be reconciled if condition.Status == metav1.ConditionFalse { - fmt.Printf("MatchedConditon of %s has not reconciled yet", releasePlan.Name) + fmt.Printf("MatchedConditon of %s has not reconciled yet\n", releasePlan.Name) return false, nil } if condition.Status != metav1.ConditionTrue { - fmt.Printf("MatchedConditon of %s is not true yet", releasePlan.Name) + fmt.Printf("MatchedConditon of %s is not true yet\n", releasePlan.Name) return false, nil } if condition.Reason == releaseApi.MatchedReason.String() { @@ -100,22 +100,22 @@ func validateReleasePlanAdmission(f *framework.Framework, namespace, name string err := utils.WaitUntilWithInterval(func() (done bool, err error) { releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(name, namespace) if err != nil { - fmt.Printf("Unable to get ReleasePlanAdmission %s in %s: %v", name, namespace, err) + fmt.Printf("Unable to get ReleasePlanAdmission %s in %s: %v\n", name, namespace, err) return false, nil } condition := meta.FindStatusCondition(releasePlanAdmission.Status.Conditions, releaseApi.MatchedConditionType.String()) if condition == nil { - fmt.Printf("MatchedConditon of %s is still not set", releasePlanAdmission.Name) + fmt.Printf("MatchedConditon of %s is still not set\n", releasePlanAdmission.Name) return false, nil } // it may need a period of time for the ReleasePlanCR to be reconciled if condition.Status == metav1.ConditionFalse { - fmt.Printf("MatchedConditon of %s has not reconciled yet", releasePlanAdmission.Name) + fmt.Printf("MatchedConditon of %s has not reconciled yet\n", releasePlanAdmission.Name) return false, nil } if condition.Status != metav1.ConditionTrue { - 
fmt.Printf("MatchedConditon of %s is not true yet", releasePlanAdmission.Name) + fmt.Printf("MatchedConditon of %s is not true yet\n", releasePlanAdmission.Name) return false, nil } if condition.Reason == releaseApi.MatchedReason.String() { From e662444236ffbd6bc10e2919404670b131a20afa Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 17:12:23 +0200 Subject: [PATCH 078/321] chore: Add more ignored files & dirs --- tests/load-tests/.gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/.gitignore b/tests/load-tests/.gitignore index aff54803fd..1666e0ae3e 100644 --- a/tests/load-tests/.gitignore +++ b/tests/load-tests/.gitignore @@ -5,4 +5,10 @@ load-tests.json load-tests.*.json output.json -users.json +users*.json + +loadtest + +OLD/ +videos/ +collected-data/ From 074537031c53f31b1bda45c0c5d3db856e3cda6b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 21:33:30 +0200 Subject: [PATCH 079/321] feat: Add more logging to show what is going on better --- tests/load-tests/loadtest.go | 8 ++++---- tests/load-tests/pkg/journey/handle_pipeline.go | 6 +++--- tests/load-tests/pkg/journey/handle_releases_run.go | 4 ++-- tests/load-tests/pkg/journey/handle_test_run.go | 4 ++++ 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 6ae381ced2..257aaaf539 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -255,21 +255,21 @@ func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { // Create application _, err = logging.Measure(journey.HandleApplication, perApplicationCtx) if err != nil { - logging.Logger.Error("Thread failed: %v", err) + logging.Logger.Error("Per application thread failed: %v", err) return } // Create integration test scenario _, err = logging.Measure(journey.HandleIntegrationTestScenario, perApplicationCtx) if err != nil { - logging.Logger.Error("Thread failed: %v", 
err) + logging.Logger.Error("Per application thread failed: %v", err) return } // Create release plan and release plan admission _, err = logging.Measure(journey.HandleReleaseSetup, perApplicationCtx) if err != nil { - logging.Logger.Error("Thread failed: %v", err) + logging.Logger.Error("Per application thread failed: %v", err) return } @@ -324,7 +324,7 @@ func perComponentThread(perComponentCtx *journey.PerComponentContext) { // Wait for release to finish _, err = logging.Measure(journey.HandleReleaseRun, perComponentCtx) if err != nil { - logging.Logger.Error("Thread failed: %v", err) + logging.Logger.Error("Per component thread failed: %v", err) return } } diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index dcbea9b1c0..332a72da76 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -113,7 +113,7 @@ func HandlePipelineRun(ctx *PerComponentContext) error { var err error - logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( validatePipelineRunCreation, @@ -126,7 +126,7 @@ func HandlePipelineRun(ctx *PerComponentContext) error { return logging.Logger.Fail(70, "Build Pipeline Run failed creation: %v", err) } - logging.Logger.Debug("Build pipeline run for component %s in namespace %s created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( validatePipelineRunCondition, @@ -139,7 +139,7 @@ func HandlePipelineRun(ctx *PerComponentContext) error { return 
logging.Logger.Fail(71, "Build Pipeline Run failed run: %v", err) } - logging.Logger.Debug("Build pipeline run for component %s in namespace %s succeeded", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be signed", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( validatePipelineRunSignature, diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index 47c149634f..a3d75d8b49 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -22,7 +22,7 @@ func validateReleaseCreation(f *framework.Framework, namespace, snapshotName str err := utils.WaitUntilWithInterval(func() (done bool, err error) { release, err := f.AsKubeDeveloper.ReleaseController.GetRelease("", snapshotName, namespace) if err != nil { - fmt.Printf("Can not get release for snapshot %s in namespace %s: %v", snapshotName, namespace, err) + fmt.Printf("Can not get release for snapshot %s in namespace %s: %v\n", snapshotName, namespace, err) return false, nil } @@ -106,7 +106,7 @@ func validateReleaseCondition(f *framework.Framework, namespace, releaseName str err := utils.WaitUntilWithInterval(func() (done bool, err error) { release, err := f.AsKubeDeveloper.ReleaseController.GetRelease(releaseName, "", namespace) if err != nil { - fmt.Printf("Can not get release %s in namespace %s: %v", releaseName, namespace, err) + fmt.Printf("Can not get release %s in namespace %s: %v\n", releaseName, namespace, err) return false, nil } diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index f79fca26f3..0212c7a2db 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -117,6 +117,8 @@ func HandleTest(ctx *PerComponentContext) 
error { if ctx.ParentContext.ParentContext.Opts.TestScenarioGitURL == "" { logging.Logger.Debug("Integration Test Scenario GIT not provided, not waiting for it") } else { + logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + _, err = logging.Measure( validateTestPipelineRunCreation, ctx.Framework, @@ -128,6 +130,8 @@ func HandleTest(ctx *PerComponentContext) error { return logging.Logger.Fail(82, "Test Pipeline Run failed creation: %v", err) } + logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + _, err = logging.Measure( validateTestPipelineRunCondition, ctx.Framework, From 84fac22124b3b86547df96b34e1080b4a025f8a2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 21:35:42 +0200 Subject: [PATCH 080/321] feat(KONFLUX-8544): Also consider release measurements in KPI --- tests/load-tests/evaluate.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 0292748494..fef53ec961 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -31,6 +31,14 @@ "validateSnapshotCreation", "validateTestPipelineRunCreation", "validateTestPipelineRunCondition", + "createReleasePlan", + "createReleasePlanAdmission", + "validateReleasePlan", + "validateReleasePlanAdmission", + "validateReleaseCreation", + "validateReleasePipelineRunCreation", + "validateReleasePipelineRunCondition", + "validateReleaseCondition", ] # These metrics will be ignored if ITS was skipped @@ -41,6 +49,18 @@ "validateTestPipelineRunCondition", ] +# These metrics will be ignored if Release was skipped +METRICS_RELEASE = [ + "createReleasePlan", + "createReleasePlanAdmission", + "validateReleasePlan", + "validateReleasePlanAdmission", + "validateReleaseCreation", + 
"validateReleasePipelineRunCreation", + "validateReleasePipelineRunCondition", + "validateReleaseCondition", +] + def str2date(date_str): if isinstance(date_str, datetime.datetime): @@ -102,6 +122,9 @@ def main(): if options["TestScenarioGitURL"] == "": print("NOTE: Ignoring ITS related metrics because they were disabled at test run") METRICS_to_skip += METRICS_ITS + if options["ReleasePolicy"] == "": + print("NOTE: Ignoring Release related metrics because they were disabled at test run") + METRICS_to_skip += METRICS_RELEASE stats_raw = {} From 00c51aa1fbc897e4568565e5be96c94b27261e00 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 23 Jun 2025 21:36:21 +0200 Subject: [PATCH 081/321] feat(KONFLUX-8544): Sync missing labels to Horreum definition script --- .../ci-scripts/config/horreum-labels.sh | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index aa20d23889..0dfd359256 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -17,6 +17,11 @@ set -eu -o pipefail # shovel.py horreum --base-url https://horreum.corp.redhat.com/ --api-token "$HORREUM_API_TOKEN" schema-label-delete --schema-uri "urn:rhtap-perf-team-load-test:1.0" --id 999999 # # But here we are using just one that updates (or adds if label with the name is missing) labels for given extractor JSON path expressions: +# +# I'm using this helper to add new labels for new test phases when they are processed by evaluate.py and stored into load-test-timings.json: +# +# jq -r '. 
| keys[]' load-test-timings.json | grep -v '^KPI$' | while read m; do echo "horreum_schema_label_present '\$.results.measurements.$m.error_rate'"; echo "horreum_schema_label_present '\$.results.measurements.$m.pass.duration.mean'"; done | LANG=C sort >/tmp/list.sh +# meld /tmp/list.sh ci-scripts/config/horreum-labels.sh function horreum_schema_label_present() { local extractor="$1" @@ -197,18 +202,22 @@ horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-outp horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.running.mean' horreum_schema_label_present '$.results.durations.stats.taskruns."test/test-output".passed.scheduled.mean' horreum_schema_label_present '$.results.errors.error_reasons_simple' +horreum_schema_label_present '$.results.measurements.HandleUser.error_rate' +horreum_schema_label_present '$.results.measurements.HandleUser.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.KPI.errors' +horreum_schema_label_present '$.results.measurements.KPI.mean' horreum_schema_label_present '$.results.measurements.createApplication.error_rate' horreum_schema_label_present '$.results.measurements.createApplication.pass.duration.mean' horreum_schema_label_present '$.results.measurements.createComponent.error_rate' horreum_schema_label_present '$.results.measurements.createComponent.pass.duration.mean' -horreum_schema_label_present '$.results.measurements.getPaCPullNumber.error_rate' -horreum_schema_label_present '$.results.measurements.getPaCPullNumber.pass.duration.mean' horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.error_rate' horreum_schema_label_present '$.results.measurements.createIntegrationTestScenario.pass.duration.mean' -horreum_schema_label_present '$.results.measurements.HandleUser.error_rate' -horreum_schema_label_present '$.results.measurements.HandleUser.pass.duration.mean' -horreum_schema_label_present 
'$.results.measurements.KPI.errors' -horreum_schema_label_present '$.results.measurements.KPI.mean' +horreum_schema_label_present '$.results.measurements.createReleasePlan.error_rate' +horreum_schema_label_present '$.results.measurements.createReleasePlan.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.createReleasePlanAdmission.error_rate' +horreum_schema_label_present '$.results.measurements.createReleasePlanAdmission.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.getPaCPullNumber.error_rate' +horreum_schema_label_present '$.results.measurements.getPaCPullNumber.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateApplication.error_rate' horreum_schema_label_present '$.results.measurements.validateApplication.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.error_rate' @@ -221,6 +230,18 @@ horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunSignature.error_rate' horreum_schema_label_present '$.results.measurements.validatePipelineRunSignature.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateReleaseCondition.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleaseCondition.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateReleaseCreation.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleaseCreation.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateReleasePipelineRunCondition.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleasePipelineRunCondition.pass.duration.mean' +horreum_schema_label_present 
'$.results.measurements.validateReleasePipelineRunCreation.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleasePipelineRunCreation.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateReleasePlan.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleasePlan.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateReleasePlanAdmission.error_rate' +horreum_schema_label_present '$.results.measurements.validateReleasePlanAdmission.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateSnapshotCreation.error_rate' horreum_schema_label_present '$.results.measurements.validateSnapshotCreation.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateTestPipelineRunCondition.error_rate' From 33b0221ce5facf7912d977f2c8c2c56c385aceee Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 24 Jun 2025 06:55:10 +0200 Subject: [PATCH 082/321] feat: New error: Release failed: Release jhutar-app-tnplb-bhjbp-e25f341-ltqhj in namespace jhutar-tenant failed: {Type:Released Status:False ObservedGeneration:0 LastTransitionTime:2025-06-24 04:20:33 +0000 UTC Reason:Failed Message:Release validation failed} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f2bf87f9ed..405d90d41f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -34,6 +34,7 @@ "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE 
GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", + "Failed validating release condition": r"Release .* in namespace .* failed: .*Message:Release validation failed.*", "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. You can either do re-authorization or token refresh", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", From 2bf95f2f427c8ff31ad59fa4470e9eac26155632 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 27 Jun 2025 13:41:31 +0200 Subject: [PATCH 083/321] feat: New error: Snapshot failed creation: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 405d90d41f..99e5480209 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -48,6 +48,7 @@ "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", + "Timeout waiting for snapshot to be created": r"Snapshot failed creation: context deadline exceeded", "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", } From 
7624b9102ce8428717f5ed3d770504e9310ddab9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 30 Jun 2025 07:54:35 +0200 Subject: [PATCH 084/321] feat: Return simple error when required logs are missing --- tests/load-tests/errors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 99e5480209..d8de4dd3a3 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -199,6 +199,9 @@ def investigate_failed_plr(dump_dir): reasons = list(set(reasons)) # get unique reasons only reasons.sort() # sort reasons return reasons + except FileNotFoundError as e: + print(f"Failed to locate required files: {e}") + return ["SORRY, missing data"] except Exception as e: return ["SORRY " + str(e)] From b360461e735c792c7c9c7a819becd7be51bd573c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 30 Jun 2025 08:17:50 +0200 Subject: [PATCH 085/321] feat: When no PLR was found, word the error properly. Also add one more check. --- pkg/clients/release/releases.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/clients/release/releases.go b/pkg/clients/release/releases.go index dd69769d7d..6434749510 100644 --- a/pkg/clients/release/releases.go +++ b/pkg/clients/release/releases.go @@ -166,10 +166,18 @@ func (r *ReleaseController) GetPipelineRunInNamespace(namespace, releaseName, re err := r.KubeRest().List(context.Background(), pipelineRuns, opts...) 
- if err == nil && len(pipelineRuns.Items) > 0 { + if err == nil && len(pipelineRuns.Items) > 1 { + return &pipelineRuns.Items[0], fmt.Errorf("found multiple PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", namespace, releaseName, releaseNamespace) + } + + if err == nil && len(pipelineRuns.Items) == 1 { return &pipelineRuns.Items[0], nil } + if err == nil && len(pipelineRuns.Items) == 0 { + return nil, fmt.Errorf("couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", namespace, releaseName, releaseNamespace) + } + return nil, fmt.Errorf("couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace because of err:'%w'", namespace, releaseName, releaseNamespace, err) } From dd1d120f99454f99ad760ff58208d787507433f2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 30 Jun 2025 11:19:13 +0200 Subject: [PATCH 086/321] fix: Add missing newlines to end of these error prints --- pkg/sandbox/sandbox.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sandbox/sandbox.go b/pkg/sandbox/sandbox.go index 197ddc9d64..ff08477932 100644 --- a/pkg/sandbox/sandbox.go +++ b/pkg/sandbox/sandbox.go @@ -149,7 +149,7 @@ func (lrt LoggingRoundTripper) RoundTrip(req *http.Request) (res *http.Response, // Handle the result. if e != nil { - GinkgoWriter.Printf("Sandbox proxy error: %v", e) + GinkgoWriter.Printf("Sandbox proxy error: %v\n", e) } return res, e } @@ -298,7 +298,7 @@ func (s *SandboxController) UpdateUserSignup(userSignupName string, modifyUserSi modifyUserSignup(freshUserSignup) if err := s.KubeRest.Update(context.Background(), freshUserSignup); err != nil { - GinkgoWriter.Printf("error updating UserSignup '%s': %s. Will retry again...", userSignupName, err.Error()) + GinkgoWriter.Printf("error updating UserSignup '%s': %s. 
Will retry again...\n", userSignupName, err.Error()) return false, nil } userSignup = freshUserSignup From 446ec7c2788e22d5f647032f5048d49f3103c65a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 1 Jul 2025 08:38:31 +0200 Subject: [PATCH 087/321] fix: If the release pipeline run was not found, keep waiting --- tests/load-tests/pkg/journey/handle_releases_run.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index a3d75d8b49..a5f98bbd72 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -46,7 +46,7 @@ func validateReleasePipelineRunCreation(f *framework.Framework, namespace, relea _, err = f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) if err != nil { fmt.Printf("Pipelinerun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) - return true, nil + return false, nil } return true, nil @@ -67,7 +67,7 @@ func validateReleasePipelineRunCondition(f *framework.Framework, namespace, rele pipelineRun, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) if err != nil { fmt.Printf("PipelineRun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) - return true, nil + return false, nil } // Check if there are some conditions From 03df0d0e4c30cf3921475edb3dc3581886a53123 Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Tue, 1 Jul 2025 08:51:55 +0200 Subject: [PATCH 088/321] feat: New error for Unable to list PipelineRuns --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d8de4dd3a3..dae6e87670 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -46,6 +46,7 @@ "Timeout getting build service account": 
r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", + "Timeout listing pipeline runs": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", "Timeout waiting for snapshot to be created": r"Snapshot failed creation: context deadline exceeded", From b1c31f4962fcdc2da841bae8dbc57f3a9167c285 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 1 Jul 2025 12:26:27 +0200 Subject: [PATCH 089/321] feat: New error (and sort): ERROR Command returned error: Failed to download metadata (baseurl: "https://kojipkgs.fedoraproject.org/repos/f43-build/6585390/x86_64") for repository "build": Usable URL not found --- tests/load-tests/errors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index dae6e87670..114ddce353 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -56,10 +56,11 @@ FAILED_PLR_ERRORS = { "SKIP": r"Skipping step because a previous step failed", + "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR 
already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", - "RPM build failed: bool cannot be defined via typedef": r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", - "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", + "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "RPM build failed: bool cannot be defined via typedef": r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", } From f91d56a68944b842865f4f4654bcde8ac83d3722 Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Fri, 27 Jun 2025 15:06:00 -0400 Subject: [PATCH 090/321] feat: New error: Adding Error deleting on-pull-request default PipelineRun --- tests/load-tests/errors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 114ddce353..8cc5274d7d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -18,7 +18,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", - "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component [\w-]+ in namespace [\w-]+ failed: .* Reason:Cancelled .* Message:PipelineRun .* was cancelled", + "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .*", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", @@ -26,6 +26,7 @@ "Couldnt get task via buldles resolver from quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type 
bundles.*https://quay.io/.* status code 429 Too Many Requests", "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", + "Error deleting on-pull-request default PipelineRun": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", @@ -38,6 +39,7 @@ "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 
.*error: invalid_token.*error_description: Token is expired. You can either do re-authorization or token refresh", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", + "Release failure: PipelineRun not created": r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists.*", From ee7c69ec14d05ad9365d3441823946f03b9f5070 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 2 Jul 2025 07:23:40 +0200 Subject: [PATCH 091/321] feat: New error: tar: /home/u-.../chroot_scan.tar.gz: Cannot open: No such file or directory --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8cc5274d7d..f50732d94b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -58,6 +58,7 @@ FAILED_PLR_ERRORS = { "SKIP": r"Skipping step because a previous step failed", + "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", From dff18550013634cf3a6c347eba9c6b904ce1650c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 2 Jul 2025 07:25:05 +0200 Subject: [PATCH 092/321] feat: Extend the message back --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f50732d94b..7b9d6ae300 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -18,7 +18,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource 
quota evaluation timed out", - "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .*", + "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled .* Message:PipelineRun .* was cancelled", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", From 47ec9c20e230f90cd091c0e7ba92ea8ce43d89c3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 2 Jul 2025 07:27:38 +0200 Subject: [PATCH 093/321] feat: New error: Application failed creation: Unable to create the Application jhutar-app-wpyjx: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get https://api.stone-prod-p02.hjvn.p1.openshiftapps.com:6443/apis/appstudio.redhat.com/v1alpha1: net/http: TLS handshake timeout --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7b9d6ae300..549b1302cc 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -17,6 +17,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { + "Application creation failed because of TLS handshake timeout": r"Application failed creation: Unable to create the Application .*: failed to get API group 
resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout", "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled .* Message:PipelineRun .* was cancelled", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", From bc837cf50fd374fed327881fdec1ebac0e24e076 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 3 Jul 2025 10:45:29 +0200 Subject: [PATCH 094/321] feat: New error: Release failed: Release jhutar-app-okeck-krc9c-4f2976e-dgznx in namespace jhutar-tenant failed: {Type:Released Status:False ObservedGeneration:0 LastTransitionTime:2025-07-02 06:44:05 +0000 UTC Reason:Progressing Message:} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 549b1302cc..57df264888 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -40,6 +40,7 @@ "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh", "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", + "Release failed in progress without error given": r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$", "Release failure: PipelineRun not created": r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", From fd4257708b823085cd84bff8a35ffd207ffaafe4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 3 Jul 2025 12:11:54 +0200 Subject: [PATCH 095/321] docs: Add details about 'Timeout listing pipeline runs' error --- tests/load-tests/errors.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 57df264888..2cb6ffe89b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -50,6 +50,21 @@ "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", + # Last time I seen this we discussed it here: + # + # 
https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1751530663606749 + # + # And it manifested itself by check on initial PR failing with: + # + # the namespace of the provided object does not match the namespace sent on the request + # + # And folks noticed this in the PaC controller logs: + # + # There was an error starting the PipelineRun test-rhtap-1-app-ryliu-comp-0-on-pull-request-, creating pipelinerun + # test-rhtap-1-app-ryliu-comp-0-on-pull-request- in namespace test-rhtap-1-tenant has failed. Tekton Controller has + # reported this error: ```Internal error occurred: failed calling webhook "vpipelineruns.konflux-ci.dev": failed + # to call webhook: Post "https://etcd-shield.etcd-shield.svc:443/validate-tekton-dev-v1-pipelinerun?timeout=10s": + # context deadline exceeded``` "Timeout listing pipeline runs": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", From 77b385efe69c6d25d88fa4064885fcfd6a4ceebc Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Thu, 3 Jul 2025 12:46:39 +0200 Subject: [PATCH 096/321] feat: New error: buildah build failed creating build container: registry.access.redhat.com returned 403 Error with some context: [2025-07-01T19:20:02,003805722+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-07-01T19:20:01 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=7ec92bec984d038dfc2d1fb6241044431f113cc8 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 -f /tmp/Dockerfile.iS78Jt -t 
quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-ghefc-comp-0:on-pr-7ec92bec984d038dfc2d1fb6241044431f113cc8 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi8/nodejs-18:latest Trying to pull registry.access.redhat.com/ubi8/nodejs-18:latest... Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18:latest: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/ubi8/nodejs-18:latest: reading manifest sha256:3a895f2b85ffeda82b2d50ce1ae554bc5bc62448aba48b3fd56ce94b694b3b2a in registry.access.redhat.com/ubi8/nodejs-18: StatusCode: 403, "\r\n403 Forbidden\r..." Link: https://workdir-exporter-jenkins-csb-perf.apps.int.gpc.ocp-hub.prod.psi.redhat.com/workspace/StoneSoupLoadTestProbe_stone_prd_rh01/e2e-tests/tests/load-tests/OLD/run-stone-prd-rh01-2025_07_01T19_17_10_668176918_00_00/collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-ghefc-comp-0-oe35b8003686447efeb763c670815dd4c-pod-step-build.log --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 2cb6ffe89b..8213569efe 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -75,6 +75,7 @@ FAILED_PLR_ERRORS = { "SKIP": r"Skipping step because a previous step failed", + "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata 
(baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", From 3251138a3cbb428dd524c0066993663e6e79266c Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Thu, 3 Jul 2025 12:51:49 +0200 Subject: [PATCH 097/321] feat: New error: Introspection failed because of incomplete .docker/config.json Error: Artifact type will be determined by introspection. Checking the media type of the OCI artifact... The media type of the OCI artifact is application/vnd.oci.image.manifest.v1+json. Looking for image labels that indicate this might be an operator bundle... time="2025-07-02T00:58:14Z" level=fatal msg="Error parsing image name \"docker://quay.io/redhat-user-workloads/jhutar-tenant/jhutar-app-youop-comp-0:on-pr-267c18212324a4f9f4b2fd8f30225e4b6ba1e4c0\": getting username and password: reading JSON file \"/tekton/home/.docker/config.json\": unmarshaling JSON at \"/tekton/home/.docker/config.json\": unexpected end of JSON input" Link: https://workdir-exporter-jenkins-csb-perf.apps.int.gpc.ocp-hub.prod.psi.redhat.com/workspace/StoneSoupLoadTestProbe_stone_prod_p02/e2e-tests/tests/load-tests/OLD/run-stone-prod-p02-2025_07_02T00_55_12_206383611_00_00/collected-data/jhutar-tenant/1/pod-jhutar-app-youop-comp-0-on-d9711596435debc219a10437940162fb-pod-step-introspect.log --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8213569efe..16c4986e25 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -81,6 +81,7 @@ "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code 
from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "Introspection failed because of incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", "RPM build failed: bool cannot be defined via typedef": r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", } From 840f78e76f2877d21a58200c15d3430184a22960 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 4 Jul 2025 13:10:35 +0200 Subject: [PATCH 098/321] docs: Add a note about how significant this entry is. We should have a constant for this.
--- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 16c4986e25..ffd144eb97 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -74,7 +74,7 @@ } FAILED_PLR_ERRORS = { - "SKIP": r"Skipping step because a previous step failed", + "SKIP": r"Skipping step because a previous step failed", # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", From d50b338bdbc783b53fec82cca6647b6752e57ca1 Mon Sep 17 00:00:00 2001 From: Larry Rios Date: Fri, 4 Jul 2025 13:12:30 +0200 Subject: [PATCH 099/321] feat: New error: Error allocating host because of provisioning error Error: + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + cat /ssh/error Error allocating host: failed to provision host Context info: Platform: linux/amd64 File: /opt/app-root/src/pkg/reconciler/taskrun/taskrun.go Line: 464 + exit 1 Link: 
https://workdir-exporter-jenkins-csb-perf.apps.int.gpc.ocp-hub.prod.psi.redhat.com/workspace/StoneSoupLoadTestProbe_stone_prod_p02_RPM/e2e-tests/tests/load-tests/OLD/run-stone-prod-p02-2025_07_03T16_35_49_006538124_00_00/collected-data/jhutar-tenant/1/pod-jhutar-app-lpgau-comp-0-on-push-t5r9s-calculate-deps-x86-64-pod-step-mock-build.log --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ffd144eb97..9d365ba26e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -79,6 +79,7 @@ "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", + "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", "Introspection failed because of incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", From 20e24526be0046fbddaec73788d0d8378eca841f Mon Sep 17 00:00:00 2001 
From: Larry Rios Date: Fri, 4 Jul 2025 13:16:14 +0200 Subject: [PATCH 100/321] feat: New error: Can not find Dockerfile Error: [2025-07-03T11:43:36,850878165+00:00] Update CA trust INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-07-03T11:43:37,651927712+00:00] Prepare Dockerfile Cannot find Dockerfile Dockerfile Link: https://workdir-exporter-jenkins-csb-perf.apps.int.gpc.ocp-hub.prod.psi.redhat.com/workspace/StoneSoupLoadTestProbe_kflux_rhel_p01_RPM/e2e-tests/tests/load-tests/OLD/run-kflux-rhel-p01-2025_07_03T11_39_39_192503922_00_00/collected-data/jhutar-tenant/1/pod-jhutar-app-svayh-comp-0-on-push-2wvnb-build-container-pod-step-build.log This is a problem on a load test side I guess. Looks like we fail to switch pipeline run to RPM build one and container build pipeline is trying to build the repo same way as it would be a container. --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 9d365ba26e..aa2cff3a4f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -77,6 +77,7 @@ "SKIP": r"Skipping step because a previous step failed", # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", + "Can not find Dockerfile": r"Cannot find Dockerfile Dockerfile", "DNF failed to download repodata from 
Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", From e4fd8d7629df34290553d797de5fc76abea6bf94 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:14:12 +0200 Subject: [PATCH 101/321] feat: New error: Error allocating host because of insufficient free addresses in subnet Error was in collected-data/jhutar-tenant/1/pod-jhutar-app-wvdca-comp-0-on-push-kwln8-calculate-deps-x86-64-pod-step-mock-build.log: + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + cat /ssh/error Error allocating host: failed to launch EC2 instance for jhutar-app-wvdca-comp-0-on-push-kwln8-calculate-deps-x86-64: operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: 19286adc-c47b-411d-b896-2d77368509dd, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet 'subnet-0aa719a6c5b602b16' to satisfy the requested number of instances. 
Context info: Platform: linux/amd64 File: /opt/app-root/src/pkg/reconciler/taskrun/taskrun.go Line: 458 + exit 1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index aa2cff3a4f..306094b575 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -80,6 +80,7 @@ "Can not find Dockerfile": r"Cannot find Dockerfile Dockerfile", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", + "Error allocating host because of insufficient free addresses in subnet": r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances.", "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", From 5498bf2a3748141ecc4d546e92797307452e4db1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:19:16 +0200 Subject: [PATCH 102/321] feat: New error: Gateway Time-out when pulling container image Error in 
collected-data/jhutar-tenant/1/pod-jhutar-app-iiezj-comp-0-on-push-kbdxj-calculate-deps-x86-64-pod-step-mock-build.log: [...] + mock_img=quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + remote_cmd podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + ssh -o StrictHostKeyChecking=no u-57228f5adfe05adda27be169735e@10.29.77.10 podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 Trying to pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0... Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 306094b575..14c203d0af 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -84,6 +84,7 @@ "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "Gateway Time-out when pulling container image": r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out", "Introspection failed because of 
incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", "RPM build failed: bool cannot be defined via typedef": r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", } From 152ca39877181e8a8f59a3b5b89ffe7af674309d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:28:11 +0200 Subject: [PATCH 103/321] feat: New error: Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on Error in load-test-run.log: I0710 03:39:44.715984 4108 logging.go:64] FAIL(40): Integration test scenario failed creation: Unable to create the Integration Test Scenario jhutar-its-yupxo: integrationtestscenarios.appstudio.redhat.com "jhutar-its-yupxo" is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on: , --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 14c203d0af..580e891dcf 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -33,6 +33,7 @@ "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", "Failed creating integration 
test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", + "Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", From 4c786fadfb0f2ac8611f1991a2910c3303a86350 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:29:42 +0200 Subject: [PATCH 104/321] cleanup: Sort --- tests/load-tests/errors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 580e891dcf..f08e5ffe88 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -28,12 +28,12 @@ "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": 
r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Error deleting on-pull-request default PipelineRun": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", - "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service", "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", - "Failed 
creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", "Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", + "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", + "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] 
.*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", From f205543e2ccaab1025398f6796f730a669be5865 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:33:27 +0200 Subject: [PATCH 105/321] feat: New error pattern: Timeout listing pipeline runs I0710 03:15:44.971939 64120 logging.go:64] FAIL(64): Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace jhutar-1-tenant: Unable to list PipelineRuns for component jhutar-1-app-dmbwr-comp-0 in namespace jhutar-1-tenant: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f08e5ffe88..d131cede4d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -67,6 +67,7 @@ # to call webhook: Post "https://etcd-shield.etcd-shield.svc:443/validate-tekton-dev-v1-pipelinerun?timeout=10s": # context deadline exceeded``` "Timeout listing pipeline runs": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", + "Timeout listing pipeline runs (on-push merged)": r"Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", "Timeout waiting for snapshot to be created": r"Snapshot failed creation: context deadline exceeded", From 87dfb7a7bc604bbf0f581ec89d2e98c447e03ffb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:37:40 +0200 Subject: [PATCH
106/321] feat: New error: Bad Gateway when pulling container image Unknown error: + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + '[' -e /ssh/otp ']' ++ cat /ssh/otp-server + curl --cacert /ssh/otp-ca -XPOST -d @/ssh/otp https://multi-platform-otp-server.multi-platform-controller.svc.cluster.local/otp % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 2621 100 2601 100 20 169k 1333 --:--:-- --:--:-- --:--:-- 170k + echo '' + arch=x86_64 + case linux/ppc64le in + arch=ppc64le + chmod 0400 /root/.ssh/id_rsa ++ cat /ssh/host + export SSH_HOST=u-31801f6ea742f55211730d4df6f6@10.130.81.13 + SSH_HOST=u-31801f6ea742f55211730d4df6f6@10.130.81.13 ++ cat /ssh/user-dir + export HOMEDIR=/home/u-31801f6ea742f55211730d4df6f6 + HOMEDIR=/home/u-31801f6ea742f55211730d4df6f6 + export 'SSH_ARGS=-o StrictHostKeyChecking=no' + SSH_ARGS='-o StrictHostKeyChecking=no' + '[' u-31801f6ea742f55211730d4df6f6@10.130.81.13 == localhost ']' + workdir=/var/workdir + remote_cmd echo 'Hello from the other side!' + ssh -o StrictHostKeyChecking=no u-31801f6ea742f55211730d4df6f6@10.130.81.13 echo 'Hello from the other side!' Warning: Permanently added '10.130.81.13' (ED25519) to the list of known hosts. Hello from the other side! 
+ send /var/workdir/source/ /home/u-31801f6ea742f55211730d4df6f6/source + rsync -rva /var/workdir/source/ u-31801f6ea742f55211730d4df6f6@10.130.81.13:/home/u-31801f6ea742f55211730d4df6f6/source sending incremental file list created directory /home/u-31801f6ea742f55211730d4df6f6/source ./ .gitignore README.md ci.fmf gating.yaml generate-sources.sh libecpg-10.5-no-compat-lib.patch libecpg-10.5-rpm-pgsql.patch libecpg-10.5-var-run-socket.patch libecpg-12.2-dependency-build.patch libecpg-12.2-external-libpq.patch libecpg.spec postgresql-16.1.tar.bz2 postgresql-16.1.tar.bz2.sha256 sources .fmf/ .fmf/version .git/ .git/FETCH_HEAD .git/HEAD .git/config .git/description .git/index .git/branches/ .git/hooks/ .git/hooks/applypatch-msg.sample .git/hooks/commit-msg.sample .git/hooks/fsmonitor-watchman.sample .git/hooks/post-update.sample .git/hooks/pre-applypatch.sample .git/hooks/pre-commit.sample .git/hooks/pre-merge-commit.sample .git/hooks/pre-push.sample .git/hooks/pre-rebase.sample .git/hooks/pre-receive.sample .git/hooks/prepare-commit-msg.sample .git/hooks/push-to-checkout.sample .git/hooks/sendemail-validate.sample .git/hooks/update.sample .git/info/ .git/info/exclude .git/logs/ .git/logs/HEAD .git/objects/ .git/objects/info/ .git/objects/pack/ .git/objects/pack/pack-1380e8dfc5d4a9f5aac6103aac9b2a228ed5a9d4.idx .git/objects/pack/pack-1380e8dfc5d4a9f5aac6103aac9b2a228ed5a9d4.pack .git/objects/pack/pack-1380e8dfc5d4a9f5aac6103aac9b2a228ed5a9d4.rev .git/refs/ .git/refs/heads/ .git/refs/tags/ .tekton/ .tekton/jhutar-app-ckazm-comp-0-pull-request.yaml .tekton/jhutar-app-ckazm-comp-0-push.yaml .tekton/libecpg-pull-request.yaml .tekton/libecpg-push.yaml plans/ plans/tier1-internal.fmf sent 24,713,422 bytes received 989 bytes 5,492,091.33 bytes/sec total size is 24,704,054 speedup is 1.00 + remote_cmd mkdir /home/u-31801f6ea742f55211730d4df6f6/results + ssh -o StrictHostKeyChecking=no u-31801f6ea742f55211730d4df6f6@10.130.81.13 mkdir 
/home/u-31801f6ea742f55211730d4df6f6/results + remote_cmd mkdir /home/u-31801f6ea742f55211730d4df6f6/.docker + ssh -o StrictHostKeyChecking=no u-31801f6ea742f55211730d4df6f6@10.130.81.13 mkdir /home/u-31801f6ea742f55211730d4df6f6/.docker + send /config.json /home/u-31801f6ea742f55211730d4df6f6/.docker/config.json + rsync -rva /config.json u-31801f6ea742f55211730d4df6f6@10.130.81.13:/home/u-31801f6ea742f55211730d4df6f6/.docker/config.json sending incremental file list config.json sent 417 bytes received 35 bytes 301.33 bytes/sec total size is 310 speedup is 0.69 + mock_img=quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + remote_cmd podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + ssh -o StrictHostKeyChecking=no u-31801f6ea742f55211730d4df6f6@10.130.81.13 podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 Trying to pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0... 
Error: initializing source docker://quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0: reading manifest 6ca435b53f4505aa3dc1f031f7912de1f32035f0 in quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container: received unexpected HTTP status: 502 Bad Gateway --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d131cede4d..ad54c78a94 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -77,6 +77,7 @@ FAILED_PLR_ERRORS = { "SKIP": r"Skipping step because a previous step failed", # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code + "Bad Gateway when pulling container image": r"Error: initializing source .* reading manifest .* in .* received unexpected HTTP status: 502 Bad Gateway ", "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "Can not find Dockerfile": r"Cannot find Dockerfile Dockerfile", From 2c88f8e1e6e0faa3c22d1431cdf2615e553dd48d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:41:04 +0200 Subject: [PATCH 107/321] feat: New error: Timeout getting PaC pull number when validating component Unknown error: FAIL(62): Component failed validation: Unable to get PaC pull number for component jhutar-1-app-jaagm-comp-0 in namespace jhutar-1-tenant: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ad54c78a94..f8d8449a0b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -49,6 +49,7 @@ "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", + "Timeout getting PaC pull number when validating component": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", # Last time I seen this we discussed it here: From 1792b13c3ab0e0a9d29f87ed51bda5a924743635 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:43:54 +0200 Subject: [PATCH 108/321] feat: New error pattern: Build Pipeline Run was cancelled Unknown error: FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-ginyi-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-07-16 03:13:40 +0000 UTC} Reason:Cancelled Message:PipelineRun "jhutar-app-ginyi-comp-0-on-pull-request-cp889" was cancelled} --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f8d8449a0b..5b9fc14a1e 100755 --- a/tests/load-tests/errors.py +++ 
b/tests/load-tests/errors.py @@ -19,7 +19,7 @@ ERRORS = { "Application creation failed because of TLS handshake timeout": r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout", "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", - "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled .* Message:PipelineRun .* was cancelled", + "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled", "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", From 7a43890df2b65edb1a183164e68566d713afa4ad Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:49:24 +0200 Subject: [PATCH 109/321] feat: New error: DNF failed to download repodata from Download Devel because could not resolve host Error from collected-data/jhutar-tenant/1/pod-jhutar-app-dcwbn-comp-0-on-push-xcx4v-calculate-deps-s390x-pod-step-mock-build.log: [...] 
Start: installing minimal buildroot with dnf Unable to detect release version (use '--releasever' to specify release version) No matches found for the following disable plugin patterns: local, spacewalk, versionlock Updating Subscription Management repositories. Unable to read consumer identity This system is not registered with an entitlement server. You can use subscription-manager to register. Errors during downloading metadata for repository 'build': - Curl error (6): Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/rhel-10.0-build/8942890/s390x/repodata/repomd.xml [Could not resolve host: download.devel.redhat.com] Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried ERROR: Exception(/source/libecpg.spec) Config(rhel-10.0-build-repo_8942890) 2 minutes 14 seconds INFO: Results and/or logs in: /results INFO: Cleaning up build root ('cleanup_on_failure=True') Start: clean chroot Finish: clean chroot ERROR: Command failed: # /usr/bin/dnf-3 --installroot /var/lib/mock/rhel-10.0-build-repo_8942890/root/ --setopt=deltarpm=False --setopt=allow_vendor_change=yes --allowerasing --disableplugin=local --disableplugin=spacewalk --disableplugin=versionlock install @build Unable to detect release version (use '--releasever' to specify release version) No matches found for the following disable plugin patterns: local, spacewalk, versionlock Updating Subscription Management repositories. Unable to read consumer identity This system is not registered with an entitlement server. You can use subscription-manager to register. 
Errors during downloading metadata for repository 'build': - Curl error (6): Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/rhel-10.0-build/8942890/s390x/repodata/repomd.xml [Could not resolve host: download.devel.redhat.com] Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 5b9fc14a1e..60f8d9b5a9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -82,6 +82,7 @@ "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "Can not find Dockerfile": r"Cannot find Dockerfile Dockerfile", + "DNF failed to download repodata from Download Devel because could not resolve host": r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com.", "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", "Error allocating host because of insufficient free addresses in subnet": r"Error allocating host: failed to 
launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances.", From 5a7ff062a42d96c4a3adabc6bb58fc00354ec0cb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:54:04 +0200 Subject: [PATCH 110/321] feat: New error: Repo forking failed as we got TLS handshake timeout talking to GitLab CEE Unknown error: FAIL(80): Repo forking failed: Error deleting project jhutar/libecpg-prodp02-jhutar: Delete "https://gitlab.cee.redhat.com/api/v4/projects/jhutar%2Flibecpg-prodp02-jhutar": net/http: TLS handshake timeout --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 60f8d9b5a9..7e370df544 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -45,6 +45,7 @@ "Release failure: PipelineRun not created": r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", + "Repo forking failed as we got TLS handshake timeout talking to GitLab CEE": r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout", "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists.*", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", From 921a752ef48fb64f7e95bc96848797c148e48873 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 09:56:40 +0200 Subject: [PATCH 111/321] feat: New error: Unable to connect to server Error from collected-data/jhutar-tenant/1/pod-jhutar-app-rknus-comp-0-on-push-zcwfr-calculate-deps-s390x-pod-step-mock-build.log: [...] + remote_cmd podman run -e KOJI_TARGET=DEFAULT -v /home/u-4785c0a1a25afde7ef64e55e9b8f/source:/source -v /home/u-4785c0a1a25afde7ef64e55e9b8f/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 mock-from-brew rhel-10-main s390x --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies + ssh -o StrictHostKeyChecking=no u-4785c0a1a25afde7ef64e55e9b8f@10.130.79.106 podman run -e KOJI_TARGET=DEFAULT -v /home/u-4785c0a1a25afde7ef64e55e9b8f/source:/source -v /home/u-4785c0a1a25afde7ef64e55e9b8f/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 mock-from-brew rhel-10-main s390x --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies time="2025-07-21T08:42:30-04:00" level=warning msg="The input device is not a TTY. 
The --tty and --interactive flags might not work properly" + branch=rhel-10-main + shift + arch=s390x + shift + set -o pipefail + config=/tmp/mock-in-konflux.cfg + konflux-mock-profile rhel-10-main s390x + tee /tmp/mock-in-konflux.cfg Error: Unable to connect to server --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7e370df544..c17212ea2e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -75,6 +75,7 @@ "Timeout waiting for snapshot to be created": r"Snapshot failed creation: context deadline exceeded", "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", + "Unable to connect to server": r"Error: Unable to connect to server", } FAILED_PLR_ERRORS = { From 617b63a69cf422d814e1751a98a11a5dee83479d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 23 Jul 2025 10:49:27 +0200 Subject: [PATCH 112/321] feat: New error: Failed to pull container from registry.access.redhat.com because of DNS error Unknown error: + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + '[' -e /ssh/otp ']' ++ cat /ssh/otp-server + curl --cacert /ssh/otp-ca -XPOST -d @/ssh/otp https://multi-platform-otp-server.multi-platform-controller.svc.cluster.local/otp % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 2621 100 2601 100 20 169k 1333 --:--:-- --:--:-- --:--:-- 170k + echo '' + arch=x86_64 + case linux/s390x in + arch=s390x + chmod 0400 /root/.ssh/id_rsa ++ cat /ssh/host + export SSH_HOST=u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 + SSH_HOST=u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 ++ cat /ssh/user-dir + export HOMEDIR=/home/u-29a8daf9accb2315ef8b6c7f3a70 + HOMEDIR=/home/u-29a8daf9accb2315ef8b6c7f3a70 + export 
'SSH_ARGS=-o StrictHostKeyChecking=no' + SSH_ARGS='-o StrictHostKeyChecking=no' + '[' u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 == localhost ']' + workdir=/var/workdir + remote_cmd echo 'Hello from the other side!' + ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 echo 'Hello from the other side!' Warning: Permanently added '10.130.79.6' (ED25519) to the list of known hosts. Hello from the other side! + send /var/workdir/source/ /home/u-29a8daf9accb2315ef8b6c7f3a70/source + rsync -rva /var/workdir/source/ u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6:/home/u-29a8daf9accb2315ef8b6c7f3a70/source sending incremental file list created directory /home/u-29a8daf9accb2315ef8b6c7f3a70/source ./ .gitignore README.md ci.fmf gating.yaml generate-sources.sh libecpg-10.5-no-compat-lib.patch libecpg-10.5-rpm-pgsql.patch libecpg-10.5-var-run-socket.patch libecpg-12.2-dependency-build.patch libecpg-12.2-external-libpq.patch libecpg.spec postgresql-16.1.tar.bz2 postgresql-16.1.tar.bz2.sha256 sources .fmf/ .fmf/version .git/ .git/FETCH_HEAD .git/HEAD .git/config .git/description .git/index .git/branches/ .git/hooks/ .git/hooks/applypatch-msg.sample .git/hooks/commit-msg.sample .git/hooks/fsmonitor-watchman.sample .git/hooks/post-update.sample .git/hooks/pre-applypatch.sample .git/hooks/pre-commit.sample .git/hooks/pre-merge-commit.sample .git/hooks/pre-push.sample .git/hooks/pre-rebase.sample .git/hooks/pre-receive.sample .git/hooks/prepare-commit-msg.sample .git/hooks/push-to-checkout.sample .git/hooks/sendemail-validate.sample .git/hooks/update.sample .git/info/ .git/info/exclude .git/logs/ .git/logs/HEAD .git/objects/ .git/objects/info/ .git/objects/pack/ .git/objects/pack/pack-2b8ff9c1a5a1d83d4cbb1679c918fdecb8dab75b.idx .git/objects/pack/pack-2b8ff9c1a5a1d83d4cbb1679c918fdecb8dab75b.pack .git/objects/pack/pack-2b8ff9c1a5a1d83d4cbb1679c918fdecb8dab75b.rev .git/refs/ .git/refs/heads/ .git/refs/tags/ .tekton/ 
.tekton/jhutar-app-dsfws-comp-0-pull-request.yaml .tekton/jhutar-app-dsfws-comp-0-push.yaml .tekton/libecpg-pull-request.yaml .tekton/libecpg-push.yaml plans/ plans/tier1-internal.fmf sent 24,715,247 bytes received 989 bytes 7,061,781.71 bytes/sec total size is 24,705,849 speedup is 1.00 + remote_cmd mkdir /home/u-29a8daf9accb2315ef8b6c7f3a70/results + ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 mkdir /home/u-29a8daf9accb2315ef8b6c7f3a70/results + remote_cmd mkdir /home/u-29a8daf9accb2315ef8b6c7f3a70/.docker + ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 mkdir /home/u-29a8daf9accb2315ef8b6c7f3a70/.docker + send /config.json /home/u-29a8daf9accb2315ef8b6c7f3a70/.docker/config.json + rsync -rva /config.json u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6:/home/u-29a8daf9accb2315ef8b6c7f3a70/.docker/config.json sending incremental file list config.json sent 417 bytes received 35 bytes 301.33 bytes/sec total size is 310 speedup is 0.69 + mock_img=quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + remote_cmd podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 Trying to pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0... 
Getting image source signatures Copying blob sha256:73b948e8f9de0a71f4c3ed790d930e7aebedb47c7944b2c6d093a8ab15ef8614 Copying blob sha256:368daf2a84878235aa6a1c0443aea4bb133ccaa3cee32aa744ff755fac6f90e9 Copying blob sha256:b7364e55b6076ed955c8a456a9e9862d89cc7fb366119c855467c5c9ef46b42b Copying config sha256:abb17151d3d24ef6f2692d1473e9147d358ad2de94b550d77346a314bf91b9c9 Writing manifest to image destination abb17151d3d24ef6f2692d1473e9147d358ad2de94b550d77346a314bf91b9c9 ++ remote_cmd podman run -e KOJI_TARGET=DEFAULT --rm quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 /usr/bin/konflux-select-koji-target rhel-10-main ++ ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 podman run -e KOJI_TARGET=DEFAULT --rm quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 /usr/bin/konflux-select-koji-target rhel-10-main + brew_target=rhel-10.0-candidate + remote_cmd podman run -e KOJI_TARGET=DEFAULT -v /home/u-29a8daf9accb2315ef8b6c7f3a70/source:/source -v /home/u-29a8daf9accb2315ef8b6c7f3a70/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 mock-from-brew rhel-10-main s390x --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies + ssh -o StrictHostKeyChecking=no u-29a8daf9accb2315ef8b6c7f3a70@10.130.79.6 podman run -e KOJI_TARGET=DEFAULT -v /home/u-29a8daf9accb2315ef8b6c7f3a70/source:/source -v /home/u-29a8daf9accb2315ef8b6c7f3a70/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 mock-from-brew rhel-10-main s390x --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies time="2025-07-16T14:23:19-04:00" 
level=warning msg="The input device is not a TTY. The --tty and --interactive flags might not work properly" + branch=rhel-10-main + shift + arch=s390x + shift + set -o pipefail + config=/tmp/mock-in-konflux.cfg + konflux-mock-profile rhel-10-main s390x + tee /tmp/mock-in-konflux.cfg # Auto-generated by the Koji build system config_opts['basedir'] = '/var/lib/mock' config_opts['chroot_setup_cmd'] = 'install @build' config_opts['chroothome'] = '/builddir' config_opts['dnf_warning'] = False config_opts['package_manager'] = 'dnf' config_opts['root'] = 'rhel-10.0-build-repo_8942890' config_opts['rpmbuild_networking'] = False config_opts['target_arch'] = 's390x' config_opts['use_bootstrap_image'] = False config_opts['use_host_resolv'] = False config_opts['yum.conf'] = '[main]\ncachedir=/var/cache/yum\ndebuglevel=1\nlogfile=/var/log/yum.log\nreposdir=/dev/null\nretries=20\nobsoletes=1\ngpgcheck=0\nassumeyes=1\nkeepcache=1\ninstall_weak_deps=0\nstrict=1\n\n# repos\n\n[build]\nname=build\nbaseurl=http://download.devel.redhat.com/brewroot/repos/rhel-10.0-build/8942890/s390x\n' config_opts['plugin_conf']['ccache_enable'] = False config_opts['plugin_conf']['root_cache_enable'] = False config_opts['plugin_conf']['yum_cache_enable'] = False config_opts['macros']['%_host'] = 's390x-koji-linux-gnu' config_opts['macros']['%_host_cpu'] = 's390x' config_opts['macros']['%_rpmfilename'] = '%%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm' config_opts['macros']['%_topdir'] = '/builddir/build' config_opts['macros']['%dist'] = '%{!?distprefix0:%{?distprefix}}%{expand:%{lua:for i=0,9999 do print("%{?distprefix" .. i .."}") end}}.el10_0%{?with_bootstrap:%{__bootstrap}}' config_opts['macros']['%distribution'] = 'Koji Testing' # Konflux: bootstrap image feature is required for hemetic builds config_opts['use_bootstrap_image'] = True # TODO (Konflux): switch to UBI10 once available! 
config_opts['bootstrap_image'] = 'registry.access.redhat.com/ubi9/ubi' config_opts['bootstrap_image_ready'] = True config_opts['macros']['%_buildhost'] = 'konflux.redhat.com' config_opts['macros']['%vendor'] = 'Red Hat, Inc.' config_opts['macros']['%packager'] = 'Red Hat, Inc. ' + exec mock -r /tmp/mock-in-konflux.cfg --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies INFO: mock.py version 6.1 starting (python version = 3.13.3, NVR = mock-6.1-1.fc41), args: /usr/libexec/mock/mock -r /tmp/mock-in-konflux.cfg --spec /source/libecpg.spec --sources /source --resultdir /results --calculate-build-dependencies Start(bootstrap): init plugins INFO: selinux disabled Finish(bootstrap): init plugins Start: init plugins INFO: selinux disabled Finish: init plugins INFO: Signal handler active Start: run INFO: Start(/source/libecpg.spec) Config(rhel-10.0-build-repo_8942890) Start: clean chroot Finish: clean chroot Mock Version: 6.1 INFO: Mock Version: 6.1 Start(bootstrap): chroot init INFO: calling preinit hooks INFO: Guessed host environment type: unknown INFO: Using container image: registry.access.redhat.com/ubi9/ubi INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: 
registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Pulling image: registry.access.redhat.com/ubi9/ubi ERROR: b'' b'Trying to pull registry.access.redhat.com/ubi9/ubi:latest...\nError: initializing source docker://registry.access.redhat.com/ubi9/ubi:latest: pinging container registry registry.access.redhat.com: Get "https://registry.access.redhat.com/v2/": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution\n' INFO: Tagging container image 
as mock-bootstrap-d1e9fd30-95ab-46d3-8e36-2c9e4ed2bb8f ERROR: Exception(/source/libecpg.spec) Config(rhel-10.0-build-repo_8942890) 2 minutes 12 seconds INFO: Results and/or logs in: /results INFO: Cleaning up build root ('cleanup_on_failure=True') Start: clean chroot Finish: clean chroot ERROR: expected str, bytes or os.PathLike object, not NoneType Traceback (most recent call last): File "/usr/libexec/mock/mock", line 1142, in exitStatus = main() File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/libexec/mock/mock", line 894, in main result = run_command(options, args, config_opts, commands, buildroot) File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/libexec/mock/mock", line 997, in run_command srpm = mockbuild.rebuild.do_buildsrpm(config_opts, commands, buildroot, options, args) File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/lib/python3.13/site-packages/mockbuild/rebuild.py", line 104, in do_buildsrpm return rebuild_generic([options.spec], commands, buildroot, config_opts, cmd=cmd, post=None, clean=clean) File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/lib/python3.13/site-packages/mockbuild/rebuild.py", line 24, in rebuild_generic commands.init(prebuild=not config_opts.get('short_circuit')) ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/lib/python3.13/site-packages/mockbuild/backend.py", line 160, in init self.bootstrap_buildroot.initialize(**kwargs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File 
"/usr/lib/python3.13/site-packages/mockbuild/buildroot.py", line 205, in initialize self._init(prebuild=prebuild) ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/lib/python3.13/site-packages/mockbuild/buildroot.py", line 322, in _init self._load_from_container_image() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^ File "/usr/lib/python3.13/site-packages/mockbuild/trace_decorator.py", line 93, in trace result = func(*args, **kw) File "/usr/lib/python3.13/site-packages/mockbuild/buildroot.py", line 284, in _load_from_container_image podman.tag_image() ~~~~~~~~~~~~~~~~^^ File "/usr/lib/python3.13/site-packages/mockbuild/podman.py", line 139, in tag_image subprocess.run(cmd, env=self.buildroot.env, stdout=subprocess.PIPE, ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ stderr=subprocess.PIPE, check=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 554, in run with Popen(*popenargs, **kwargs) as process: ~~~~~^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 1039, in __init__ self._execute_child(args, executable, preexec_fn, close_fds, ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pass_fds, cwd, env, ^^^^^^^^^^^^^^^^^^^ ...<5 lines>... gid, gids, uid, umask, ^^^^^^^^^^^^^^^^^^^^^^ start_new_session, process_group) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 1899, in _execute_child self.pid = _fork_exec( ~~~~~~~~~~^ args, executable_list, ^^^^^^^^^^^^^^^^^^^^^^ ...<6 lines>... 
process_group, gid, gids, uid, umask, ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ preexec_fn, _USE_VFORK) ^^^^^^^^^^^^^^^^^^^^^^^ TypeError: expected str, bytes or os.PathLike object, not NoneType --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c17212ea2e..1775357a18 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -91,6 +91,7 @@ "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "Failed to pull container from registry.access.redhat.com because of DNS error": r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution", "Gateway Time-out when pulling container image": r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out", "Introspection failed because of incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", "RPM build failed: bool cannot be defined via typedef": r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", From 86c6738254cb6d57102416b366171042faacbdd0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 25 Jul 2025 08:30:57 +0200 Subject: [PATCH 113/321] feat(KONFLUX-8783): Remove ITS validation According to Ryan Cole, Integration test scenario is now validated at creation time by their admission webhook, so if it is invalid, it would not be created at all. > We merged a change this week that moved validation from the controller > to an admission webhook. Now since an invalid ITS cannot be created we > no longer set that status condition. > [The webhook is] actually a combination of a mutating webhook and > a validation webhook. > The mutating webhook sets some default values if they don't exist but > doesn't fail if the values can't be set. The validating webhook rejects > the resource if the values don't exist/aren't correct. That keeps a > broken mutating webhook from blocking resource creation on the cluster. 
Assisted-by: Larry Rios --- tests/load-tests/ci-scripts/config/horreum-labels.sh | 2 -- tests/load-tests/evaluate.py | 1 - 2 files changed, 3 deletions(-) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index 0dfd359256..c56a10980b 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -222,8 +222,6 @@ horreum_schema_label_present '$.results.measurements.validateApplication.error_r horreum_schema_label_present '$.results.measurements.validateApplication.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.error_rate' horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.pass.duration.mean' -horreum_schema_label_present '$.results.measurements.validateIntegrationTestScenario.error_rate' -horreum_schema_label_present '$.results.measurements.validateIntegrationTestScenario.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.error_rate' horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation.error_rate' diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index fef53ec961..7252d5a258 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -44,7 +44,6 @@ # These metrics will be ignored if ITS was skipped METRICS_ITS = [ "createIntegrationTestScenario", - "validateIntegrationTestScenario", "validateTestPipelineRunCreation", "validateTestPipelineRunCondition", ] From a389260875f518dc191c8faeba2a9867cb329980 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 25 Jul 2025 09:21:03 +0200 Subject: [PATCH 114/321] feat: New error: Couldnt get task via buldles resolver from quay.io due to unexpected end of JSON input I0724 14:24:24.098121 1700 
logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-1-app-resyb-comp-0 in namespace jhutar-1-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-07-24 14:24:05 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-1-tenant/jhutar-1-app-resyb-comp-0-on-pull-request-pf9zw can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type bundles\nname = sast-shell-check\n": error requesting remote resource: error getting "bundleresolver" "jhutar-1-tenant/bundles-258b04f0e473307e74d625481237374a": cannot retrieve the oci image: unexpected end of JSON input} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1775357a18..70cbcb2129 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -25,6 +25,7 @@ "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get pipeline via http resolver from gitlab.cee": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found", "Couldnt get task via buldles resolver from quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests", + "Couldnt get task via buldles resolver from quay.io due to unexpected end of JSON input": r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input", "Couldnt get task via git resolver from 
gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Error deleting on-pull-request default PipelineRun": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", From f16965f0f52b1ceac2e5b50eda7dc3763589089c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 25 Jul 2025 10:19:38 +0200 Subject: [PATCH 115/321] fix: Fix typo buldles -> bundles --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 70cbcb2129..058b918d54 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -25,7 +25,7 @@ "Couldnt get pipeline via git resolver from gitlab.cee due to 429": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get pipeline via http resolver from gitlab.cee": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found", "Couldnt get task via buldles resolver from quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests", - "Couldnt get task via buldles resolver from quay.io due to unexpected end of JSON input": r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting 
\"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input", + "Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input": r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input", "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", "Error deleting on-pull-request default PipelineRun": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", From d28f3facd83befc49bd932eb085d5c61d3146753 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 25 Jul 2025 10:27:49 +0200 Subject: [PATCH 116/321] feat: New error: Gateway Time-out when pulling container image from quay.io; also tweaked similar error Error from collected-data/jhutar-tenant/1/pod-jhutar-app-ongac-comp-0-on-push-dl9d6-rpmbuild-ppc64le-pod-step-mock-build.log: + mock_img=quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + remote_cmd podman pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 + ssh -o StrictHostKeyChecking=no u-05296ef2a5aad16a53d90432a9f7@10.130.75.151 podman pull 
quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0 Trying to pull quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0... Error: initializing source docker://quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container:6ca435b53f4505aa3dc1f031f7912de1f32035f0: reading manifest 6ca435b53f4505aa3dc1f031f7912de1f32035f0 in quay.io/redhat-user-workloads/rhel-on-konflux-tenant/tooling/mock-rhel-container: received unexpected HTTP status: 504 Gateway Time-out --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 058b918d54..da62c1fc8e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -81,7 +81,7 @@ FAILED_PLR_ERRORS = { "SKIP": r"Skipping step because a previous step failed", # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code - "Bad Gateway when pulling container image": r"Error: initializing source .* reading manifest .* in .* received unexpected HTTP status: 502 Bad Gateway ", + "Bad Gateway when pulling container image from quay.io": r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 502 Bad Gateway ", "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", "Can not find Dockerfile": r"Cannot find 
Dockerfile Dockerfile", @@ -93,6 +93,7 @@ "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", "Failed to pull container from registry.access.redhat.com because of DNS error": r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution", + "Gateway Time-out when pulling container image from quay.io": r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out", "Gateway Time-out when pulling container image": r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out", "Introspection failed because of incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", "RPM build failed: bool cannot be defined via typedef": r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", From c0d5767544fd4e7f084162b1437f183884d4cfcf Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 25 Jul 2025 15:25:31 +0200 Subject: [PATCH 117/321] feat: New error: Failed getting PaC pull number because PaC public route does not exist I0725 09:40:50.990639 4206 logging.go:30] DEBUG Configuring 5 imagePullSecrets for component build task images for component jhutar-app-jppjx-comp-0 I0725 09:40:51.210914 4206 logging.go:30] DEBUG PaC component jhutar-app-jppjx-comp-0 in namespace jhutar-tenant do not have PR yet I0725 09:41:11.224624 4206 logging.go:30] DEBUG PaC component jhutar-app-jppjx-comp-0 in namespace jhutar-tenant do not have PR yet I0725 09:41:31.214269 4206 logging.go:30] DEBUG PaC component jhutar-app-jppjx-comp-0 in namespace jhutar-tenant do not have PR yet I0725 09:41:51.214190 4206 logging.go:64] FAIL(62): Component failed validation: Unable to get PaC pull number for component jhutar-app-jppjx-comp-0 in namespace jhutar-tenant: PaC component jhutar-app-jppjx-comp-0 in namespace jhutar-tenant failed on PR annotation: Incorrect state: {"pac":{"state":"error","error-id":52,"error-message":"52: Pipelines as Code public route does not exist"},"message":"done"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index da62c1fc8e..8e9302a6f8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -34,6 +34,7 @@ "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", "Failed creating integration 
test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", + "Failed getting PaC pull number because PaC public route does not exist": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines as Code public route does not exist\"", "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", From fb11cf19abc541a4b468a223d7e82251078e00e2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: 
Fri, 25 Jul 2025 16:04:48 +0200 Subject: [PATCH 118/321] fix: Fix wording as we are not creating that SA, just ensuring it exists --- tests/load-tests/errors.py | 2 +- tests/load-tests/pkg/journey/handle_component.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8e9302a6f8..47056fee9f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -51,7 +51,7 @@ "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", - "Timeout getting build service account": r"Component build SA failed creation: Component build SA .* not created: context deadline exceeded", + "Timeout getting build service account": r"Component build SA not present: Component build SA .* not present: context deadline exceeded", "Timeout getting PaC pull number when validating component": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded", "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 
d2b1782c1f..d019ddb711 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -131,7 +131,7 @@ func validateComponentBuildSA(f *framework.Framework, namespace, name string) er // TODO It would be much better to watch this resource instead querying it err := utils.WaitUntilWithInterval(f.AsKubeDeveloper.CommonController.ServiceAccountPresent(component_sa, namespace), interval, timeout) if err != nil { - return fmt.Errorf("Component build SA %s in namespace %s not created: %v", component_sa, namespace, err) + return fmt.Errorf("Component build SA %s in namespace %s not present: %v", component_sa, namespace, err) } return nil @@ -327,7 +327,7 @@ func HandleComponent(ctx *PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(65, "Component build SA failed creation: %v", err) + return logging.Logger.Fail(65, "Component build SA not present: %v", err) } // Configure imagePullSecrets needed for component build task images From b39c23f8997a0cd090164f7b6ce8776fbbe9a4d4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 28 Jul 2025 15:48:04 +0200 Subject: [PATCH 119/321] feat(KONFLUX-8988): Also collect release pipelinerun --- tests/load-tests/pkg/journey/handle_collections.go | 12 ++++++++++-- tests/load-tests/pkg/journey/handle_releases_run.go | 11 +++++------ tests/load-tests/pkg/journey/journey.go | 1 + 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index a1048a98aa..eacd04d32a 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -87,12 +87,20 @@ func collectPodLogs(f *framework.Framework, dirPath, namespace, component string return nil } -func collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, application, component string) error { +func 
collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, application, component, release string) error { prs, err := f.AsKubeDeveloper.HasController.GetComponentPipelineRunsWithType(component, application, namespace, "", "", "") if err != nil { return fmt.Errorf("Failed to list PipelineRuns %s/%s/%s: %v", namespace, application, component, err) } + pr_release, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, release, namespace) + if err != nil { + return fmt.Errorf("Failed to get Release PipelineRun %s/%s: %v", namespace, release, err) + } + + // Make one list that contains them all + *prs = append(*prs, *pr_release) + for _, pr := range *prs { prJSON, err := json.Marshal(pr) if err != nil { @@ -194,7 +202,7 @@ func HandlePerComponentCollection(ctx *PerComponentContext) error { return logging.Logger.Fail(101, "Failed to collect pod logs: %v", err) } - err = collectPipelineRunJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName) + err = collectPipelineRunJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName, ctx.ReleaseName) if err != nil { return logging.Logger.Fail(102, "Failed to collect pipeline run JSONs: %v", err) } diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index a5f98bbd72..c1d155ae78 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -139,7 +139,6 @@ func HandleReleaseRun(ctx *PerComponentContext) error { return nil } - var releaseName string var iface interface{} var ok bool var err error @@ -154,7 +153,7 @@ func HandleReleaseRun(ctx *PerComponentContext) error { return logging.Logger.Fail(90, "Release failed creation: %v", err) } - releaseName, ok = iface.(string) + ctx.ReleaseName, ok = iface.(string) if !ok { return 
logging.Logger.Fail(91, "Type assertion failed on release name: %+v", iface) } @@ -163,7 +162,7 @@ func HandleReleaseRun(ctx *PerComponentContext) error { validateReleasePipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, - releaseName, + ctx.ReleaseName, ) if err != nil { return logging.Logger.Fail(92, "Release pipeline run failed creation: %v", err) @@ -173,7 +172,7 @@ func HandleReleaseRun(ctx *PerComponentContext) error { validateReleasePipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, - releaseName, + ctx.ReleaseName, ) if err != nil { return logging.Logger.Fail(93, "Release pipeline run failed: %v", err) @@ -183,13 +182,13 @@ func HandleReleaseRun(ctx *PerComponentContext) error { validateReleaseCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, - releaseName, + ctx.ReleaseName, ) if err != nil { return logging.Logger.Fail(94, "Release failed: %v", err) } - logging.Logger.Info("Release %s in namespace %s succeeded", releaseName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Info("Release %s in namespace %s succeeded", ctx.ReleaseName, ctx.ParentContext.ParentContext.Namespace) return nil } diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 288ac24bd5..41ca2ffac0 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -150,6 +150,7 @@ type PerComponentContext struct { ComponentName string SnapshotName string MergeRequestNumber int + ReleaseName string } // Start all the threads to process all components per application From ef056113368db9a6628134d65f1e2243ffd6c7ff Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 28 Jul 2025 15:48:51 +0200 Subject: [PATCH 120/321] fix: Make error code unique --- tests/load-tests/pkg/journey/handle_collections.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go 
b/tests/load-tests/pkg/journey/handle_collections.go index eacd04d32a..030ed56d82 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -209,7 +209,7 @@ func HandlePerComponentCollection(ctx *PerComponentContext) error { err = collectApplicationComponentJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName) if err != nil { - return logging.Logger.Fail(102, "Failed to collect Application and Component JSONs: %v", err) + return logging.Logger.Fail(103, "Failed to collect Application and Component JSONs: %v", err) } return nil From adc7620caed1b6aa2442a14aad41365c5f8b1bd9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 28 Jul 2025 16:13:15 +0200 Subject: [PATCH 121/321] feat: Distinguilsh when different pipeline runs fails --- tests/load-tests/errors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 47056fee9f..c72a5b20be 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -41,15 +41,17 @@ "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", "Failed validating release condition": r"Release .* in namespace .* failed: .*Message:Release validation failed.*", "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh", - "Pipeline failed": r"Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", + "Pipeline failed": r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Post-test data collection failed": r"Failed to collect pipeline run JSONs", "Release failed in progress without error given": r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$", "Release failure: PipelineRun not created": r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", + "Release Pipeline failed": r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed as we got TLS handshake timeout talking to GitLab CEE": r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout", "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) 
due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", + "Test Pipeline failed": r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", "Timeout getting build service account": r"Component build SA not present: Component build SA .* not present: context deadline exceeded", "Timeout getting PaC pull number when validating component": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded", From fe5aae690bca22b58be7b86801d8dc18d404d8f5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 28 Jul 2025 17:02:16 +0200 Subject: [PATCH 122/321] fix: Do not exit collecting PLRs if there is not release PLR --- tests/load-tests/pkg/journey/handle_collections.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 030ed56d82..753e256f50 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -95,7 +95,7 @@ func collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, applica pr_release, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, release, namespace) if err != nil { - return fmt.Errorf("Failed to get Release PipelineRun %s/%s: %v", namespace, release, err) + logging.Logger.Warning("Failed to get Release PipelineRun %s/%s: %v", namespace, release, err) } // Make one list that contains them all From a57685a5b39580e8791b6bdcccee00c857defebc Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 29 Jul 2025 06:37:35 +0200 Subject: [PATCH 123/321] feat: Do not append pr_release if none was found --- tests/load-tests/pkg/journey/handle_collections.go | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 753e256f50..0d55fb8b0f 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -99,7 +99,9 @@ func collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, applica } // Make one list that contains them all - *prs = append(*prs, *pr_release) + if pr_release != nil { + *prs = append(*prs, *pr_release) + } for _, pr := range *prs { prJSON, err := json.Marshal(pr) From e44fb616ef538efbf82ebe590e039b018a0cdc1f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 29 Jul 2025 07:14:53 +0200 Subject: [PATCH 124/321] feat: New error: Repo forking failed when deleting target repo on github.com because 504 F0729 00:44:41.656337 4365 logging.go:58] FATAL Threads setup failed: FAIL(80): Repo forking failed: Error deleting repository rhtap-perf-test/libecpg-srcfedora-fork-undef-jhutar-1: DELETE https://api.github.com/repos/rhtap-perf-test/libecpg-srcfedora-fork-undef-jhutar-1: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists. 
[] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c72a5b20be..e614f9f317 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -50,6 +50,7 @@ "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", "Repo forking failed as we got TLS handshake timeout talking to GitLab CEE": r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout", "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*", + "Repo forking failed when deleting target repo on github.com because 504": r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists.", "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", "Test Pipeline failed": r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", From a673ff9420e5e1ec06e2e4408846ec739160b750 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 08:04:47 +0200 Subject: [PATCH 125/321] fix: Use right (?) 
exception when catching YAML parsing error --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e614f9f317..e763d8856b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -136,7 +136,7 @@ def load(datafile): try: with open(datafile, "r") as fd: data = yaml.safe_load(fd) - except json.decoder.JSONDecodeError: + except yaml.scanner.ScannerError: raise Exception(f"File {datafile} is malfrmed YAML, skipping it") elif datafile.endswith(".json"): try: From 6b7c5690c57146a5653ad5ae0c88468e8036093e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 08:28:02 +0200 Subject: [PATCH 126/321] feat: New error and functionality: Sometimes PLR fails because TR fails to create pod E.g. we had a PipelineRun like this: "status": { "conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-07-29T14:21:38Z", "reason": "Failed", "message": "Tasks Completed: 18 (Failed: 2, Cancelled 0), Skipped: 2" } ], "startTime": "2025-07-29T14:18:17Z", "completionTime": "2025-07-29T14:21:38Z", and that was caused by two TaskRuns like this: "status": { "conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-07-29T14:20:51Z", "reason": "PodCreationFailed", "message": "failed to create task run pod \"jhue05939bba6498044b9e123b65b3893c5-coverity-availability-check\": Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting?timeout=10s\": context deadline exceeded. 
Maybe missing or invalid Task jhutar-1-tenant/" } ], "podName": "", "startTime": "2025-07-29T14:20:29Z", "completionTime": "2025-07-29T14:20:51Z", --- tests/load-tests/errors.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e763d8856b..3b885aff10 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -103,6 +103,10 @@ "RPM build failed: bool cannot be defined via typedef": r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", } +FAILED_TR_ERRORS = { + "Missing expected fields in TaskRun": r"Missing expected fields in TaskRun", # This is special error, meaning everithing failed basically + "Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev": r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. 
Maybe missing or invalid Task .*", +} def message_to_reason(reasons_and_errors: dict, msg: str) -> str | None: """ @@ -200,6 +204,24 @@ def find_trs(plr): except KeyError: return +def check_failed_taskrun(data_dir, ns, tr_name): + datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") + data = load(datafile) + + try: + pod_name = data["status"]["podName"] + for condition in data["status"]["conditions"]: + if condition["type"] == "Succeeded": + break + except KeyError: + return False, "Missing expected fields in TaskRun" + else: + if pod_name == "": + return False, json.dumps(condition, sort_keys=True) + else: + return True, None + + def find_failed_containers(data_dir, ns, tr_name): datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") data = load(datafile) @@ -229,13 +251,19 @@ def investigate_failed_plr(dump_dir): plr_ns = plr["metadata"]["namespace"] for tr_name in find_trs(plr): - for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): - log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) - reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) + tr_ok, tr_message = check_failed_taskrun(dump_dir, plr_ns, tr_name) - if reason == "SKIP": - continue + if tr_ok: + for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): + log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) + reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) + + if reason == "SKIP": + continue + reasons.append(reason) + else: + reason = message_to_reason(FAILED_TR_ERRORS, tr_message) reasons.append(reason) reasons = list(set(reasons)) # get unique reasons only From c85a2135c1e8a4a38e61b3798efbd6f085a19215 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 09:01:42 +0200 Subject: [PATCH 127/321] feat: New error and functionality: Capture another fail that might be missing pod log We got this in the collected task run: "status": { 
"conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-07-29T11:42:40Z", "reason": "TaskRunImagePullFailed", "message": "the step \"oci-attach-report\" in TaskRun \"jhutar-app-floao-comp-0-on-push-2tw8q-clair-scan\" failed to pull the image \"\". The pod errored with the message: \"Back-off pulling image \"quay.io/konflux-ci/oras:latest@sha256:1beeecce012c99794568f74265c065839f9703d28306a8430b667f639343a98b\".\"" } ], "podName": "jhutar-app-floao-comp-0-on-push-2tw8q-clair-scan-pod", "startTime": "2025-07-29T11:42:35Z", "completionTime": "2025-07-29T11:42:40Z", but pod log was not collected - probably because the pod never actually run as we were not able to pull it. --- tests/load-tests/errors.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3b885aff10..bdc8b50228 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -105,6 +105,9 @@ FAILED_TR_ERRORS = { "Missing expected fields in TaskRun": r"Missing expected fields in TaskRun", # This is special error, meaning everithing failed basically + "SKIP": r"\"message\": \"All Steps have completed executing\"", # Another special error to avoid printing 'Unknown error:' message + "SKIP": r"\"message\": \".* exited with code 1\"", # Another special error to avoid printing 'Unknown error:' message + "Back-off pulling task run image from quay.io": r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\"", "Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev": r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. 
Maybe missing or invalid Task .*", } @@ -219,7 +222,7 @@ def check_failed_taskrun(data_dir, ns, tr_name): if pod_name == "": return False, json.dumps(condition, sort_keys=True) else: - return True, None + return True, json.dumps(condition, sort_keys=True) def find_failed_containers(data_dir, ns, tr_name): @@ -262,9 +265,9 @@ def investigate_failed_plr(dump_dir): continue reasons.append(reason) - else: - reason = message_to_reason(FAILED_TR_ERRORS, tr_message) - reasons.append(reason) + + reason = message_to_reason(FAILED_TR_ERRORS, tr_message) + reasons.append(reason) reasons = list(set(reasons)) # get unique reasons only reasons.sort() # sort reasons From 90c74797c49d9a03d315d8485b7f25522e75fb42 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 09:09:07 +0200 Subject: [PATCH 128/321] feat: New error: Failed to pull container from quay.io because of DNS error Error from collected-data/jhutar-tenant/1/pod-jhutar-app-budtn-comp-0-on-push-v9vd5-build-container-pod-step-build.log: [2025-07-29T09:51:34,311323006+00:00] Update CA trust INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-07-29T09:51:35,240514976+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Preparing construction of content-sets.json to be placed at /usr/share/buildinfo/content-sets.json in the image Constructed the following: { "metadata": { "icm_version": 1, "icm_spec": "https://raw.githubusercontent.com/containerbuildsystem/atomic-reactor/master/atomic_reactor/schemas/content_manifest.json", "image_layer_index": 0 }, "from_dnf_hint": true, "content_sets": [] } Appending a COPY command to the Containerfile [2025-07-29T09:51:35,253106176+00:00] Prepare system (architecture: x86_64) Trying to pull registry.access.redhat.com/ubi10/nodejs-22:latest... 
Getting image source signatures Checking if image destination supports signatures Copying blob sha256:42baebe7ae640b1e854d36457ba223320cdbc5c32b13a19c112ef447d220fc5d Copying blob sha256:addf8199744d205db3fbc9d2362711681eb7ce637199676287dc04a9f2f2d1c4 Copying blob sha256:64971d9503824c80069530cf5881bced3c2896533a1786c4b8b93354d0a0b572 Copying config sha256:52dde4223889b08bbf5ce98b536803bc57cb145455d4c6603417e79e58adf475 Writing manifest to image destination Storing signatures 52dde4223889b08bbf5ce98b536803bc57cb145455d4c6603417e79e58adf475 Trying to pull registry.access.redhat.com/ubi10/nodejs-22-minimal:latest... Getting image source signatures Checking if image destination supports signatures Copying blob sha256:5422f45747c67d8d7ff17fd6f2c2fff2b472050062875c176537fe85486fa542 Copying blob sha256:8842ba7bc2df44f86cd5c7be359d4db31b54752212e79736f6abda71717fa1f4 Error: copying system image from manifest list: reading blob sha256:8842ba7bc2df44f86cd5c7be359d4db31b54752212e79736f6abda71717fa1f4: Get "https://cdn01.quay.io/quayio-production-s3/sha256/88/8842ba7bc2df44f86cd5c7be359d4db31b54752212e79736f6abda71717fa1f4?X-Amz-Algorithm=...&X-Amz-Credential=...&X-Amz-Date=20250729T095147Z&X-Amz-Expires=600&X-Amz-SignedHeaders=host&X-Amz-Signature=...®ion=us-east-1&namespace=redhat-prod&username=redhat-prod+registry_proxy&repo_name=ubi10----nodejs-22-minimal&akamai_signature=...": dial tcp: lookup cdn01.quay.io: Temporary failure in name resolution --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index bdc8b50228..55582d6fca 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -96,6 +96,7 @@ "Error allocating host because of provisioning error": r"Error allocating host: failed to provision host", "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 
.Bad Gateway.", "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", + "Failed to pull container from quay.io because of DNS error": r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution", "Failed to pull container from registry.access.redhat.com because of DNS error": r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution", "Gateway Time-out when pulling container image from quay.io": r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out", "Gateway Time-out when pulling container image": r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out", From 0ab58a7b2a9343b53d238051463e520aa1b7b55b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 09:49:48 +0200 Subject: [PATCH 129/321] feat: Also closely investigate release PLRs --- tests/load-tests/errors.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 55582d6fca..4bc874ff0a 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -124,6 +124,7 @@ def message_to_reason(reasons_and_errors: dict, msg: str) -> str | None: """ msg = msg.replace("\n", " ") # Remove newlines for error_name, pattern in reasons_and_errors.items(): + 
if error_name == "SKIP": if re.search(pattern, msg): return error_name print(f"Unknown error: {msg}") @@ -158,7 +159,7 @@ def load(datafile): return data -def find_first_failed_build_plr(data_dir): +def find_first_failed_build_plr(data_dir, plr_type): """ This function is intended for jobs where we only run one concurrent builds, so no more than one can failed: our load test probes. @@ -179,9 +180,16 @@ def find_first_failed_build_plr(data_dir): datafile = os.path.join(currentpath, datafile) data = load(datafile) - # Skip PLRs that are not "build" PLRs + if plr_type == "build": + plr_type_label = "build" + elif plr_type == "release": + plr_type_label = "managed" + else: + raise Exception("Unknown PLR type") + + # Skip PLRs that do not have expected type try: - if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != "build": + if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != plr_type_label: continue except KeyError: continue @@ -244,11 +252,11 @@ def load_container_log(data_dir, ns, pod_name, cont_name): with open(datafile, "r") as fd: return fd.read() -def investigate_failed_plr(dump_dir): +def investigate_failed_plr(dump_dir, plr_type="build"): try: reasons = [] - plr = find_first_failed_build_plr(dump_dir) + plr = find_first_failed_build_plr(dump_dir, plr_type) if plr == None: return ["SORRY PLR not found"] @@ -268,7 +276,8 @@ def investigate_failed_plr(dump_dir): reasons.append(reason) reason = message_to_reason(FAILED_TR_ERRORS, tr_message) - reasons.append(reason) + if reason != "SKIP": + reasons.append(reason) reasons = list(set(reasons)) # get unique reasons only reasons.sort() # sort reasons @@ -306,7 +315,11 @@ def main(): reason = message_to_reason(ERRORS, message) if reason == "Pipeline failed": - reasons2 = investigate_failed_plr(dump_dir) + reasons2 = investigate_failed_plr(dump_dir, "build") + reason = reason + ": " + ", ".join(reasons2) + + if reason == "Release Pipeline failed": + reasons2 = 
investigate_failed_plr(dump_dir, "release") reason = reason + ": " + ", ".join(reasons2) add_reason(error_messages, error_by_code, error_by_reason, message, reason, code) From 2d875c3173cc6aa0810232b246cd7d2641340734 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 09:58:56 +0200 Subject: [PATCH 130/321] fix: Use sets here as in dicts we can not have two values with same key --- tests/load-tests/errors.py | 149 ++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 75 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4bc874ff0a..ae0cee265d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -17,47 +17,47 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { - "Application creation failed because of TLS handshake timeout": r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout", - "Application creation timed out waiting for quota evaluation": r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out", - "Build Pipeline Run was cancelled" : r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled", - "Component creation timed out waiting for image-controller annotations": r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component", - "Couldnt get pipeline via bundles resolver from quay.io due to 429": r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests", - "Couldnt get pipeline via git resolver from gitlab.cee due to 429": 
r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429", - "Couldnt get pipeline via http resolver from gitlab.cee": r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found", - "Couldnt get task via buldles resolver from quay.io due to 429": r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests", - "Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input": r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input", - "Couldnt get task via git resolver from gitlab.cee due to 429": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429", - "Couldnt get task via http resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found", - "Error deleting on-pull-request default PipelineRun": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", - "Failed application creation when calling mapplication.kb.io webhook": r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available 
for service .*application-service-webhook-service", - "Failed component creation because resource quota evaluation timed out": r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out", - "Failed component creation when calling mcomponent.kb.io webhook": r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*", - "Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", - "Failed creating integration test scenario because it already exists": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists", - "Failed getting PaC pull number because PaC public route does not exist": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines as Code public route does not exist\"", - "Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook": r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post 
.*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service", - "Failed to link pipeline image pull secret to build service account because SA was not found": r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found", - "Failed to merge MR on CEE GitLab due to 405": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed", - "Failed to merge MR on CEE GitLab due to DNS error": r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution", - "Failed validating release condition": r"Release .* in namespace .* failed: .*Message:Release validation failed.*", - "GitLab token used by test expired": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh", - "Pipeline failed": r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", - "Post-test data collection failed": r"Failed to collect pipeline run JSONs", - "Release failed in progress without error given": r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$", - "Release failure: PipelineRun not created": r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace", - "Release Pipeline failed": r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", - "Repo forking failed as GitLab CEE says 401 Unauthorized": r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*", - "Repo forking failed as the target is still being deleted": r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted", - "Repo forking failed as we got TLS handshake timeout talking to GitLab CEE": r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout", - "Repo forking failed because gitlab.com returned 503": r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*", - "Repo forking failed when deleting target repo on github.com because 504": r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists.", - "Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized": r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized", - "Test Pipeline failed": r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,", - "Timeout forking the repo before the actual test": r"Repo forking failed: Error forking project .*: context deadline exceeded", - "Timeout getting build service account": r"Component build SA not present: Component build SA .* not present: context deadline exceeded", - "Timeout getting PaC pull number when validating component": r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded", - "Timeout getting pipeline": r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*", - "Timeout getting task via git resolver from gitlab.cee": r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*", + ("Application creation failed because of TLS handshake timeout", r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout"), + ("Application creation timed out waiting for quota evaluation", r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out"), + ("Build Pipeline Run was cancelled", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled"), + ("Component creation timed out waiting for image-controller annotations", r"Component failed creation: Unable to create the Component .* timed out when 
waiting for image-controller annotations to be updated on component"), + ("Couldnt get pipeline via bundles resolver from quay.io due to 429", r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests"), + ("Couldnt get pipeline via git resolver from gitlab.cee due to 429", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429"), + ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), + ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), + ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), + ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), + ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline 
exceeded"), + ("Failed application creation when calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service"), + ("Failed component creation because resource quota evaluation timed out", r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out"), + ("Failed component creation when calling mcomponent.kb.io webhook", r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*"), + ("Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on"), + ("Failed creating integration test scenario because it already exists", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists"), + ("Failed getting PaC pull number because PaC public route does not exist", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: 
.*\"error-message\":\"52: Pipelines as Code public route does not exist\""), + ("Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service"), + ("Failed to link pipeline image pull secret to build service account because SA was not found", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found"), + ("Failed to merge MR on CEE GitLab due to 405", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed"), + ("Failed to merge MR on CEE GitLab due to DNS error", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution"), + ("Failed validating release condition", r"Release .* in namespace .* failed: .*Message:Release validation failed.*"), + ("GitLab token used by test expired", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh"), + ("Pipeline failed", r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Post-test data collection failed", r"Failed to collect pipeline run JSONs"), + ("Release failed in progress without error given", r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$"), + ("Release failure: PipelineRun not created", r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace"), + ("Release Pipeline failed", r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Repo forking failed as GitLab CEE says 401 Unauthorized", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*"), + ("Repo forking failed as the target is still being deleted", r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted"), + ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), + ("Repo forking failed because gitlab.com returned 503", r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*"), + ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists."), + ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), + ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Timeout forking the repo before the actual test", r"Repo forking failed: Error forking project .*: context deadline exceeded"), + ("Timeout getting build service account", r"Component build SA not present: Component build SA .* not present: context deadline exceeded"), + ("Timeout getting PaC pull number when validating component", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded"), + ("Timeout getting pipeline", r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*"), + ("Timeout getting task via git resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* resolution took longer than global timeout of .*"), # Last time I seen this we discussed it here: # # https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1751530663606749 @@ -73,46 +73,46 @@ # reported this error: ```Internal error occurred: failed calling webhook "vpipelineruns.konflux-ci.dev": failed # to call webhook: Post "https://etcd-shield.etcd-shield.svc:443/validate-tekton-dev-v1-pipelinerun?timeout=10s": # context deadline exceeded``` - "Timeout listing pipeline runs": r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded", - "Timeout listing pipeline runs": r"Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace .*: Unable to list PipelineRuns 
for component .* in namespace .*: context deadline exceeded", - "Timeout waiting for build pipeline to be created": r"Build Pipeline Run failed creation: context deadline exceeded", - "Timeout waiting for integration test scenario to validate": r"Integration test scenario failed validation: context deadline exceeded", - "Timeout waiting for snapshot to be created": r"Snapshot failed creation: context deadline exceeded", - "Timeout waiting for test pipeline to create": r"Test Pipeline Run failed creation: context deadline exceeded", - "Timeout waiting for test pipeline to finish": r"Test Pipeline Run failed run: context deadline exceeded", - "Unable to connect to server": r"Error: Unable to connect to server", + ("Timeout listing pipeline runs", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), + ("Timeout listing pipeline runs", r"Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), + ("Timeout waiting for build pipeline to be created", r"Build Pipeline Run failed creation: context deadline exceeded"), + ("Timeout waiting for integration test scenario to validate", r"Integration test scenario failed validation: context deadline exceeded"), + ("Timeout waiting for snapshot to be created", r"Snapshot failed creation: context deadline exceeded"), + ("Timeout waiting for test pipeline to create", r"Test Pipeline Run failed creation: context deadline exceeded"), + ("Timeout waiting for test pipeline to finish", r"Test Pipeline Run failed run: context deadline exceeded"), + ("Unable to connect to server", r"Error: Unable to connect to server"), } FAILED_PLR_ERRORS = { - "SKIP": r"Skipping step because a previous step failed", # This is a special "wildcard" error, 
let's keep it on top and do not change "SKIP" reason as it is used in the code - "Bad Gateway when pulling container image from quay.io": r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 502 Bad Gateway ", - "buildah build failed creating build container: registry.access.redhat.com returned 403": r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403", - "Can not find chroot_scan.tar.gz file": r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory", - "Can not find Dockerfile": r"Cannot find Dockerfile Dockerfile", - "DNF failed to download repodata from Download Devel because could not resolve host": r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com.", - "DNF failed to download repodata from Koji": r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found", - "Error allocating host as provision TR already exists": r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists", - "Error allocating host because of insufficient free addresses in subnet": r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances.", - "Error allocating host because of provisioning error": r"Error allocating host: failed to provision 
host", - "Failed because of quay.io returned 502": r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway.", - "Failed because registry.access.redhat.com returned 503 when reading manifest": r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable", - "Failed to pull container from quay.io because of DNS error": r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution", - "Failed to pull container from registry.access.redhat.com because of DNS error": r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution", - "Gateway Time-out when pulling container image from quay.io": r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out", - "Gateway Time-out when pulling container image": r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out", - "Introspection failed because of incomplete .docker/config.json": r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\"", - "RPM build failed: bool cannot be defined via typedef": r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build.", + ("SKIP", r"Skipping step because a previous step failed"), # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code + ("Bad Gateway when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 502 Bad Gateway "), + ("buildah build failed creating build container: registry.access.redhat.com returned 403", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403"), + ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), + ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), + ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), + ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found"), + ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), + ("Error allocating host because of insufficient free addresses in subnet", r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error 
InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances."), + ("Error allocating host because of provisioning error", r"Error allocating host: failed to provision host"), + ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), + ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), + ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), + ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), + ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), + ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), + ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected 
end of JSON input\""),
+    ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."),
 }
 
 FAILED_TR_ERRORS = {
-    "Missing expected fields in TaskRun": r"Missing expected fields in TaskRun",  # This is special error, meaning everithing failed basically
-    "SKIP": r"\"message\": \"All Steps have completed executing\"",  # Another special error to avoid printing 'Unknown error:' message
-    "SKIP": r"\"message\": \".* exited with code 1\"",  # Another special error to avoid printing 'Unknown error:' message
-    "Back-off pulling task run image from quay.io": r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\"",
-    "Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev": r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. Maybe missing or invalid Task .*",
+    ("Missing expected fields in TaskRun", r"Missing expected fields in TaskRun"),  # This is special error, meaning everything failed basically
+    ("SKIP", r"\"message\": \"All Steps have completed executing\""),  # Another special error to avoid printing 'Unknown error:' message
+    ("SKIP", r"\"message\": \".* exited with code 1\""),  # Another special error to avoid printing 'Unknown error:' message
+    ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), + ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. Maybe missing or invalid Task .*"), } -def message_to_reason(reasons_and_errors: dict, msg: str) -> str | None: +def message_to_reason(reasons_and_errors: set, msg: str) -> str | None: """ Classifies an error message using regular expressions and returns the error name. @@ -123,8 +123,7 @@ def message_to_reason(reasons_and_errors: dict, msg: str) -> str | None: The name of the error if a pattern matches, otherwise string "UNKNOWN". """ msg = msg.replace("\n", " ") # Remove newlines - for error_name, pattern in reasons_and_errors.items(): - if error_name == "SKIP": + for error_name, pattern in reasons_and_errors: if re.search(pattern, msg): return error_name print(f"Unknown error: {msg}") From 5d3f9c9e36025b0b4bf6bc8480bde054f0efd9a4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 10:27:59 +0200 Subject: [PATCH 131/321] feat(KONFLUX-8988): Move part of collection to per auth so it makes sense to collect pods per application label as release pods do not have component label --- tests/load-tests/loadtest.go | 6 +++ .../pkg/journey/handle_collections.go | 46 ++++++++++++++++--- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 257aaaf539..b805713e1b 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -242,6 +242,12 @@ func perUserThread(threadCtx *journey.MainContext) { // Single application journey (there can be multiple parallel apps per user) func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { defer 
perApplicationCtx.PerApplicationWG.Done() + defer func() { + _, err := logging.Measure(journey.HandlePerApplicationCollection, perApplicationCtx) + if err != nil { + logging.Logger.Error("Per application thread failed: %v", err) + } + }() var err error diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 0d55fb8b0f..84afa6c654 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -44,15 +44,15 @@ func writeToFile(dirPath, file string, contents []byte) error { return nil } -func collectPodLogs(f *framework.Framework, dirPath, namespace, component string) error { +func collectPodLogs(f *framework.Framework, dirPath, namespace, application string) error { podList, err := f.AsKubeAdmin.CommonController.ListPods( namespace, - "appstudio.openshift.io/component", - component, + "appstudio.openshift.io/application", + application, 100, ) if err != nil { - return fmt.Errorf("Failed to list pods in namespace %s for component %s: %v", namespace, component, err) + return fmt.Errorf("Failed to list pods in namespace %s for application %s: %v", namespace, application, err) } for _, pod := range podList.Items { @@ -144,7 +144,7 @@ func collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, applica return nil } -func collectApplicationComponentJSONs(f *framework.Framework, dirPath, namespace, application, component string) error { +func collectApplicationJSONs(f *framework.Framework, dirPath, namespace, application string) error { appJsonFileName := "collected-application-" + application + ".json" // Only save Application JSON if it has not already been collected (as HandlePerComponentCollection method is called for each component) if _, err := os.Stat(filepath.Join(dirPath, appJsonFileName)); errors.Is(err, os.ErrNotExist) { @@ -165,6 +165,10 @@ func collectApplicationComponentJSONs(f *framework.Framework, dirPath, namespace } } 
+ return nil +} + +func collectComponentJSONs(f *framework.Framework, dirPath, namespace, component string) error { // Collect Component JSON comp, err := f.AsKubeDeveloper.HasController.GetComponent(component, namespace) if err != nil { @@ -184,6 +188,34 @@ func collectApplicationComponentJSONs(f *framework.Framework, dirPath, namespace return nil } +func HandlePerApplicationCollection(ctx *PerApplicationContext) error { + if ctx.ApplicationName == "" { + logging.Logger.Debug("Application name not populated, so skipping per-application collections in %s", ctx.ParentContext.Namespace) + return nil + } + + var err error + + journeyCounterStr := fmt.Sprintf("%d", ctx.ParentContext.JourneyRepeatsCounter) + dirPath := getDirName(ctx.ParentContext.Opts.OutputDir, ctx.ParentContext.Namespace, journeyCounterStr) + err = createDir(dirPath) + if err != nil { + return logging.Logger.Fail(105, "Failed to create dir: %v", err) + } + + err = collectPodLogs(ctx.Framework, dirPath, ctx.ParentContext.Namespace, ctx.ApplicationName) + if err != nil { + return logging.Logger.Fail(106, "Failed to collect pod logs: %v", err) + } + + err = collectApplicationJSONs(ctx.Framework, dirPath, ctx.ParentContext.Namespace, ctx.ApplicationName) + if err != nil { + return logging.Logger.Fail(107, "Failed to collect application JSONs: %v", err) + } + + return nil +} + func HandlePerComponentCollection(ctx *PerComponentContext) error { if ctx.ComponentName == "" { logging.Logger.Debug("Component name not populated, so skipping per-component collections in %s", ctx.ParentContext.ParentContext.Namespace) @@ -209,9 +241,9 @@ func HandlePerComponentCollection(ctx *PerComponentContext) error { return logging.Logger.Fail(102, "Failed to collect pipeline run JSONs: %v", err) } - err = collectApplicationComponentJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName) + err = collectComponentJSONs(ctx.Framework, dirPath, 
ctx.ParentContext.ParentContext.Namespace, ctx.ComponentName) if err != nil { - return logging.Logger.Fail(103, "Failed to collect Application and Component JSONs: %v", err) + return logging.Logger.Fail(103, "Failed to collect component JSONs: %v", err) } return nil From 789d9d3ec570eb86c6661c5c9da06385609b7fbd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 11:21:34 +0200 Subject: [PATCH 132/321] feat(KONFLUX-8988): Also collect Release, RP, RPA and Snapshot --- .../pkg/journey/handle_collections.go | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 84afa6c654..58dd069d1c 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -188,6 +188,76 @@ func collectComponentJSONs(f *framework.Framework, dirPath, namespace, component return nil } +func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appName, compName, snapName, relName string) error { + // Collect ReleasePlan JSON + releasePlanName := appName + "-rp" + releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(releasePlanName, namespace) + if err != nil { + return fmt.Errorf("Failed to get Release Plan %s: %v", releasePlanName, err) + } + + releasePlanJSON, err := json.Marshal(releasePlan) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-releaseplan-" + releasePlanName + ".json", releasePlanJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan: %v", err) + } + + // Collect ReleasePlanAdmission JSON + releasePlanAdmissionName := appName + "-rpa" + releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(releasePlanAdmissionName, namespace) + if err != nil { + return fmt.Errorf("Failed to get Release Plan Admission %s: %v", 
releasePlanAdmissionName, err) + } + + releasePlanAdmissionJSON, err := json.Marshal(releasePlanAdmission) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan Admission JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-releaseplanadmission-" + releasePlanAdmissionName + ".json", releasePlanAdmissionJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan Admission: %v", err) + } + + // Collect Snapshot JSON + snap, err := f.AsKubeDeveloper.IntegrationController.GetSnapshot(snapName, "", compName, namespace) + if err != nil { + return fmt.Errorf("Failed to get Snapshot %s: %v", snapName, err) + } + + snapJSON, err := json.Marshal(snap) + if err != nil { + return fmt.Errorf("Failed to dump Snapshot JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-snapshot-" + snapName + ".json", snapJSON) + if err != nil { + return fmt.Errorf("Failed to write Snapshot: %v", err) + } + + // Collect Release JSON + rel, err := f.AsKubeDeveloper.ReleaseController.GetRelease(relName, "", namespace) + if err != nil { + return fmt.Errorf("Failed to get Release %s: %v", relName, err) + } + + relJSON, err := json.Marshal(rel) + if err != nil { + return fmt.Errorf("Failed to dump Release JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-release-" + relName + ".json", relJSON) + if err != nil { + return fmt.Errorf("Failed to write Release: %v", err) + } + + return nil +} + func HandlePerApplicationCollection(ctx *PerApplicationContext) error { if ctx.ApplicationName == "" { logging.Logger.Debug("Application name not populated, so skipping per-application collections in %s", ctx.ParentContext.Namespace) @@ -246,5 +316,10 @@ func HandlePerComponentCollection(ctx *PerComponentContext) error { return logging.Logger.Fail(103, "Failed to collect component JSONs: %v", err) } + err = collectReleaseRelatedJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName, 
ctx.SnapshotName, ctx.ReleaseName) + if err != nil { + return logging.Logger.Fail(104, "Failed to collect release related JSONs: %v", err) + } + return nil } From 282079953906cb64106ca47dc6a6ff4c558ecc20 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 11:59:07 +0200 Subject: [PATCH 133/321] feat: New error: Hide problem with releases collection on clusters where it is not configured --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ae0cee265d..85a55e8e14 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -43,6 +43,7 @@ ("GitLab token used by test expired", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. You can either do re-authorization or token refresh"), ("Pipeline failed", r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Post-test data collection failed", r"Failed to collect pipeline run JSONs"), + ("Post-test data collection failed", r"Failed to collect release related JSONs") ("Release failed in progress without error given", r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$"), ("Release failure: PipelineRun not created", r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace"), ("Release Pipeline failed", r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), From 58b98cdaad0abbe0da949aa82e096c3287f311e9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 12:12:34 +0200 Subject: [PATCH 134/321] fix: Do not fail if there are no release related CRs as this is a best effort function Generated-by: Gemini --- .../pkg/journey/handle_collections.go | 83 ++++++++++++------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git 
a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 58dd069d1c..53e667c9d8 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -12,6 +12,9 @@ import ( framework "github.com/konflux-ci/e2e-tests/pkg/framework" ) +import k8s_api_errors "k8s.io/apimachinery/pkg/api/errors" + + func getDirName(baseDir, namespace, iteration string) string { return filepath.Join(baseDir, "collected-data", namespace, iteration) + "/" } @@ -193,66 +196,82 @@ func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appN releasePlanName := appName + "-rp" releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(releasePlanName, namespace) if err != nil { - return fmt.Errorf("Failed to get Release Plan %s: %v", releasePlanName, err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release Plan %s: %v", releasePlanName, err) + } } - releasePlanJSON, err := json.Marshal(releasePlan) - if err != nil { - return fmt.Errorf("Failed to dump Release Plan JSON: %v", err) - } + if err == nil { + releasePlanJSON, err := json.Marshal(releasePlan) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan JSON: %v", err) + } - err = writeToFile(dirPath, "collected-releaseplan-" + releasePlanName + ".json", releasePlanJSON) - if err != nil { - return fmt.Errorf("Failed to write Release Plan: %v", err) + err = writeToFile(dirPath, "collected-releaseplan-" + releasePlanName + ".json", releasePlanJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan: %v", err) + } } // Collect ReleasePlanAdmission JSON releasePlanAdmissionName := appName + "-rpa" releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(releasePlanAdmissionName, namespace) if err != nil { - return fmt.Errorf("Failed to get Release Plan Admission %s: %v", releasePlanAdmissionName, err) + if 
!k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release Plan Admission %s: %v", releasePlanAdmissionName, err) + } } - releasePlanAdmissionJSON, err := json.Marshal(releasePlanAdmission) - if err != nil { - return fmt.Errorf("Failed to dump Release Plan Admission JSON: %v", err) - } + if err == nil { + releasePlanAdmissionJSON, err := json.Marshal(releasePlanAdmission) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan Admission JSON: %v", err) + } - err = writeToFile(dirPath, "collected-releaseplanadmission-" + releasePlanAdmissionName + ".json", releasePlanAdmissionJSON) - if err != nil { - return fmt.Errorf("Failed to write Release Plan Admission: %v", err) + err = writeToFile(dirPath, "collected-releaseplanadmission-" + releasePlanAdmissionName + ".json", releasePlanAdmissionJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan Admission: %v", err) + } } // Collect Snapshot JSON snap, err := f.AsKubeDeveloper.IntegrationController.GetSnapshot(snapName, "", compName, namespace) if err != nil { - return fmt.Errorf("Failed to get Snapshot %s: %v", snapName, err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Snapshot %s: %v", snapName, err) + } } - snapJSON, err := json.Marshal(snap) - if err != nil { - return fmt.Errorf("Failed to dump Snapshot JSON: %v", err) - } + if err == nil { + snapJSON, err := json.Marshal(snap) + if err != nil { + return fmt.Errorf("Failed to dump Snapshot JSON: %v", err) + } - err = writeToFile(dirPath, "collected-snapshot-" + snapName + ".json", snapJSON) - if err != nil { - return fmt.Errorf("Failed to write Snapshot: %v", err) + err = writeToFile(dirPath, "collected-snapshot-" + snapName + ".json", snapJSON) + if err != nil { + return fmt.Errorf("Failed to write Snapshot: %v", err) + } } // Collect Release JSON rel, err := f.AsKubeDeveloper.ReleaseController.GetRelease(relName, "", namespace) if err != nil { - return fmt.Errorf("Failed to get Release 
%s: %v", relName, err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release %s: %v", relName, err) + } } - relJSON, err := json.Marshal(rel) - if err != nil { - return fmt.Errorf("Failed to dump Release JSON: %v", err) - } + if err == nil { + relJSON, err := json.Marshal(rel) + if err != nil { + return fmt.Errorf("Failed to dump Release JSON: %v", err) + } - err = writeToFile(dirPath, "collected-release-" + relName + ".json", relJSON) - if err != nil { - return fmt.Errorf("Failed to write Release: %v", err) + err = writeToFile(dirPath, "collected-release-" + relName + ".json", relJSON) + if err != nil { + return fmt.Errorf("Failed to write Release: %v", err) + } } return nil From 9214bbb823f761897b0f6973cd4619bd80bb7618 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 12:14:24 +0200 Subject: [PATCH 135/321] fix: Add forgotten coma --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 85a55e8e14..96ce1a2b8d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -43,7 +43,7 @@ ("GitLab token used by test expired", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. 
You can either do re-authorization or token refresh"), ("Pipeline failed", r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Post-test data collection failed", r"Failed to collect pipeline run JSONs"), - ("Post-test data collection failed", r"Failed to collect release related JSONs") + ("Post-test data collection failed", r"Failed to collect release related JSONs"), ("Release failed in progress without error given", r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$"), ("Release failure: PipelineRun not created", r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace"), ("Release Pipeline failed", r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), From af6c9b3126539eb6dfbc681e7f26fb5282fdf57a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 12:18:58 +0200 Subject: [PATCH 136/321] feat: Make sure that bug in errors.py do not cause interruption of the whole process --- tests/load-tests/ci-scripts/stage/collect-results.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 2fb1a44cfd..c3482df059 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -42,7 +42,7 @@ echo "[$(date --utc -Ins)] Create summary JSON with timings" ./evaluate.py "${ARTIFACT_DIR}/load-test-options.json" "${ARTIFACT_DIR}/load-test-timings.csv" "${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Create summary JSON with errors" -./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" "${ARTIFACT_DIR}/load-test-errors.json" "${ARTIFACT_DIR}/collected-data/" +./errors.py "${ARTIFACT_DIR}/load-test-errors.csv" "${ARTIFACT_DIR}/load-test-timings.json" 
"${ARTIFACT_DIR}/load-test-errors.json" "${ARTIFACT_DIR}/collected-data/" || true echo "[$(date --utc -Ins)] Graphing PRs and TRs" ci-scripts/utility_scripts/show-pipelineruns.py --data-dir "${ARTIFACT_DIR}" &>"${ARTIFACT_DIR}/show-pipelineruns.log" || true From 366a63282d183af9fd65c1327b53b8fda353c0d0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 12:22:28 +0200 Subject: [PATCH 137/321] fix: Do not attempt to collect Snapshot and Release if name was not provided --- .../pkg/journey/handle_collections.go | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 53e667c9d8..0fef551e60 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -235,42 +235,46 @@ func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appN } // Collect Snapshot JSON - snap, err := f.AsKubeDeveloper.IntegrationController.GetSnapshot(snapName, "", compName, namespace) - if err != nil { - if !k8s_api_errors.IsNotFound(err) { - return fmt.Errorf("Failed to get Snapshot %s: %v", snapName, err) - } - } - - if err == nil { - snapJSON, err := json.Marshal(snap) + if len(snapName) > 0 { + snap, err := f.AsKubeDeveloper.IntegrationController.GetSnapshot(snapName, "", compName, namespace) if err != nil { - return fmt.Errorf("Failed to dump Snapshot JSON: %v", err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Snapshot %s: %v", snapName, err) + } } - err = writeToFile(dirPath, "collected-snapshot-" + snapName + ".json", snapJSON) - if err != nil { - return fmt.Errorf("Failed to write Snapshot: %v", err) - } - } + if err == nil { + snapJSON, err := json.Marshal(snap) + if err != nil { + return fmt.Errorf("Failed to dump Snapshot JSON: %v", err) + } - // Collect Release JSON - rel, err := 
f.AsKubeDeveloper.ReleaseController.GetRelease(relName, "", namespace) - if err != nil { - if !k8s_api_errors.IsNotFound(err) { - return fmt.Errorf("Failed to get Release %s: %v", relName, err) + err = writeToFile(dirPath, "collected-snapshot-" + snapName + ".json", snapJSON) + if err != nil { + return fmt.Errorf("Failed to write Snapshot: %v", err) + } } } - if err == nil { - relJSON, err := json.Marshal(rel) + // Collect Release JSON + if len(relName) > 0 { + rel, err := f.AsKubeDeveloper.ReleaseController.GetRelease(relName, "", namespace) if err != nil { - return fmt.Errorf("Failed to dump Release JSON: %v", err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release %s: %v", relName, err) + } } - err = writeToFile(dirPath, "collected-release-" + relName + ".json", relJSON) - if err != nil { - return fmt.Errorf("Failed to write Release: %v", err) + if err == nil { + relJSON, err := json.Marshal(rel) + if err != nil { + return fmt.Errorf("Failed to dump Release JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-release-" + relName + ".json", relJSON) + if err != nil { + return fmt.Errorf("Failed to write Release: %v", err) + } } } From 68747e36eb4023f94ab809d2b942d0ad31ac94c5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 4 Aug 2025 18:49:21 +0200 Subject: [PATCH 138/321] feat: Check TR status message even when podName is set but pod log is missing --- tests/load-tests/errors.py | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 96ce1a2b8d..cca4f27e42 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -253,9 +253,9 @@ def load_container_log(data_dir, ns, pod_name, cont_name): return fd.read() def investigate_failed_plr(dump_dir, plr_type="build"): - try: - reasons = [] + reasons = [] + try: plr = find_first_failed_build_plr(dump_dir, plr_type) if plr == None: return ["SORRY PLR 
not found"] @@ -266,28 +266,29 @@ def investigate_failed_plr(dump_dir, plr_type="build"): tr_ok, tr_message = check_failed_taskrun(dump_dir, plr_ns, tr_name) if tr_ok: - for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): - log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) - reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) - + try: + for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): + log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) + reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) + + if reason == "SKIP": + continue + + reasons.append(reason) + except FileNotFoundError as e: + print(f"Failed to locate required files: {e}") + reason = message_to_reason(FAILED_TR_ERRORS, tr_message) if reason == "SKIP": - continue - - reasons.append(reason) - - reason = message_to_reason(FAILED_TR_ERRORS, tr_message) - if reason != "SKIP": - reasons.append(reason) - - reasons = list(set(reasons)) # get unique reasons only - reasons.sort() # sort reasons - return reasons - except FileNotFoundError as e: - print(f"Failed to locate required files: {e}") - return ["SORRY, missing data"] + reasons.append("SORRY, missing data") + else: + reasons.append(reason) except Exception as e: return ["SORRY " + str(e)] + reasons = list(set(reasons)) # get unique reasons only + reasons.sort() # sort reasons + return reasons + def main(): input_file = sys.argv[1] timings_file = sys.argv[2] From b9543f5d1019d9278802542c68c03eea01629d96 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 4 Aug 2025 18:54:44 +0200 Subject: [PATCH 139/321] feat: New error: Back-off pulling task run image from registry.access.redhat.com "status": { "conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-08-04T12:13:59Z", "reason": "TaskRunImagePullFailed", "message": "the step \"init\" in TaskRun \"jhutar-app-bjsnc-comp-0-on-pull-request-tdmgq-init\" failed to pull the image \"\". 
The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/ubi9/skopeo:9.6-1752647240@sha256:cbf079a41e8dded35a1292730513857292930a2780e39f56d998ee4f3dadb856\".\"" } ], "podName": "jhutar-app-bjsnc-comp-0-on-pull-request-tdmgq-init-pod", "startTime": "2025-08-04T12:13:32Z", "completionTime": "2025-08-04T12:13:59Z", --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index cca4f27e42..3ababc1226 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -110,6 +110,7 @@ ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), + ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. 
Maybe missing or invalid Task .*"), } From 7ff2947d4f2b3798eca3d4a90a97ff69792d3357 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 4 Aug 2025 19:06:18 +0200 Subject: [PATCH 140/321] fix: Properly escape these backslashes and double quotes --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3ababc1226..4aed53c006 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -111,7 +111,7 @@ ("SKIP", r"\"message\": \".* exited with code 1\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), - ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \"proxy.operator.tekton.dev\": failed to call webhook: Post \"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\": context deadline exceeded. Maybe missing or invalid Task .*"), + ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), } def message_to_reason(reasons_and_errors: set, msg: str) -> str | None: From d5f438e009ca9890c86d869e039a3b6138e99900 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 4 Aug 2025 21:36:55 +0200 Subject: [PATCH 141/321] feat: New error: Failed to pull container from registry.fedoraproject.org Error from collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-avzzl-comp-0-ob75e7f5ccf666294f2fe3bda1b4168dd-pod-step-mock-build.log: [...] Directory walk started Directory walk done - 208 packages Temporary output repo path: /results/buildroot_repo/.repodata/ Pool started (with 5 workers) Pool finished INFO:__main__:Pulling like: podman pull --arch amd64 registry.fedoraproject.org/fedora:rawhide@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0 Trying to pull registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0... WARN[0011] Failed, retrying in 1s ... (1/3). Error: initializing source docker://registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0: pinging container registry registry.fedoraproject.org: Get "https://registry.fedoraproject.org/v2/": net/http: TLS handshake timeout WARN[0042] Failed, retrying in 1s ... (2/3). Error: initializing source docker://registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0: pinging container registry registry.fedoraproject.org: Get "https://registry.fedoraproject.org/v2/": dial tcp 38.145.32.21:443: i/o timeout WARN[0067] Failed, retrying in 1s ... (3/3). 
Error: initializing source docker://registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0: pinging container registry registry.fedoraproject.org: Get "https://registry.fedoraproject.org/v2/": read tcp 10.207.4.246:51610->38.145.32.21:443: read: connection reset by peer Error: internal error: unable to copy from source docker://registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0: initializing source docker://registry.fedoraproject.org/fedora@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0: pinging container registry registry.fedoraproject.org: Get "https://registry.fedoraproject.org/v2/": dial tcp 38.145.32.21:443: connect: connection refused Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 151, in _main() ~~~~~^^ File "/usr/bin/mock-hermetic-repo", line 146, in _main prepare_image(data["config"]["bootstrap_image"], data["bootstrap"], ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ options.output_repo) ^^^^^^^^^^^^^^^^^^^^ File "/usr/bin/mock-hermetic-repo", line 115, in prepare_image subprocess.check_output(pull_cmd) ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 472, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, ~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ **kwargs).stdout ^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 577, in run raise CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) subprocess.CalledProcessError: Command '['podman', 'pull', '--arch', 'amd64', 'registry.fedoraproject.org/fedora:rawhide@sha256:26048d9008a413c000a6e2f971de53b17859d17a15c16997f53aaa3dc2efdfa0']' returned non-zero exit status 125. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4aed53c006..f99a31339e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -99,6 +99,7 @@ ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), + ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Introspection failed because of incomplete 
.docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), From ce5f1fe207709d95934fc5e39293a0dfff3421fb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 5 Aug 2025 08:47:07 +0200 Subject: [PATCH 142/321] feat: New error: Error updating .tekton file in gitlab.cee.redhat.com I0804 21:40:59.042568 1703 logging.go:64] FAIL(64): Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/jhutar-app-fnpzb-comp-0-pull-request.yaml in repo jhutar/libecpg-gitlab-fork-rhelp01-jhutar revision c10s: Failed to update/create file: PUT https://gitlab.cee.redhat.com/api/v4/projects/jhutar/libecpg-gitlab-fork-rhelp01-jhutar/repository/files/.tekton/jhutar-app-fnpzb-comp-0-pull-request.yaml: 400 {message: A file with this name doesn't exist} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f99a31339e..a56cb441a1 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -29,6 +29,7 @@ ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), + ("Error updating .tekton file in gitlab.cee.redhat.com", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file 
.tekton/[^ ]+ in repo .*: Failed to update/create file: PUT https://gitlab.cee.redhat.com/api/v4/projects/[^ ]+/repository/files/.tekton/.*: 400 .message: A file with this name doesn't exist."), ("Failed application creation when calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service"), ("Failed component creation because resource quota evaluation timed out", r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out"), ("Failed component creation when calling mcomponent.kb.io webhook", r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*"), From 1908adcaca292aa3f54acc77e64979f10a485f14 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 5 Aug 2025 09:36:08 +0200 Subject: [PATCH 143/321] feat: New error and change: Build failed for unspecified reasons --- tests/load-tests/errors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a56cb441a1..3730ca4cb6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -113,6 +113,7 @@ ("SKIP", r"\"message\": \".* exited with code 1\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), + ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), } @@ -280,11 +281,10 @@ def investigate_failed_plr(dump_dir, plr_type="build"): reasons.append(reason) except FileNotFoundError as e: print(f"Failed to locate required files: {e}") - reason = message_to_reason(FAILED_TR_ERRORS, tr_message) - if reason == "SKIP": - reasons.append("SORRY, missing data") - else: - reasons.append(reason) + + reason = message_to_reason(FAILED_TR_ERRORS, tr_message) + if reason != "SKIP": + reasons.append(reason) except Exception as e: return ["SORRY " + str(e)] From e3693f9a431980aeb4abca75c9fa71ca3a70ef63 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 6 Aug 2025 08:02:41 +0200 Subject: [PATCH 144/321] feat: New error: Failed because CPU is not x86-64-v4 Error from collected-data/jhutar-tenant/1/pod-jhutar-app-idqrn-comp-0-on-push-l5nxb-rpmbuild-x86-64-pod-step-mock-build.log: + ssh -o StrictHostKeyChecking=no u-686965e23b2b4ad6dcf8387b40b1@10.29.77.90 echo 'Hello from the other side!' Warning: Permanently added '10.29.77.90' (ED25519) to the list of known hosts. Hello from the other side! 
+ case linux/amd64 in + arch=x86_64 + remote_cmd /usr/bin/ld.so --help + ssh -o StrictHostKeyChecking=no u-686965e23b2b4ad6dcf8387b40b1@10.29.77.90 /usr/bin/ld.so --help + grep -q 'x86-64-v4 (supported, searched)' + remote_cmd /usr/bin/ld.so --help + ssh -o StrictHostKeyChecking=no u-686965e23b2b4ad6dcf8387b40b1@10.29.77.90 /usr/bin/ld.so --help Usage: /usr/bin/ld.so [OPTION]... EXECUTABLE-FILE [ARGS-FOR-PROGRAM...] You have invoked 'ld.so', the program interpreter for dynamically-linked ELF programs. Usually, the program interpreter is invoked automatically when a dynamically-linked executable is started. You may invoke the program interpreter program directly from the command line to load and run an ELF executable file; this is like executing that file itself, but always uses the program interpreter you invoked, instead of the program interpreter specified in the executable file you run. Invoking the program interpreter directly provides access to additional diagnostics, and changing the dynamic linker behavior without setting environment variables (which would be inherited by subprocesses). 
--list list all dependencies and how they are resolved --verify verify that given object really is a dynamically linked object we can handle --inhibit-cache Do not use /etc/ld.so.cache --library-path PATH use given PATH instead of content of the environment variable LD_LIBRARY_PATH --glibc-hwcaps-prepend LIST search glibc-hwcaps subdirectories in LIST --glibc-hwcaps-mask LIST only search built-in subdirectories if in LIST --inhibit-rpath LIST ignore RUNPATH and RPATH information in object names in LIST --audit LIST use objects named in LIST as auditors --preload LIST preload objects named in LIST --argv0 STRING set argv[0] to STRING before running --list-tunables list all tunables with minimum and maximum values --list-diagnostics list diagnostics information --help display this help and exit --version output version information and exit This program interpreter self-identifies as: /lib64/ld-linux-x86-64.so.2 Shared library search path: (libraries located via /etc/ld.so.cache) /lib64 (system search path) /usr/lib64 (system search path) Subdirectories of glibc-hwcaps directories, in priority order: x86-64-v4 x86-64-v3 (supported, searched) x86-64-v2 (supported, searched) Legacy HWCAP subdirectories under library search path directories: x86_64 (AT_PLATFORM; supported, searched) tls (supported, searched) avx512_1 x86_64 (supported, searched) ERROR: CPU is not x86-64-v4, aborting build. + '[' true = true ']' + echo 'ERROR: CPU is not x86-64-v4, aborting build.' 
+ exit -1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3730ca4cb6..19ac75e93c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -96,6 +96,7 @@ ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), ("Error allocating host because of insufficient free addresses in subnet", r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances."), ("Error allocating host because of provisioning error", r"Error allocating host: failed to provision host"), + ("Failed because CPU is not x86-64-v4", r"ERROR: CPU is not x86-64-v4, aborting build."), ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 17b58283731c90871fafcd9c073092868c28d764 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 6 Aug 2025 09:05:41 +0200 Subject: [PATCH 145/321] feat: New error: Failed downloading rpms for hermetic builds due to 504 errors Error from 
collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-tsyqu-comp-0-o4d99e024c3617e7a03cdc59371af8f9a-pod-step-mock-build.log: + remote_cmd podman run -v /home/u-04f56220f9bb6a890a064657d38e/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rpm-build-pipeline-tenant/environment:4b95644a1a230956d9cc8121cc3b15dbd5bdd5d0 mock-hermetic-repo --lockfile /results/buildroot_lock.json --output-repo /results/buildroot_repo + ssh -o StrictHostKeyChecking=no u-04f56220f9bb6a890a064657d38e@10.207.4.103 podman run -v /home/u-04f56220f9bb6a890a064657d38e/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rpm-build-pipeline-tenant/environment:4b95644a1a230956d9cc8121cc3b15dbd5bdd5d0 mock-hermetic-repo --lockfile /results/buildroot_lock.json --output-repo /results/buildroot_repo time="2025-08-06T06:38:28Z" level=warning msg="The input device is not a TTY. The --tty and --interactive flags might not work properly" [...] DEBUG:urllib3.connectionpool:Retry: /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/l/libkadm5-1.21.3-7.fc43.aarch64.rpm DEBUG:urllib3.connectionpool:https://d2lzkl7pfhq30w.cloudfront.net:443 "GET /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/l/libkadm5-1.21.3-7.fc43.aarch64.rpm HTTP/1.1" 504 941 DEBUG:urllib3.util.retry:Incremented Retry for (url='/pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/l/libkadm5-1.21.3-7.fc43.aarch64.rpm'): Retry(total=0, connect=5, read=5, redirect=None, status=None) DEBUG:urllib3.connectionpool:Retry: /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/ansible-srpm-macros-1-18.1.fc43.noarch.rpm DEBUG:urllib3.connectionpool:Retry: /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/alternatives-1.33-2.fc43.aarch64.rpm DEBUG:urllib3.connectionpool:https://d2lzkl7pfhq30w.cloudfront.net:443 "GET /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/ansible-srpm-macros-1-18.1.fc43.noarch.rpm 
HTTP/1.1" 504 941 ERROR:__main__:Exception raised for https://d2lzkl7pfhq30w.cloudfront.net/pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/ansible-srpm-macros-1-18.1.fc43.noarch.rpm urllib3.exceptions.ResponseError: too many 504 error responses The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/lib/python3.13/site-packages/requests/adapters.py", line 668, in send resp = conn.urlopen( method=request.method, ...<9 lines>... chunked=chunked, ) File "/usr/lib/python3.13/site-packages/urllib3/connectionpool.py", line 942, in urlopen return self.urlopen( ~~~~~~~~~~~~^ method, ^^^^^^^ ...<13 lines>... **response_kw, ^^^^^^^^^^^^^^ ) ^ File "/usr/lib/python3.13/site-packages/urllib3/connectionpool.py", line 942, in urlopen return self.urlopen( ~~~~~~~~~~~~^ method, ^^^^^^^ ...<13 lines>... **response_kw, ^^^^^^^^^^^^^^ ) ^ File "/usr/lib/python3.13/site-packages/urllib3/connectionpool.py", line 942, in urlopen return self.urlopen( ~~~~~~~~~~~~^ method, ^^^^^^^ ...<13 lines>... 
**response_kw, ^^^^^^^^^^^^^^ ) ^ [Previous line repeated 2 more times] File "/usr/lib/python3.13/site-packages/urllib3/connectionpool.py", line 932, in urlopen retries = retries.increment(method, url, response=response, _pool=self) File "/usr/lib/python3.13/site-packages/urllib3/util/retry.py", line 519, in increment raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='d2lzkl7pfhq30w.cloudfront.net', port=443): Max retries exceeded with url: /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/ansible-srpm-macros-1-18.1.fc43.noarch.rpm (Caused by ResponseError('too many 504 error responses')) During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 62, in download_file with request_with_retry().get(url, stream=True, timeout=60) as response: ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.13/site-packages/requests/sessions.py", line 602, in get return self.request("GET", url, **kwargs) ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.13/site-packages/requests/sessions.py", line 589, in request resp = self.send(prep, **send_kwargs) File "/usr/lib/python3.13/site-packages/requests/sessions.py", line 703, in send r = adapter.send(request, **kwargs) File "/usr/lib/python3.13/site-packages/requests/adapters.py", line 692, in send raise RetryError(e, request=request) requests.exceptions.RetryError: HTTPSConnectionPool(host='d2lzkl7pfhq30w.cloudfront.net', port=443): Max retries exceeded with url: /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/ansible-srpm-macros-1-18.1.fc43.noarch.rpm (Caused by ResponseError('too many 504 error responses')) DEBUG:urllib3.connectionpool:Retry: 
/pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/b/binutils-2.45-1.fc43.aarch64.rpm DEBUG:urllib3.connectionpool:https://d2lzkl7pfhq30w.cloudfront.net:443 "GET /pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/alternatives-1.33-2.fc43.aarch64.rpm HTTP/1.1" 504 941 ERROR:__main__:Exception raised for https://d2lzkl7pfhq30w.cloudfront.net/pub/fedora/linux/development/rawhide/Everything/aarch64/os/Packages/a/alternatives-1.33-2.fc43.aarch64.rpm urllib3.exceptions.ResponseError: too many 504 error responses [...] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 19ac75e93c..3f86c8259d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -99,6 +99,7 @@ ("Failed because CPU is not x86-64-v4", r"ERROR: CPU is not x86-64-v4, aborting build."), ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), + ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing 
source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), From 13c46992e518be4388d0980654f42eb609043ef2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 08:45:23 +0200 Subject: [PATCH 146/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 I0807 05:30:10.989882 29910 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-jowqw-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-07 05:29:59 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-47322cd9fd7f3226a27635b56a1f4f43": git fetch error: remote: Retry later --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3f86c8259d..d1b51c0f96 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -27,6 +27,7 @@ ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", 
r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128") ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), ("Error updating .tekton file in gitlab.cee.redhat.com", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+ in repo .*: Failed to update/create file: PUT https://gitlab.cee.redhat.com/api/v4/projects/[^ ]+/repository/files/.tekton/.*: 400 .message: A file with this name doesn't exist."), From 604118ed7aff1f2a89c76d207a8e74c33400f437 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 08:51:48 +0200 Subject: [PATCH 147/321] feat: Relax regexp for this pattern Newly I have noticed this: Unknown error: {"lastTransitionTime": "2025-08-06T18:49:34Z", 
"message": "\"step-mock-build\" exited with code 1: Error", "reason": "Failed", "status": "False", "type": "Succeeded"} Notice that added ': Error' in message. --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d1b51c0f96..e745fddb5f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -113,7 +113,7 @@ FAILED_TR_ERRORS = { ("Missing expected fields in TaskRun(", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message - ("SKIP", r"\"message\": \".* exited with code 1\""), # Another special error to avoid printing 'Unknown error:' message + ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), From 213f7db9fda9fdd32126653d34ab8110649a3c0a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 09:00:00 +0200 Subject: [PATCH 148/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 I0807 04:41:17.152651 31188 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-omzqw-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-07 04:41:08 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-15e8f3ea98056bf530f0b541220abf09": git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e745fddb5f..1f82b43e87 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -27,6 +27,7 @@ ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), 
("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected flush after ref listing: exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128") ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), From 4480a174465f098bc19baf8c1cacee985b958518 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 09:04:38 +0200 Subject: [PATCH 149/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 Unknown error: FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-rqnvt-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-06 20:42:28 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = 
https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-bdfc1a38ae1817c67b4110630f969975": error resolving repository: git clone error: Cloning into '/tmp/rpmbuild-pipeline.git-1020491201'... error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128} --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1f82b43e87..8ce0b9906e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -27,8 +27,9 @@ ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected flush after ref listing: exit status 128"), - ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128") + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), ("Error updating .tekton file in gitlab.cee.redhat.com", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+ in repo .*: Failed to update/create file: PUT https://gitlab.cee.redhat.com/api/v4/projects/[^ ]+/repository/files/.tekton/.*: 400 .message: A file with this name doesn't exist."), From 8bb3f31857a0171c591d23846051925ea4af4b07 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 09:09:10 +0200 Subject: [PATCH 150/321] feat: New error: Failed to add imagePullSecrets to build SA I0807 05:56:46.058388 4112 logging.go:30] DEBUG Configuring 5 imagePullSecrets for component build task images for component konflux-perfscale-app-cgdon-comp-0 I0807 05:57:06.080204 4112 logging.go:64] FAIL(61): Failed to configure pipeline imagePullSecrets: Unable to add secret imagerepository-for-toolings-gather-results-container-image-pull to service account build-pipeline-konflux-perfscale-app-cgdon-comp-0: context deadline exceeded This was caused by configuration issue on my side, but adding a rule to avoid "UNKNOWN". 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8ce0b9906e..abf57c444c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -40,6 +40,7 @@ ("Failed creating integration test scenario because it already exists", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists"), ("Failed getting PaC pull number because PaC public route does not exist", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines as Code public route does not exist\""), ("Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service"), + ("Failed to add imagePullSecrets to build SA", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account build-pipeline-.*: context deadline exceeded"), ("Failed to link pipeline image pull secret to build service account because SA was not found", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found"), ("Failed to merge MR on CEE GitLab due to 405", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed"), ("Failed to merge MR 
on CEE GitLab due to DNS error", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution"), From 73a841ab9e221c2520dc21fed3f8599904480a0c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 7 Aug 2025 11:06:22 +0200 Subject: [PATCH 151/321] feat: Do not give up if JSON in that annotation value have unexpected data --- tests/load-tests/pkg/journey/handle_component.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index d019ddb711..8784ba143b 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -155,7 +155,8 @@ func getPaCPullNumber(f *framework.Framework, namespace, name string) (int, erro // Check for right annotation pull, err = getPaCPull(comp.Annotations) if err != nil { - return false, fmt.Errorf("PaC component %s in namespace %s failed on PR annotation: %v", name, namespace, err) + logging.Logger.Debug("PaC component %s in namespace %s failed on PR annotation: %v", name, namespace, err) + return false, nil } if pull == "" { logging.Logger.Debug("PaC component %s in namespace %s do not have PR yet", name, namespace) From df1627d6719231eb1dd29a8bd62291f6eb26cae9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 8 Aug 2025 07:35:12 +0200 Subject: [PATCH 152/321] feat: New error: Component creation timed out waiting for image repository to be ready [...] Image repository for component jhutar-1-app-nruea-comp-0 in namespace jhutar-1-tenant do not have right state ('' != 'ready') yet. Image repository for component jhutar-1-app-nruea-comp-0 in namespace jhutar-1-tenant do not have right state ('' != 'ready') yet. Image repository for component jhutar-1-app-nruea-comp-0 in namespace jhutar-1-tenant do not have right state ('' != 'ready') yet. 
I0808 03:32:41.220780 94387 logging.go:64] FAIL(60): Component failed creation: Unable to create the Component jhutar-1-app-nruea-comp-0: timed out waiting for image repository to be ready for component jhutar-1-app-nruea-comp-0 in namespace jhutar-1-tenant: context deadline exceeded --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index abf57c444c..0bc1723afe 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -20,7 +20,8 @@ ("Application creation failed because of TLS handshake timeout", r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout"), ("Application creation timed out waiting for quota evaluation", r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out"), ("Build Pipeline Run was cancelled", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled"), - ("Component creation timed out waiting for image-controller annotations", r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component"), + ("Component creation timed out waiting for image-controller annotations", r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component"), # obsolete + ("Component creation timed out waiting for image repository to be ready", r"Component failed creation: Unable to create the Component .* timed out waiting for image repository to be ready for component .* in namespace .*: context deadline exceeded"), ("Couldnt get pipeline via bundles resolver from 
quay.io due to 429", r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests"), ("Couldnt get pipeline via git resolver from gitlab.cee due to 429", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), From cd6d32e01c4211742781c118db1172e57e9c9d24 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 8 Aug 2025 11:17:39 +0200 Subject: [PATCH 153/321] feat: New error: Failed creating integration test scenario because admission webhook dintegrationtestscenario.kb.io could not find application I0807 17:17:44.754144 24726 logging.go:30] DEBUG Creating integration test scenario jhutar-1-its-xkutg for application jhutar-1-app-xaguj in namespace jhutar-1-tenant I0807 17:17:44.776288 24726 logging.go:64] FAIL(40): Integration test scenario failed creation: Unable to create the Integration Test Scenario jhutar-1-its-xkutg: admission webhook "dintegrationtestscenario.kb.io" denied the request: could not find application 'jhutar-1-app-xaguj' in namespace 'jhutar-1-tenant' --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0bc1723afe..49696967cb 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -37,6 +37,7 @@ ("Failed application creation when calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no 
endpoints available for service .*application-service-webhook-service"), ("Failed component creation because resource quota evaluation timed out", r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out"), ("Failed component creation when calling mcomponent.kb.io webhook", r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*"), + ("Failed creating integration test scenario because admission webhook dintegrationtestscenario.kb.io could not find application", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario [^ ]+: admission webhook \"dintegrationtestscenario.kb.io\" denied the request: could not find application '[^ ]+' in namespace '[^ ]+'"), ("Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on"), ("Failed creating integration test scenario because it already exists", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists"), ("Failed getting PaC pull number because PaC public route does not exist", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines 
as Code public route does not exist\""), From a3e0888e1b60b1fdd9919de3c44fa2d8053ef9b3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 8 Aug 2025 15:08:00 +0200 Subject: [PATCH 154/321] feat: New error: Timeout waiting for release pipeline to be created I0808 12:45:27.030763 2054 logging.go:30] DEBUG Waiting for release for snapshot jhutar-app-czvyw-tngnq in namespace jhutar-tenant to be created I0808 12:45:27.045780 2054 logging.go:30] DEBUG Waiting for release pipeline for release jhutar-app-czvyw-tngnq-8bbb04f-t5vzz in namespace jhutar-tenant to be created Pipelinerun for release jhutar-app-czvyw-tngnq-8bbb04f-t5vzz in namespace jhutar-tenant not created yet: couldn't find PipelineRun in managed namespace 'jhutar-tenant' for a release 'jhutar-app-czvyw-tngnq-8bbb04f-t5vzz' in 'jhutar-tenant' namespace [...] Pipelinerun for release jhutar-app-czvyw-tngnq-8bbb04f-t5vzz in namespace jhutar-tenant not created yet: couldn't find PipelineRun in managed namespace 'jhutar-tenant' for a release 'jhutar-app-czvyw-tngnq-8bbb04f-t5vzz' in 'jhutar-tenant' namespace I0808 12:50:27.060515 2054 logging.go:64] FAIL(92): Release pipeline run failed creation: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 49696967cb..77ee7800e4 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -85,6 +85,7 @@ ("Timeout listing pipeline runs", r"Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), ("Timeout waiting for build pipeline to be created", r"Build Pipeline Run failed creation: context deadline exceeded"), ("Timeout waiting for integration test scenario to validate", r"Integration test scenario failed validation: context deadline exceeded"), + ("Timeout waiting for release pipeline to be 
created", r"Release pipeline run failed creation: context deadline exceeded"), ("Timeout waiting for snapshot to be created", r"Snapshot failed creation: context deadline exceeded"), ("Timeout waiting for test pipeline to create", r"Test Pipeline Run failed creation: context deadline exceeded"), ("Timeout waiting for test pipeline to finish", r"Test Pipeline Run failed run: context deadline exceeded"), From 40a76a2b9771ef8e8918f936a88b7d5774fe6732 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 8 Aug 2025 16:00:37 +0200 Subject: [PATCH 155/321] feat: New error: Script gather-rpms.py failed because of too many values to unpack Error from collected-data/konflux-perfscale-tenant/1/pod-konflux-perfscale-app-ztoxpd3129d3547850eb4f72478f38d7aaa5a-pod-step-gather-rpms.log: INFO:root:Preparing arch data INFO:root:Handling archdir aarch64 Traceback (most recent call last): File "/usr/bin/gather-rpms.py", line 516, in prepare_arch_data() ~~~~~~~~~~~~~~~~~^^ File "/usr/bin/gather-rpms.py", line 142, in prepare_arch_data handle_archdir(arch) ~~~~~~~~~~~~~~^^^^^^ File "/usr/bin/gather-rpms.py", line 117, in handle_archdir nvr, btime, size, sigmd5, _ = line.strip().split() ^^^^^^^^^^^^^^^^^^^^^^^^^^^ ValueError: too many values to unpack (expected 5) --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 77ee7800e4..3ba354ae6c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -124,6 +124,7 @@ ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), + ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), } def message_to_reason(reasons_and_errors: set, msg: str) -> str | None: From d236b3f0c0ac18a1ca969c6bfd34d7885144426a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 11 Aug 2025 10:54:13 +0200 Subject: [PATCH 156/321] fix: Move to right list as this is about error from some pod --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3ba354ae6c..0a57ad04c2 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -114,6 +114,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), + ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), } FAILED_TR_ERRORS = { @@ -124,7 +125,6 @@ ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), - ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), } def message_to_reason(reasons_and_errors: set, msg: str) -> str | None: From 20aaa3eedcbf6705b289e13a03f0512a39670f54 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 11 Aug 2025 12:45:51 +0200 Subject: [PATCH 157/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 I0811 06:22:18.309056 2978 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-pugub-comp-0 in namespace jhutar-tenant to be created I0811 06:22:18.328525 2978 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-pugub-comp-0 in namespace jhutar-tenant to finish I0811 06:22:18.346997 2978 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-pugub-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-11 06:22:08 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-28098392ab48a6e651276aac67bffafc": error resolving repository: git clone error: Cloning into '/tmp/rpmbuild-pipeline.git-1809921880'... 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0a57ad04c2..040299199d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -29,6 +29,7 @@ ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* remote: Retry later fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429: exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected flush after ref listing: exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), From 5a2a5db69b24cc8b3ae8d8f188fc522c279c1a2f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 16:54:05 +0200 Subject: [PATCH 158/321] feat: New error: Failed to pull container from access.redhat.com because of DNS error Error from collected-data/jhutar-tenant/1/pod-jhutar-app-psxvh-comp-0-on-cc656e9354034b8daae819a32a98b11c-pod-step-build.log: [2025-08-14T10:49:46,392086352+00:00] Run buildah build [2025-08-14T10:49:46,393098027+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-08-14T10:49:46 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=28d75f408dba4c8fad50b7a8d33512a65ebca490 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Dockerfile.dZrAnk -t quay.io/redhat-user-workloads/jhutar-tenant/jhutar-app-psxvh-comp-0:on-pr-28d75f408dba4c8fad50b7a8d33512a65ebca490 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi8/nodejs-18:latest Trying to pull registry.access.redhat.com/ubi8/nodejs-18:latest... 
Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18:latest: copying system image from manifest list: reading signatures: Get "https://access.redhat.com/webassets/docker/content/sigstore/ubi8/nodejs-18@sha256=3a895f2b85ffeda82b2d50ce1ae554bc5bc62448aba48b3fd56ce94b694b3b2a/signature-1": dial tcp: lookup access.redhat.com: Temporary failure in name resolution --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 040299199d..37a36b4777 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -108,6 +108,7 @@ ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), + ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get 
\"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), From f1296384804ab5b5c01e00b7f4cb69b06d1e1a76 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:03:11 +0200 Subject: [PATCH 159/321] feat: New error: Failed to pull container from registry.access.redhat.com because of remote tls error Error from collected-data/jhutar-tenant/1/pod-jhutar-app-ueyje-comp-0-on-0fb3f1810802cabd83b13a2cb4f379bb-pod-step-build.log: [2025-08-12T23:50:11,341027851+00:00] Run buildah build [2025-08-12T23:50:11,342032927+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-08-12T23:50:11 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=6aa57e669dde3e18d0f601c53aa1a94656fb3af5 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Dockerfile.AnJPVL -t quay.io/redhat-user-workloads/jhutar-tenant/jhutar-app-ueyje-comp-0:on-pr-6aa57e669dde3e18d0f601c53aa1a94656fb3af5 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi8/nodejs-18:latest Trying to pull registry.access.redhat.com/ubi8/nodejs-18:latest... 
Getting image source signatures Checking if image destination supports signatures Copying blob sha256:046eb6416170db2ab33e464d04c6f7c0c8c6cc8563597aca8e9fb7736dff00f9 Copying blob sha256:43ddc3bd12b8691687c9f6c273331ca07e3a89b50a619e0db9b040a8a629386d Copying blob sha256:b610fd6091ba2163b8943d2214e20665977bc8064dc769d47be9ae868e48a301 Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18:latest: copying system image from manifest list: reading blob sha256:b610fd6091ba2163b8943d2214e20665977bc8064dc769d47be9ae868e48a301: Get "https://cdn01.quay.io/quayio-production-s3/sha256/b6/b610fd6091ba2163b8943d2214e20665977bc8064dc769d47be9ae868e48a301?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=...%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250812T235029Z&X-Amz-Expires=600&X-Amz-SignedHeaders=host&X-Amz-Signature=...®ion=us-east-1&namespace=redhat-prod&username=redhat-prod+registry_proxy&repo_name=ubi8----nodejs-18&akamai_signature=exp=1755043529~hmac=...": remote error: tls: internal error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 37a36b4777..29396e09ab 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -111,6 +111,7 @@ ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from 
registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), + ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), From 6ea33beedc4b4e489a81af7aa9bd3f72749e2cfd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:11:40 +0200 Subject: [PATCH 160/321] feat: New error: Failed to git fetch from gitlab.cee due to connectivity issues collected-data/jhutar-tenant/1/pod-jhutar-app-mihxq-comp-0-on-97dc450f34381168232902baec0bd44e-pod-step-clone.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt {"level":"error","ts":1755033959.7082,"caller":"git/git.go:53","msg":"Error running git [fetch --recurse-submodules=yes --depth=1 origin --update-head-ok 
--force 876d75cd541b597b83ddd4b6c4e34d5bb9291a18]: exit status 128\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/jhutar/nodejs-devfile-sample6-undef-jhutar/': The requested URL returned error: 429\n","stacktrace":"github.com/tektoncd-catalog/git-clone/git-init/git.run\n\t/opt/app-root/src/git-init/git/git.go:53\ngithub.com/tektoncd-catalog/git-clone/git-init/git.Fetch\n\t/opt/app-root/src/git-init/git/git.go:156\nmain.main\n\t/opt/app-root/src/git-init/main.go:52\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:267"} {"level":"fatal","ts":1755033959.7083006,"caller":"git-init/main.go:53","msg":"Error fetching git repository: failed to fetch [876d75cd541b597b83ddd4b6c4e34d5bb9291a18]: exit status 128","stacktrace":"main.main\n\t/opt/app-root/src/git-init/main.go:53\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:267"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 29396e09ab..677a3218e9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -44,6 +44,7 @@ ("Failed getting PaC pull number because PaC public route does not exist", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines as Code public route does not exist\""), ("Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service .*integration-service-webhook-service"), ("Failed to add imagePullSecrets to 
build SA", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account build-pipeline-.*: context deadline exceeded"), + ("Failed to git fetch from gitlab.cee due to connectivity issues", r"Error running git .fetch.*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/[^ ]+': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 128"), ("Failed to link pipeline image pull secret to build service account because SA was not found", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account .*: serviceaccounts .* not found"), ("Failed to merge MR on CEE GitLab due to 405", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*message: 405 Method Not Allowed"), ("Failed to merge MR on CEE GitLab due to DNS error", r"Repo-templating workflow component cleanup failed: Merging [0-9]+ failed: [Pp][Uu][Tt] .*https://gitlab.cee.redhat.com/api/.*/merge_requests/[0-9]+/merge.*Temporary failure in name resolution"), From ec350266f99ba94167d64e12736e91cb216499c3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:17:10 +0200 Subject: [PATCH 161/321] feat: New error: Timeout forking the repo before the actual test I0812 17:22:34.537088 18492 logging.go:36] INFO Initiating thread 0 I0812 17:22:34.640216 18492 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/nodejs-devfile-sample6 with suffix undef-jhutar to jhutar Failed to fork jhutar/nodejs-devfile-sample6, trying again: POST https://gitlab.cee.redhat.com/api/v4/projects/jhutar/nodejs-devfile-sample6/fork: 409 {message: [Project namespace name has already been taken, The project is still being deleted. 
Please try again later.]} I0812 17:32:48.397255 18492 logging.go:64] FAIL(80): Repo forking failed: Error waiting for project jhutar/nodejs-devfile-sample6-undef-jhutar (ID: 135814) fork to complete: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 677a3218e9..5b637f9d2b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -64,6 +64,7 @@ ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error forking project .*: context deadline exceeded"), + ("Timeout forking the repo before the actual test", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. 
fork to complete: context deadline exceeded"), ("Timeout getting build service account", r"Component build SA not present: Component build SA .* not present: context deadline exceeded"), ("Timeout getting PaC pull number when validating component", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: context deadline exceeded"), ("Timeout getting pipeline", r"Message:.*resolver failed to get Pipeline.*resolution took longer than global timeout of .*"), From 221ce18fc1cc6f5a09dba8ec5cc84182b7f88cd6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:28:59 +0200 Subject: [PATCH 162/321] feat: New error: Getting repo tags from quay.io failed because of 502 Bad Gateway collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-hagbd-comp-0-o555ad77dc377600bd2f9a2f3a1462b59-pod-step-app-set-outcome.log {"result":"FAILURE","timestamp":"1755112790","note":"Task preflight is a FAILURE: Refer to Tekton task logs for more information","successes":7,"failures":1,"warnings":0}time="2025-08-13T19:19:51Z" level=fatal msg="Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway" --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 5b637f9d2b..d14269b3cd 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -117,6 +117,7 @@ ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected 
HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), + ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), From f5d939c90dd4e6b7c56dddfd5fa80ddf20aa07b3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:44:34 +0200 Subject: [PATCH 163/321] fix: Properly escape the error --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d14269b3cd..c0551229c8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -127,7 +127,7 @@ ("Missing expected fields in TaskRun(", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message - ("Back-off pulling task run image from quay.io", r"the step .* in 
TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"quay.io/.*\""), + ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"quay.io/.*"), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), From f545cc42f7991f7ce80c3179b7d144aa4009d6db Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 14 Aug 2025 17:48:30 +0200 Subject: [PATCH 164/321] feat: New error: Repo forking failed because import failed I0812 15:33:48.175534 3053 logging.go:36] INFO Initiating thread 0 I0812 15:33:48.278544 3053 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/nodejs-devfile-sample4 with suffix undef-jhutar to jhutar Failed to fork jhutar/nodejs-devfile-sample4, trying again: POST https://gitlab.cee.redhat.com/api/v4/projects/jhutar/nodejs-devfile-sample4/fork: 409 {message: [Project namespace name has already been taken, The project is still being deleted. 
Please try again later.]} I0812 15:42:50.707176 3053 logging.go:64] FAIL(80): Repo forking failed: Error waiting for project jhutar/nodejs-devfile-sample4-undef-jhutar (ID: 135791) fork to complete: Forking of project nodejs-devfile-sample4-undef-jhutar (ID: 135791) failed with import status: failed --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c0551229c8..56c4dc448d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -60,6 +60,7 @@ ("Repo forking failed as the target is still being deleted", r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted"), ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), ("Repo forking failed because gitlab.com returned 503", r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*"), + ("Repo forking failed because import failed", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. fork to complete: Forking of project [^ ]+ .ID: [0-9]+. failed with import status: failed"), ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) 
due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), From 1dfe70d28626f7eda4ab6b1eee8695e3cdcf45ba Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 10:46:22 +0200 Subject: [PATCH 165/321] fix: Remove trailing character - typo --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 56c4dc448d..7d09ab6ec6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -125,7 +125,7 @@ } FAILED_TR_ERRORS = { - ("Missing expected fields in TaskRun(", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically + ("Missing expected fields in TaskRun", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \\\"Back-off pulling image \\\"quay.io/.*"), From 5d64a5fc2bd00619849c35590a5d099009fb02b3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 15:24:09 +0200 Subject: [PATCH 166/321] feat: New error: Failed to ssh to remote MPC VM pod-konflux-perfscae74092bef25fe9c74f1e56293947647ac98636118502-pod-step-mock-build.log + test linux/amd64 = localhost + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + '[' -e /ssh/otp ']' ++ cat /ssh/otp-server + curl --cacert /ssh/otp-ca -XPOST -d @/ssh/otp https://multi-platform-otp-server.multi-platform-controller.svc.cluster.local/otp % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 2654 100 2634 100 20 197k 1538 --:--:-- --:--:-- --:--:-- 199k + echo '' + arch=x86_64 + case linux/amd64 in + chmod 0400 /root/.ssh/id_rsa ++ cat /ssh/host + export SSH_HOST=u-fb64b8567505db5ab08aadcc6ef0@10.208.3.72 + SSH_HOST=u-fb64b8567505db5ab08aadcc6ef0@10.208.3.72 ++ cat /ssh/user-dir + export HOMEDIR=/home/u-fb64b8567505db5ab08aadcc6ef0 + HOMEDIR=/home/u-fb64b8567505db5ab08aadcc6ef0 + export 'SSH_ARGS=-o StrictHostKeyChecking=no' + SSH_ARGS='-o StrictHostKeyChecking=no' + '[' u-fb64b8567505db5ab08aadcc6ef0@10.208.3.72 == localhost ']' + workdir=/var/workdir + remote_cmd echo 'Hello from the other side!' + ssh -o StrictHostKeyChecking=no u-fb64b8567505db5ab08aadcc6ef0@10.208.3.72 echo 'Hello from the other side!' Warning: Permanently added '10.208.3.72' (ED25519) to the list of known hosts. u-fb64b8567505db5ab08aadcc6ef0@10.208.3.72: Permission denied (publickey,gssapi-keyex,gssapi-with-mic). 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7d09ab6ec6..a7c834cc26 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -116,6 +116,7 @@ ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), + ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry 
quay.io: received unexpected HTTP status: 502 Bad Gateway"), From 40819637343f9a795cd2ba095eef0e7022b46319 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 15:57:30 +0200 Subject: [PATCH 167/321] fix: Fix regexp for this rule --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a7c834cc26..f12d80310b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -130,7 +130,7 @@ ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"quay.io/.*"), - ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \"Back-off pulling image \"registry.access.redhat.com/.*\""), + ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"registry.access.redhat.com/.*"), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), } From 55b75427b8dad86efdadb375d006d39858614102 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 16:04:37 +0200 Subject: [PATCH 168/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 Looks like the error is shorter now: I0814 18:22:59.759422 11278 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-oxamu-comp-0 in namespace jhutar-tenant to be created I0814 18:23:00.411864 11278 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-oxamu-comp-0 in namespace jhutar-tenant to finish I0814 18:23:00.685297 11278 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-oxamu-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-14 18:22:49 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-3c88a3efef1035b5120e6ab2c4854c2c": git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f12d80310b..1dd9879ed6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -30,7 +30,7 @@ ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* remote: Retry later fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429: exit status 128"), - ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected flush after ref listing: exit status 128"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), From e635a0f976c8c61d0f2cd5b17bf2e8c42154ce65 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 16:24:17 +0200 Subject: [PATCH 169/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 I0814 01:22:20.127409 3142 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-vstww-comp-0 in namespace jhutar-tenant to be created I0814 01:22:20.146073 3142 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-vstww-comp-0 in namespace jhutar-tenant to finish I0814 01:22:20.167329 3142 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-vstww-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-14 01:22:08 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-0969fa6fd3eef93ceab51f48fe5512ab": git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'acknowledgments': exit status 128} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py 
b/tests/load-tests/errors.py index 1dd9879ed6..7550b32e54 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -31,6 +31,7 @@ ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* remote: Retry later fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429: exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'acknowledgments': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), From 4ffd0b520ad583d8873889dc3d6cbc8b3b8cac0e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 15 Aug 2025 16:25:36 +0200 Subject: [PATCH 170/321] feat: Ignore another error that is not relevant --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7550b32e54..ae25b8a1bd 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -130,6 +130,7 @@ ("Missing expected fields in TaskRun", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message + ("SKIP", r"\"message\": \".* exited with code 255.*\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"quay.io/.*"), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. 
The pod errored with the message: \\\"Back-off pulling image \\\"registry.access.redhat.com/.*"), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), From c6e518e0c0e5b1ea4a71f6cbc8d693d6f8ba2da2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 18 Aug 2025 10:38:55 +0200 Subject: [PATCH 171/321] fix(KONFLUX-9622): Retry creating ITS in case of failure with short timeout as suggested by Krunoslav --- .../handle_integration_test_scenarios.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index c3713f87a7..b23ed50860 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -2,19 +2,34 @@ package journey import ( "fmt" + "time" logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" framework "github.com/konflux-ci/e2e-tests/pkg/framework" util "github.com/devfile/library/v2/pkg/util" + + utils "github.com/konflux-ci/e2e-tests/pkg/utils" ) func createIntegrationTestScenario(f *framework.Framework, namespace, name, appName, scenarioGitURL, scenarioRevision, scenarioPathInRepo string) error { - _, err := f.AsKubeDeveloper.IntegrationController.CreateIntegrationTestScenario(name, appName, namespace, scenarioGitURL, scenarioRevision, scenarioPathInRepo, "", []string{}) + interval := time.Second * 10 + timeout := time.Minute * 1 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + _, err = f.AsKubeDeveloper.IntegrationController.CreateIntegrationTestScenario(name, appName, namespace, scenarioGitURL, scenarioRevision, scenarioPathInRepo, "", []string{}) + if err != nil { + logging.Logger.Debug("Failed to create the Integration Test Scenario %s in namespace %s: %v", name, namespace, err) + return false, nil + } + + return true, nil + }, 
interval, timeout) if err != nil { - return fmt.Errorf("Unable to create the Integration Test Scenario %s: %v", name, err) + return fmt.Errorf("Unable to create the Integration Test Scenario %s in namespace %s: %v", name, namespace, err) } + return nil } From e1ba87d4ddacd6c7431b87fb1d76911e030f741d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 18 Aug 2025 16:54:08 +0200 Subject: [PATCH 172/321] feat: Do not spam output with so many errors when waiting --- pkg/clients/has/components.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index 8e8d9cc91a..560d207379 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -304,7 +304,7 @@ func (h *HasController) CreateComponent(componentSpec appservice.ComponentSpec, return nil, err } // Decrease the timeout to 5 mins, when the issue https://issues.redhat.com/browse/STONEBLD-3552 is fixed - if err := utils.WaitUntil(h.CheckImageRepositoryExists(namespace, componentSpec.ComponentName), time.Minute*15); err != nil { + if err := utils.WaitUntilWithInterval(h.CheckImageRepositoryExists(namespace, componentSpec.ComponentName), time.Second*5, time.Minute*15); err != nil { return nil, fmt.Errorf("timed out waiting for image repository to be ready for component %s in namespace %s: %+v", componentSpec.ComponentName, namespace, err) } return componentObject, nil From 9abe3a435f17a80e7e6d869adc9eb457423de434 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 19 Aug 2025 09:34:53 +0200 Subject: [PATCH 173/321] fix: Resolve panic 'invalid memory address or nil pointer dereference' when accessing a property of the resp object when resp itself is nil Generated-By: Gemini --- pkg/clients/github/repositories.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index e6b754a1bb..c73815929c 100644 --- 
a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -158,14 +158,15 @@ func (g *Github) DeleteRepositoryIfExists(name string) error { _, resp, err := g.client.Repositories.Get(ctx, g.organization, name) if err != nil { - if resp.StatusCode != 404 { - return fmt.Errorf("Error checking repository %s/%s: %v\n", g.organization, name, err) - } - } else { - _, deleteErr := g.client.Repositories.Delete(ctx, g.organization, name) - if deleteErr != nil { - return fmt.Errorf("Error deleting repository %s/%s: %v\n", g.organization, name, deleteErr) + if resp != nil && resp.StatusCode == 404 { + return nil } + return fmt.Errorf("Error checking repository %s/%s: %v\n", g.organization, name, err) + } + + _, deleteErr := g.client.Repositories.Delete(ctx, g.organization, name) + if deleteErr != nil { + return fmt.Errorf("Error deleting repository %s/%s: %v\n", g.organization, name, deleteErr) } return nil From 4e1462dbdc53334c083a71f71fc3a34613d15965 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 19 Aug 2025 09:36:04 +0200 Subject: [PATCH 174/321] refactor: Remove trailing newlines from errors --- pkg/clients/github/repositories.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index c73815929c..4794d66477 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -161,12 +161,12 @@ func (g *Github) DeleteRepositoryIfExists(name string) error { if resp != nil && resp.StatusCode == 404 { return nil } - return fmt.Errorf("Error checking repository %s/%s: %v\n", g.organization, name, err) + return fmt.Errorf("Error checking repository %s/%s: %v", g.organization, name, err) } _, deleteErr := g.client.Repositories.Delete(ctx, g.organization, name) if deleteErr != nil { - return fmt.Errorf("Error deleting repository %s/%s: %v\n", g.organization, name, deleteErr) + return fmt.Errorf("Error deleting repository %s/%s: %v", 
g.organization, name, deleteErr) } return nil @@ -234,7 +234,7 @@ func (g *Github) ForkRepositoryWithOrgs(sourceOrgName, sourceName, targetOrgName // Error we are getting: "422 Validation Failed [{Resource:Repository Field:name Code:custom Message:name a repository operation is already in progress}]" return false, nil } - return false, fmt.Errorf("Error renaming %s/%s to %s: %v\n", targetOrgName, fork.GetName(), targetName, err) + return false, fmt.Errorf("Error renaming %s/%s to %s: %v", targetOrgName, fork.GetName(), targetName, err) } return true, nil }, time.Second * 10, time.Minute * 10) From 2aa9fe4f2cd9bcf37e2122e8e5410dbc2cd5c418 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 19 Aug 2025 13:09:35 +0200 Subject: [PATCH 175/321] fix: Define we always return string here - this also resolves traceback on Python 3.9: TypeError: unsupported operand type(s) for |: 'type' and 'NoneType' --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ae25b8a1bd..882e69e33c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -137,7 +137,7 @@ ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), } -def message_to_reason(reasons_and_errors: set, msg: str) -> str | None: +def message_to_reason(reasons_and_errors: set, msg: str) -> str: """ Classifies an error message using regular expressions and returns the error name. 
From 800370991a8185b7c1f530d7140f64a380b37853 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 19 Aug 2025 13:28:59 +0200 Subject: [PATCH 176/321] feat: Print PipelineRun name --- tests/load-tests/pkg/journey/handle_pipeline.go | 5 ++++- tests/load-tests/pkg/journey/handle_releases_run.go | 6 +++++- tests/load-tests/pkg/journey/handle_test_run.go | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index 332a72da76..acef97c5a5 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -17,14 +17,17 @@ import ( func validatePipelineRunCreation(f *framework.Framework, namespace, appName, compName string) error { interval := time.Second * 20 timeout := time.Minute * 30 + var pr *pipeline.PipelineRun // TODO It would be much better to watch this resource for a condition err := utils.WaitUntilWithInterval(func() (done bool, err error) { - _, err = f.AsKubeDeveloper.HasController.GetComponentPipelineRunWithType(compName, appName, namespace, "build", "", "") + pr, err = f.AsKubeDeveloper.HasController.GetComponentPipelineRunWithType(compName, appName, namespace, "build", "", "") if err != nil { logging.Logger.Debug("Unable to get created PipelineRun for component %s in namespace %s: %v", compName, namespace, err) return false, nil } + + logging.Logger.Debug("Build PipelineRun %s for component %s in namespace %s created", pr.GetName(), compName, namespace) return true, nil }, interval, timeout) diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index c1d155ae78..fd6c3873ab 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -8,6 +8,7 @@ import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" import framework 
"github.com/konflux-ci/e2e-tests/pkg/framework" import utils "github.com/konflux-ci/e2e-tests/pkg/utils" +import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" // Wait for Release CR to be created @@ -39,16 +40,19 @@ func validateReleaseCreation(f *framework.Framework, namespace, snapshotName str func validateReleasePipelineRunCreation(f *framework.Framework, namespace, releaseName string) error { logging.Logger.Debug("Waiting for release pipeline for release %s in namespace %s to be created", releaseName, namespace) + var pr *pipeline.PipelineRun + interval := time.Second * 10 timeout := time.Minute * 5 err := utils.WaitUntilWithInterval(func() (done bool, err error) { - _, err = f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) + pr, err = f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) if err != nil { fmt.Printf("Pipelinerun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) return false, nil } + logging.Logger.Debug("Release PipelineRun %s for release %s in namespace %s created", pr.GetName(), releaseName, namespace) return true, nil }, interval, timeout) diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 0212c7a2db..55390f66f2 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -40,14 +40,17 @@ func validateTestPipelineRunCreation(f *framework.Framework, namespace, itsName, interval := time.Second * 20 timeout := time.Minute * 5 + var pr *pipeline.PipelineRun // TODO It would be much better to watch this resource for a condition err := utils.WaitUntilWithInterval(func() (done bool, err error) { - _, err = f.AsKubeDeveloper.IntegrationController.GetIntegrationPipelineRun(itsName, snapName, namespace) + pr, err = f.AsKubeDeveloper.IntegrationController.GetIntegrationPipelineRun(itsName, 
snapName, namespace) if err != nil { logging.Logger.Debug("Unable to get created test PipelineRun for integration test pipeline %s in namespace %s: %v", itsName, namespace, err) return false, nil } + + logging.Logger.Debug("Test PipelineRun %s for its %s and snap %s in namespace %s created", pr.GetName(), itsName, snapName, namespace) return true, nil }, interval, timeout) From afad6926ed26c1c2839c9850884387e255d59c07 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 19 Aug 2025 13:30:44 +0200 Subject: [PATCH 177/321] style: Use debug log messages here as in the rest of the code --- .../pkg/journey/handle_releases_run.go | 12 ++++++------ .../pkg/journey/handle_releases_setup.go | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index fd6c3873ab..8d51cbe89e 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -23,7 +23,7 @@ func validateReleaseCreation(f *framework.Framework, namespace, snapshotName str err := utils.WaitUntilWithInterval(func() (done bool, err error) { release, err := f.AsKubeDeveloper.ReleaseController.GetRelease("", snapshotName, namespace) if err != nil { - fmt.Printf("Can not get release for snapshot %s in namespace %s: %v\n", snapshotName, namespace, err) + logging.Logger.Debug("Can not get release for snapshot %s in namespace %s: %v\n", snapshotName, namespace, err) return false, nil } @@ -48,7 +48,7 @@ func validateReleasePipelineRunCreation(f *framework.Framework, namespace, relea err := utils.WaitUntilWithInterval(func() (done bool, err error) { pr, err = f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) if err != nil { - fmt.Printf("Pipelinerun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) + logging.Logger.Debug("Pipelinerun for release %s in 
namespace %s not created yet: %v\n", releaseName, namespace, err) return false, nil } @@ -70,13 +70,13 @@ func validateReleasePipelineRunCondition(f *framework.Framework, namespace, rele err := utils.WaitUntilWithInterval(func() (done bool, err error) { pipelineRun, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, releaseName, namespace) if err != nil { - fmt.Printf("PipelineRun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) + logging.Logger.Debug("PipelineRun for release %s in namespace %s not created yet: %v\n", releaseName, namespace, err) return false, nil } // Check if there are some conditions if len(pipelineRun.Status.Conditions) == 0 { - fmt.Printf("PipelineRun %s in namespace %s lacks status conditions\n", pipelineRun.GetName(), pipelineRun.GetNamespace()) + logging.Logger.Debug("PipelineRun %s in namespace %s lacks status conditions\n", pipelineRun.GetName(), pipelineRun.GetNamespace()) return false, nil } @@ -110,13 +110,13 @@ func validateReleaseCondition(f *framework.Framework, namespace, releaseName str err := utils.WaitUntilWithInterval(func() (done bool, err error) { release, err := f.AsKubeDeveloper.ReleaseController.GetRelease(releaseName, "", namespace) if err != nil { - fmt.Printf("Can not get release %s in namespace %s: %v\n", releaseName, namespace, err) + logging.Logger.Debug("Can not get release %s in namespace %s: %v\n", releaseName, namespace, err) return false, nil } // Check if there are some conditions if len(release.Status.Conditions) == 0 { - fmt.Printf("Release %s in namespace %s lacks status conditions\n", releaseName, namespace) + logging.Logger.Debug("Release %s in namespace %s lacks status conditions\n", releaseName, namespace) return false, nil } diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index 1c8778f9cf..abd71161fa 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go 
+++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -61,22 +61,22 @@ func validateReleasePlan(f *framework.Framework, namespace, name string) error { err := utils.WaitUntilWithInterval(func() (done bool, err error) { releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(name, namespace) if err != nil { - fmt.Printf("Unable to get ReleasePlan %s in %s: %v\n", name, namespace, err) + logging.Logger.Debug("Unable to get ReleasePlan %s in %s: %v\n", name, namespace, err) return false, nil } condition := meta.FindStatusCondition(releasePlan.Status.Conditions, releaseApi.MatchedConditionType.String()) if condition == nil { - fmt.Printf("MatchedConditon of %s is still not set\n", releasePlan.Name) + logging.Logger.Debug("MatchedConditon of %s is still not set\n", releasePlan.Name) return false, nil } // it may need a period of time for the ReleasePlanCR to be reconciled if condition.Status == metav1.ConditionFalse { - fmt.Printf("MatchedConditon of %s has not reconciled yet\n", releasePlan.Name) + logging.Logger.Debug("MatchedConditon of %s has not reconciled yet\n", releasePlan.Name) return false, nil } if condition.Status != metav1.ConditionTrue { - fmt.Printf("MatchedConditon of %s is not true yet\n", releasePlan.Name) + logging.Logger.Debug("MatchedConditon of %s is not true yet\n", releasePlan.Name) return false, nil } if condition.Reason == releaseApi.MatchedReason.String() { @@ -100,22 +100,22 @@ func validateReleasePlanAdmission(f *framework.Framework, namespace, name string err := utils.WaitUntilWithInterval(func() (done bool, err error) { releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(name, namespace) if err != nil { - fmt.Printf("Unable to get ReleasePlanAdmission %s in %s: %v\n", name, namespace, err) + logging.Logger.Debug("Unable to get ReleasePlanAdmission %s in %s: %v\n", name, namespace, err) return false, nil } condition := 
meta.FindStatusCondition(releasePlanAdmission.Status.Conditions, releaseApi.MatchedConditionType.String()) if condition == nil { - fmt.Printf("MatchedConditon of %s is still not set\n", releasePlanAdmission.Name) + logging.Logger.Debug("MatchedConditon of %s is still not set\n", releasePlanAdmission.Name) return false, nil } // it may need a period of time for the ReleasePlanCR to be reconciled if condition.Status == metav1.ConditionFalse { - fmt.Printf("MatchedConditon of %s has not reconciled yet\n", releasePlanAdmission.Name) + logging.Logger.Debug("MatchedConditon of %s has not reconciled yet\n", releasePlanAdmission.Name) return false, nil } if condition.Status != metav1.ConditionTrue { - fmt.Printf("MatchedConditon of %s is not true yet\n", releasePlanAdmission.Name) + logging.Logger.Debug("MatchedConditon of %s is not true yet\n", releasePlanAdmission.Name) return false, nil } if condition.Reason == releaseApi.MatchedReason.String() { From e559af2d4bb03c15f0c0e48a9cf1f046d63033b0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 07:30:40 +0200 Subject: [PATCH 178/321] feat: Increase interval as onboarding takes about 25 seconds, so there is no need to hurry --- pkg/clients/has/components.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index 560d207379..30aebbd7a2 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -304,7 +304,7 @@ func (h *HasController) CreateComponent(componentSpec appservice.ComponentSpec, return nil, err } // Decrease the timeout to 5 mins, when the issue https://issues.redhat.com/browse/STONEBLD-3552 is fixed - if err := utils.WaitUntilWithInterval(h.CheckImageRepositoryExists(namespace, componentSpec.ComponentName), time.Second*5, time.Minute*15); err != nil { + if err := utils.WaitUntilWithInterval(h.CheckImageRepositoryExists(namespace, componentSpec.ComponentName), time.Second*10, time.Minute*15); err != nil 
{ return nil, fmt.Errorf("timed out waiting for image repository to be ready for component %s in namespace %s: %+v", componentSpec.ComponentName, namespace, err) } return componentObject, nil } From 818b774b4177c90c431da9d82ae761e6cc057eaf Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 07:50:31 +0200 Subject: [PATCH 179/321] feat: Do not even attempt to list release PLRs if we do not take the release (and do not print warning in that case) --- .../load-tests/pkg/journey/handle_collections.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 0fef551e60..31386be6f7 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -96,14 +96,16 @@ func collectPipelineRunJSONs(f *framework.Framework, dirPath, namespace, applica return fmt.Errorf("Failed to list PipelineRuns %s/%s/%s: %v", namespace, application, component, err) } - pr_release, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, release, namespace) - if err != nil { - logging.Logger.Warning("Failed to get Release PipelineRun %s/%s: %v", namespace, release, err) - } + if release != "" { + pr_release, err := f.AsKubeDeveloper.ReleaseController.GetPipelineRunInNamespace(namespace, release, namespace) + if err != nil { + logging.Logger.Warning("Failed to get Release PipelineRun %s/%s: %v", namespace, release, err) + } - // Make one list that contains them all - if pr_release != nil { - *prs = append(*prs, *pr_release) + // Add release pipeline runs to the list + if pr_release != nil { + *prs = append(*prs, *pr_release) + } } for _, pr := range *prs { From 2423819a9d7a6b12e986775af34a5af3ef07fd25 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 08:12:34 +0200 Subject: [PATCH 180/321] feat(KONFLUX-8961): Add more verbosity when waiting for SA as it is handy when
debugging waiting for build SA --- pkg/clients/common/service_account.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/clients/common/service_account.go b/pkg/clients/common/service_account.go index 3d4fb4d858..095311af98 100644 --- a/pkg/clients/common/service_account.go +++ b/pkg/clients/common/service_account.go @@ -5,6 +5,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + . "github.com/onsi/ginkgo/v2" "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -17,6 +18,7 @@ func (s *SuiteController) ServiceAccountPresent(saName, namespace string) wait.C return func() (bool, error) { _, err := s.GetServiceAccount(saName, namespace) if err != nil { + GinkgoWriter.Printf("failed to get service account %s in namespace %s: %+v\n", saName, namespace, err) return false, nil } return true, nil From 81a630703f521923a6427381cec210ce39e92f1c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 08:13:53 +0200 Subject: [PATCH 181/321] fix(KONFLUX-8961): Increase timeout when waiting for build SA Here: https://issues.redhat.com/browse/KONFLUX-8961?focusedId=27821911&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-27821911 Robert Cerven said (edited): > it didn't even have chance to create SA & process component, because there were other onboardings going on > set bigger timeout, because if there is a lot onboardings it can take some time for component to be processed > each onbaoarding takes about 25s > so if there is for example 100 onboardings before you, you will have to wait 41 minutes > I'd suggest timeout 1h just to be sure tbh --- tests/load-tests/pkg/journey/handle_component.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 8784ba143b..208412483d 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ 
b/tests/load-tests/pkg/journey/handle_component.go @@ -125,7 +125,7 @@ func createComponent(f *framework.Framework, namespace, name, repoUrl, repoRevis func validateComponentBuildSA(f *framework.Framework, namespace, name string) error { interval := time.Second * 10 - timeout := time.Minute * 5 + timeout := time.Minute * 30 component_sa := "build-pipeline-" + name // TODO It would be much better to watch this resource instead querying it From b9afb9efc0690e2c001d6a8bc79c5e8fb34a103f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 09:40:05 +0200 Subject: [PATCH 182/321] feat: New error: Repo forking failed as GitLab CEE says 500 Internal Server Error I0816 15:20:57.130622 22126 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/libecpg-gitlab-fork with suffix prodp02-jhutar to jhutar I0816 15:21:09.987954 22126 logging.go:64] FAIL(80): Repo forking failed: Error deleting project jhutar/libecpg-gitlab-fork-prodp02-jhutar: GET https://gitlab.cee.redhat.com/users/sign_in: 500 failed to parse unknown error format: Something went wrong (500)
500 error

500: We're sorry, something went wrong on our end

Try refreshing the page, or going back and attempting the action again.

Please contact your GitLab administrator if this problem persists.

I0816 15:21:09.989665 22126 logging.go:30] DEBUG Finished errorsWriter, 1 errors processed --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 882e69e33c..b4105d7802 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -58,6 +58,7 @@ ("Release failure: PipelineRun not created", r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace"), ("Release Pipeline failed", r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Repo forking failed as GitLab CEE says 401 Unauthorized", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*"), + ("Repo forking failed as GitLab CEE says 500 Internal Server Error", r"Repo forking failed: Error deleting project .*: GET https://gitlab.cee.redhat.com/.*: 500 failed to parse unknown error format.*500: We're sorry, something went wrong on our end"), ("Repo forking failed as the target is still being deleted", r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted"), ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), ("Repo forking failed because gitlab.com returned 503", r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists.*"), From c26041291f6332eaa6ec9ea83117adf37f9c3b23 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 12:27:54 +0200 Subject: [PATCH 183/321] feat: New error: Application creation failed because it already exists I0819 06:30:30.683417 27158 logging.go:64] FAIL(30): Application failed creation: Unable to create the Application jhutar-app-bxbyg: applications.appstudio.redhat.com "jhutar-app-bxbyg" already exists --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index b4105d7802..14f81c375d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -17,6 +17,7 @@ # Errors patterns we recognize (when newlines were removed) ERRORS = { + ("Application creation failed because it already exists", r"Application failed creation: Unable to create the Application .*: applications.appstudio.redhat.com .* already exists"), ("Application creation failed because of TLS handshake timeout", r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout"), ("Application creation timed out waiting for quota evaluation", r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out"), ("Build Pipeline Run was cancelled", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled"), From 181f314f03a2e857e5b982e9da753b4039edb25a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 12:39:12 +0200 Subject: [PATCH 184/321] feat: New error: Couldnt get task via git resolver from gitlab.cee due to 429 I0820 06:53:57.944169 15943 logging.go:64] FAIL(71): Build Pipeline Run failed 
run: PipelineRun for component jhutar-app-jqfju-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-20 06:53:46 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type git\nurl = https://gitlab.cee.redhat.com/rhel-on-konflux/rpmbuild-pipeline.git\n": error requesting remote resource: error getting "Git" "jhutar-tenant/git-53ae2669a833d098f2be9237d38953e9": error resolving repository: git clone error: Cloning into '/tmp/rpmbuild-pipeline.git-2776050177'... --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 14f81c375d..c2d70b504c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -32,6 +32,7 @@ ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* remote: Retry later fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429: exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429"), + ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: git fetch error: error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'acknowledgments': exit status 128"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. 
.*: git fetch error: remote: Retry later fatal: unable to access .*: The requested URL returned error: 429: exit status 128"), ("Couldnt get task via http resolver from gitlab.cee", r"Message:.*Couldn't retrieve Task .*resolver type http.*error getting.*requested URL .*https://gitlab.cee.redhat.com/.* is not found"), From f530a0f373583e842d40b54ba24ab4570f294a0f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 13:54:21 +0200 Subject: [PATCH 185/321] feat: New error: Failed to provision MPC VM due to resource quota evaluation timed out collected-data/jhutar-tenant/1/pod-jhutar-app-ojkux-comp-0-on-push-mhj5q-rpmbuild-aarch64-pod-step-mock-build.log + test linux/arm64 = localhost + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out Context info: Platform: linux/arm64 File: /opt/app-root/src/pkg/reconciler/taskrun/taskrun.go Line: 460 + exit 1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c2d70b504c..05993583fb 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -115,6 +115,7 @@ ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), + ("Failed to provision MPC VM 
due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), From 5156fffae416fee39dd0323e88287c95d65e28f9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 14:58:42 +0200 Subject: [PATCH 186/321] feat: New error: Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed collected-data/jhutar-tenant/1/pod-jhutar-app-degwu-comp-0-on-5633191cdff52abeb2320b68916a8e66-pod-step-mock-build.log [...] 
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): download-01.beak-001.prod.iad2.dc.redhat.com:443 DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8965820/aarch64/toplink/vol/rhel-10/packages/pcre2/10.44/1.el10.3/aarch64/pcre2-utf32-10.44-1.el10.3.aarch64.rpm HTTP/1.1" 200 199140 Directory walk started Directory walk done - 184 packages Temporary output repo path: /results/buildroot_repo/.repodata/ Pool started (with 5 workers) Pool finished INFO:__main__:Pulling like: podman pull --arch arm64 registry.access.redhat.com/ubi9/ubi@sha256:b744c6fde9d4d18f8e192ea61aa93b9b2113a33e58a74ebb32307a1e49c0a7c0 Trying to pull registry.access.redhat.com/ubi9/ubi@sha256:b744c6fde9d4d18f8e192ea61aa93b9b2113a33e58a74ebb32307a1e49c0a7c0... Error: internal error: unable to copy from source docker://registry.access.redhat.com/ubi9/ubi@sha256:b744c6fde9d4d18f8e192ea61aa93b9b2113a33e58a74ebb32307a1e49c0a7c0: initializing source docker://registry.access.redhat.com/ubi9/ubi@sha256:b744c6fde9d4d18f8e192ea61aa93b9b2113a33e58a74ebb32307a1e49c0a7c0: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
Further instructions can be found here: https://access.redhat.com/RegistryAuthentication Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 151, in _main() ~~~~~^^ File "/usr/bin/mock-hermetic-repo", line 146, in _main prepare_image(data["config"]["bootstrap_image"], data["bootstrap"], ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ options.output_repo) ^^^^^^^^^^^^^^^^^^^^ File "/usr/bin/mock-hermetic-repo", line 115, in prepare_image subprocess.check_output(pull_cmd) ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 472, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, ~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ **kwargs).stdout ^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 577, in run raise CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) subprocess.CalledProcessError: Command '['podman', 'pull', '--arch', 'arm64', 'registry.access.redhat.com/ubi9/ubi@sha256:b744c6fde9d4d18f8e192ea61aa93b9b2113a33e58a74ebb32307a1e49c0a7c0']' returned non-zero exit status 125. --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 05993583fb..f0e3950330 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -128,6 +128,7 @@ ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), + ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), } FAILED_TR_ERRORS = { From 0b505fff41868d7fab85dea0c55c2fdbb5854688 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 15:48:17 +0200 Subject: [PATCH 187/321] feat: New error: Failed creating integration test scenario because of timeout This was happening in stone-prod-p02 that is in bad shape now, so not reporting issue. 
I0820 11:13:13.599183 15202 logging.go:30] DEBUG Creating integration test scenario jhutar-its-icazb for application jhutar-app-phxgj in namespace jhutar-tenant I0820 11:13:13.627774 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:13:23.616640 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:13:33.614203 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:13:43.615413 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints 
available for service "integration-service-webhook-service" I0820 11:13:53.614371 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:14:03.618309 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:14:13.613112 15202 logging.go:30] DEBUG Failed to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: Internal error occurred: failed calling webhook "vintegrationtestscenario.kb.io": failed to call webhook: Post "https://integration-service-webhook-service.integration-service.svc:443/validate-appstudio-redhat-com-v1beta2-integrationtestscenario?timeout=10s": no endpoints available for service "integration-service-webhook-service" I0820 11:14:13.613210 15202 logging.go:64] FAIL(40): Integration test scenario failed creation: Unable to create the Integration Test Scenario jhutar-its-icazb in namespace jhutar-tenant: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f0e3950330..534457d9e0 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -44,6 +44,7 @@ ("Failed creating integration test scenario 
because admission webhook dintegrationtestscenario.kb.io could not find application", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario [^ ]+: admission webhook \"dintegrationtestscenario.kb.io\" denied the request: could not find application '[^ ]+' in namespace '[^ ]+'"), ("Failed creating integration test scenario because cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* is forbidden: cannot set blockOwnerDeletion if an ownerReference refers to a resource you can't set finalizers on"), ("Failed creating integration test scenario because it already exists", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .* integrationtestscenarios.appstudio.redhat.com .* already exists"), + ("Failed creating integration test scenario because of timeout", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario [^ ]+ in namespace jhutar-tenant: context deadline exceeded"), ("Failed getting PaC pull number because PaC public route does not exist", r"Component failed validation: Unable to get PaC pull number for component .* in namespace .*: PaC component .* in namespace .* failed on PR annotation: Incorrect state: .*\"error-message\":\"52: Pipelines as Code public route does not exist\""), ("Failed Integration test scenario when calling dintegrationtestscenario.kb.io webhook", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario .*: Internal error occurred: failed calling webhook .*dintegrationtestscenario.kb.io.*: failed to call webhook: Post .*https://integration-service-webhook-service.integration-service.svc:443/mutate-appstudio-redhat-com-v1beta2-integrationtestscenario.*: no endpoints available for service 
.*integration-service-webhook-service"), ("Failed to add imagePullSecrets to build SA", r"Failed to configure pipeline imagePullSecrets: Unable to add secret .* to service account build-pipeline-.*: context deadline exceeded"), From 75dfd460d171038678a45af66429b91480e5847f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 16:19:39 +0200 Subject: [PATCH 188/321] refactor(KONFLUX-8961): When simply creating component, it is not our job to check image repository --- pkg/clients/has/components.go | 12 +++++++++++ tests/build/build.go | 20 +++++++++---------- tests/build/build_templates.go | 2 +- tests/build/multi-platform.go | 2 +- .../gitlab-integration-reporting.go | 2 +- tests/integration-service/integration.go | 4 ++-- tests/konflux-demo/konflux-demo.go | 2 +- tests/release/releaseLib.go | 4 ++-- 8 files changed, 30 insertions(+), 18 deletions(-) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index 30aebbd7a2..d6d8d68d8f 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -303,10 +303,22 @@ func (h *HasController) CreateComponent(componentSpec appservice.ComponentSpec, if err := h.KubeRest().Create(ctx, componentObject); err != nil { return nil, err } + + return componentObject, nil +} + +// Create a component and check image repository gets created. 
+func (h *HasController) CreateComponentCheckImageRepository(componentSpec appservice.ComponentSpec, namespace string, outputContainerImage string, secret string, applicationName string, skipInitialChecks bool, annotations map[string]string) (*appservice.Component, error) { + componentObject, err := h.CreateComponent(componentSpec, namespace, outputContainerImage, secret, applicationName, skipInitialChecks, annotations) + if err != nil { + return nil, err + } + // Decrease the timeout to 5 mins, when the issue https://issues.redhat.com/browse/STONEBLD-3552 is fixed if err := utils.WaitUntilWithInterval(h.CheckImageRepositoryExists(namespace, componentSpec.ComponentName), time.Second*10, time.Minute*15); err != nil { return nil, fmt.Errorf("timed out waiting for image repository to be ready for component %s in namespace %s: %+v", componentSpec.ComponentName, namespace, err) } + return componentObject, nil } diff --git a/tests/build/build.go b/tests/build/build.go index 341f958ea4..9b2ca64bdd 100644 --- a/tests/build/build.go +++ b/tests/build/build.go @@ -143,7 +143,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, } - _, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPrivateRepo), buildPipelineAnnotation)) + _, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPrivateRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) @@ -319,7 +319,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, } // Create a component with Git Source URL, a specified git branch and marking delete-repo=true - component, err = 
f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + component, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) @@ -666,7 +666,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, } - _, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + _, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) @@ -776,7 +776,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, }, } - component, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + component, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) 
Expect(err).ShouldNot(HaveOccurred()) }) @@ -900,7 +900,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, }, } - _, err = fw.AsKubeAdmin.HasController.CreateComponent(componentObj, namespace, "", "", appName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + _, err = fw.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, namespace, "", "", appName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) @@ -1046,7 +1046,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, }, } - _, err := f.AsKubeAdmin.HasController.CreateComponent(componentObj1, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + _, err := f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj1, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) It("creates second component", func() { @@ -1063,7 +1063,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, }, } - _, err := f.AsKubeAdmin.HasController.CreateComponent(componentObj2, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + _, err := f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj2, testNamespace, "", "", applicationName, 
false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) @@ -1179,7 +1179,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser }, } - component, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(invalidBuildAnnotation, buildPipelineAnnotation)) + component, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(invalidBuildAnnotation, buildPipelineAnnotation)) Expect(component).ToNot(BeNil()) Expect(err).ShouldNot(HaveOccurred()) }) @@ -1241,7 +1241,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser ComponentName: fmt.Sprintf("build-suite-test-component-image-source-%s", util.GenerateRandomString(6)), ContainerImage: containerImageSource, } - _, err = f.AsKubeAdmin.HasController.CreateComponent(component, testNamespace, outputContainerImage, "", applicationName, true, buildPipelineAnnotation) + _, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(component, testNamespace, outputContainerImage, "", applicationName, true, buildPipelineAnnotation) Expect(err).ShouldNot(HaveOccurred()) // get the build pipeline bundle annotation @@ -1446,7 +1446,7 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser if comp.repoName == componentDependenciesParentRepoName { componentObj.BuildNudgesRef = []string{ChildComponentDef.componentName} } - comp.component, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + comp.component, err = 
f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) } }) diff --git a/tests/build/build_templates.go b/tests/build/build_templates.go index 761e96556b..983d0b9bb6 100644 --- a/tests/build/build_templates.go +++ b/tests/build/build_templates.go @@ -150,7 +150,7 @@ func CreateComponent(commonCtrl *common.SuiteController, ctrl *has.HasController "build.appstudio.openshift.io/pipeline": fmt.Sprintf(`{"name":"%s", "bundle": "%s"}`, pipelineBundleName, customBuildBundle), } } - c, err := ctrl.CreateComponent(componentObj, namespace, "", "", applicationName, false, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) + c, err := ctrl.CreateComponentCheckImageRepository(componentObj, namespace, "", "", applicationName, false, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) Expect(c.Name).Should(Equal(componentName)) diff --git a/tests/build/multi-platform.go b/tests/build/multi-platform.go index 842832c9bd..9486bd9fb6 100644 --- a/tests/build/multi-platform.go +++ b/tests/build/multi-platform.go @@ -493,7 +493,7 @@ func createApplicationAndComponent(f *framework.Framework, testNamespace, platfo }, }, } - component, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) + component, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) return } diff --git 
a/tests/integration-service/gitlab-integration-reporting.go b/tests/integration-service/gitlab-integration-reporting.go index aae6c2ba67..03d9aed783 100644 --- a/tests/integration-service/gitlab-integration-reporting.go +++ b/tests/integration-service/gitlab-integration-reporting.go @@ -114,7 +114,7 @@ var _ = framework.IntegrationServiceSuiteDescribe("Gitlab Status Reporting of In // get the build pipeline bundle annotation buildPipelineAnnotation := build.GetBuildPipelineBundleAnnotation(constants.DockerBuild) // Create a component with Git Source URL, a specified git branch - component, err = f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + component, err = f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) diff --git a/tests/integration-service/integration.go b/tests/integration-service/integration.go index bb536c31e0..41cd5d1d16 100644 --- a/tests/integration-service/integration.go +++ b/tests/integration-service/integration.go @@ -452,7 +452,7 @@ func createComponent(f framework.Framework, testNamespace, applicationName, comp }, } - originalComponent, err := f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, false, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + originalComponent, err := f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, false, 
utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).NotTo(HaveOccurred()) return originalComponent, componentName, pacBranchName, componentBaseBranchName @@ -480,7 +480,7 @@ func createComponentWithCustomBranch(f framework.Framework, testNamespace, appli }, } - originalComponent, err := f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + originalComponent, err := f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).NotTo(HaveOccurred()) return originalComponent diff --git a/tests/konflux-demo/konflux-demo.go b/tests/konflux-demo/konflux-demo.go index 8fcb154933..70cfd1fa7b 100644 --- a/tests/konflux-demo/konflux-demo.go +++ b/tests/konflux-demo/konflux-demo.go @@ -178,7 +178,7 @@ var _ = framework.KonfluxDemoSuiteDescribe(Label(devEnvTestLabel), func() { }, } - component, err = fw.AsKubeAdmin.HasController.CreateComponent(componentObj, userNamespace, "", "", appSpec.ApplicationName, false, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) + component, err = fw.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, userNamespace, "", "", appSpec.ApplicationName, false, utils.MergeMaps(constants.ComponentPaCRequestAnnotation, buildPipelineAnnotation)) Expect(err).ShouldNot(HaveOccurred()) }) diff --git a/tests/release/releaseLib.go b/tests/release/releaseLib.go index bb65864c9a..0a3fe1219d 100644 --- a/tests/release/releaseLib.go +++ b/tests/release/releaseLib.go @@ 
-78,7 +78,7 @@ func CreateComponent(devFw framework.Framework, devNamespace, appName, compName, }, }, } - component, err := devFw.AsKubeAdmin.HasController.CreateComponent(componentObj, devNamespace, "", "", appName, true, buildPipelineBundle) + component, err := devFw.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, devNamespace, "", "", appName, true, buildPipelineBundle) Expect(err).NotTo(HaveOccurred()) return component } @@ -116,7 +116,7 @@ func CreateComponentWithNewBranch(f framework.Framework, testNamespace, applicat }, } - testComponent, err := f.AsKubeAdmin.HasController.CreateComponent(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) + testComponent, err := f.AsKubeAdmin.HasController.CreateComponentCheckImageRepository(componentObj, testNamespace, "", "", applicationName, true, utils.MergeMaps(utils.MergeMaps(constants.ComponentPaCRequestAnnotation, constants.ImageControllerAnnotationRequestPublicRepo), buildPipelineAnnotation)) Expect(err).NotTo(HaveOccurred()) return testComponent, testPacBranchName, componentBaseBranchName From 80ef377c3122bdb4f4d6ce8ccbfa5605abf664ee Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 20 Aug 2025 16:50:31 +0200 Subject: [PATCH 189/321] feat(KONFLUX-8961): Instead of waiting for build SA and image repository, wait for request annotation to disappear Discussed this with Robert Cerven and he suggested this is the best and most reliable approach.
--- .../ci-scripts/config/horreum-labels.sh | 4 +- tests/load-tests/evaluate.py | 2 +- .../pkg/journey/handle_component.go | 37 ++++++++++++++----- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/load-tests/ci-scripts/config/horreum-labels.sh b/tests/load-tests/ci-scripts/config/horreum-labels.sh index c56a10980b..99ca63a544 100755 --- a/tests/load-tests/ci-scripts/config/horreum-labels.sh +++ b/tests/load-tests/ci-scripts/config/horreum-labels.sh @@ -220,8 +220,8 @@ horreum_schema_label_present '$.results.measurements.getPaCPullNumber.error_rate horreum_schema_label_present '$.results.measurements.getPaCPullNumber.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validateApplication.error_rate' horreum_schema_label_present '$.results.measurements.validateApplication.pass.duration.mean' -horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.error_rate' -horreum_schema_label_present '$.results.measurements.validateComponentBuildSA.pass.duration.mean' +horreum_schema_label_present '$.results.measurements.validateComponent.error_rate' +horreum_schema_label_present '$.results.measurements.validateComponent.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.error_rate' horreum_schema_label_present '$.results.measurements.validatePipelineRunCondition.pass.duration.mean' horreum_schema_label_present '$.results.measurements.validatePipelineRunCreation.error_rate' diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 7252d5a258..8d16b1a4d6 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -24,7 +24,7 @@ "createIntegrationTestScenario", "createComponent", "getPaCPullNumber", - "validateComponentBuildSA", + "validateComponent", "validatePipelineRunCreation", "validatePipelineRunCondition", "validatePipelineRunSignature", diff --git a/tests/load-tests/pkg/journey/handle_component.go 
b/tests/load-tests/pkg/journey/handle_component.go index 208412483d..b71e0761f8 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -123,18 +123,37 @@ func createComponent(f *framework.Framework, namespace, name, repoUrl, repoRevis return nil } -func validateComponentBuildSA(f *framework.Framework, namespace, name string) error { +func validateComponent(f *framework.Framework, namespace, name string) error { interval := time.Second * 10 timeout := time.Minute * 30 - component_sa := "build-pipeline-" + name // TODO It would be much better to watch this resource instead querying it - err := utils.WaitUntilWithInterval(f.AsKubeDeveloper.CommonController.ServiceAccountPresent(component_sa, namespace), interval, timeout) - if err != nil { - return fmt.Errorf("Component build SA %s in namespace %s not present: %v", component_sa, namespace, err) - } + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + comp, err := f.AsKubeDeveloper.HasController.GetComponent(name, namespace) + if err != nil { + logging.Logger.Debug("Unable to get component %s in namespace %s for its annotations: %v", name, namespace, err) + return false, nil + } - return nil + // If build.appstudio.openshift.io/request annotation is gone, component finished onboarding + _, ok := comp.Annotations["build.appstudio.openshift.io/request"] + if ! 
ok { + logging.Logger.Debug("Finished onboarding of component %s in namespace %s", name, namespace) + return true, nil + } + + // If it is still there, build.appstudio.openshift.io/status will have a reason + val, ok := comp.Annotations["build.appstudio.openshift.io/status"] + if ok { + logging.Logger.Debug("Onboarding of a component %s in namespace %s not finished yet: %s", name, namespace, val) + } else { + logging.Logger.Debug("Onboarding of a component %s in namespace %s not started yet", name, namespace) + } + + return false, nil + }, interval, timeout) + + return err } func getPaCPullNumber(f *framework.Framework, namespace, name string) (int, error) { @@ -322,13 +341,13 @@ func HandleComponent(ctx *PerComponentContext) error { // Validate component build service account created _, err = logging.Measure( - validateComponentBuildSA, + validateComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(65, "Component build SA not present: %v", err) + return logging.Logger.Fail(65, "Component failed onboarding: %v", err) } // Configure imagePullSecrets needed for component build task images From a4aa001959b9108cab696ba5851aaadfcd06260a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 08:26:10 +0200 Subject: [PATCH 190/321] feat: New error: Not enough nodes to schedule pod collected-data/jhutar-tenant/1/collected-taskrun-jhutar-app-ojdrv-comp-0-on-pull-request-7nghk-clone-repository.json "status": { "conditions": [ { "type": "Succeeded", "status": "Unknown", "lastTransitionTime": "2025-08-18T03:49:15Z", "reason": "Pending", "message": "pod status \"PodScheduled\":\"False\"; message: \"0/22 nodes are available: 1 node(s) didn't match pod affinity rules, 1 node(s) were unschedulable, 10 node(s) didn't match Pod's node affinity/selector, 3 node(s) had untolerated taint {node-role.kubernetes.io/infra: }, 3 node(s) had untolerated taint {node-role.kubernetes.io/master: }, 4 
node(s) had volume node affinity conflict. preemption: 0/22 nodes are available: 22 Preemption is not helpful for scheduling.\"" } ], "podName": "jhutar-app-ojdrv-comp-0-on-5c5e9e5a34ec841e2d904cdbf6d05e62-pod", --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 534457d9e0..0c20dfc4ae 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -141,6 +141,7 @@ ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"registry.access.redhat.com/.*"), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), + ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), } def message_to_reason(reasons_and_errors: set, msg: str) -> str: From d81d5943fc00e9bd183bbbab5e7c7eb9681c0c52 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 08:52:44 +0200 Subject: [PATCH 191/321] feat: New error: Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway collected-data/jhutar-tenant/1/pod-jhutar-app-cnglu-comp-0-on-push-5bj7d-rpms-signature-scan-pod-step-rpms-signature-scan.log + set -o pipefail + rpm_verifier --image-url quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-cnglu-comp-0:74d05ce77a91402bf7fe0373ed06e12ff206b638 --image-digest sha256:749fdc8cd4bd4c75561fae12150bbef9d300857b14d1c28722a83e331b45d955 --workdir /tmp Image: quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-cnglu-comp-0@sha256:749fdc8cd4bd4c75561fae12150bbef9d300857b14d1c28722a83e331b45d955 Error occurred: Warning: the default reading order of registry auth file will be changed from "${HOME}/.docker/config.json" to podman registry config locations in the future version of oc. "${HOME}/.docker/config.json" is deprecated, but can still be used for storing credentials as a fallback. See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations. error: unable to access the source layer sha256:828bf65a2578d8ffeac3a7557d19c3c3f7886a831ff06d866224ad077c728ed9: received unexpected HTTP status: 502 Bad Gateway {'error': 'Warning: the default reading order of registry auth file will be changed from "${HOME}/.docker/config.json" to podman registry config locations in the future version of oc. "${HOME}/.docker/config.json" is deprecated, but can still be used for storing credentials as a fallback. 
See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations.\nerror: unable to access the source layer sha256:828bf65a2578d8ffeac3a7557d19c3c3f7886a831ff06d866224ad077c728ed9: received unexpected HTTP status: 502 Bad Gateway\n'} ==================================== Final results: {"error": "Warning: the default reading order of registry auth file will be changed from \"${HOME}/.docker/config.json\" to podman registry config locations in the future version of oc. \"${HOME}/.docker/config.json\" is deprecated, but can still be used for storing credentials as a fallback. See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations.\nerror: unable to access the source layer sha256:828bf65a2578d8ffeac3a7557d19c3c3f7886a831ff06d866224ad077c728ed9: received unexpected HTTP status: 502 Bad Gateway\n"} Images processed: {"image": {"pullspec": "quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-cnglu-comp-0:74d05ce77a91402bf7fe0373ed06e12ff206b638", "digests": ["sha256:749fdc8cd4bd4c75561fae12150bbef9d300857b14d1c28722a83e331b45d955"]}} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0c20dfc4ae..0ca1e95a37 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -130,6 +130,7 @@ ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), + ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), } FAILED_TR_ERRORS = { From a0e8d147552a200aac8f86e9d95c6eabecb15425 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 08:58:58 +0200 Subject: [PATCH 192/321] feat: New error: Repo templating failed when updating file on github.com because 504 I0818 03:41:10.002998 3167 logging.go:30] DEBUG Repo-templating workflow: Cleaned up (second cleanup) for jhutar-tenant/jhutar-app-dnfwe/jhutar-app-dnfwe-comp-0 I0818 03:41:21.339178 3167 logging.go:64] FAIL(64): Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/jhutar-app-dnfwe-comp-0-pull-request.yaml in repo nodejs-devfile-sample-undef-jhutar revision main: error when updating a file on github: PUT https://api.github.com/repos/rhtap-perf-test/nodejs-devfile-sample-undef-jhutar/contents/.tekton/jhutar-app-dnfwe-comp-0-pull-request.yaml: 504 We couldn't respond to your request in time. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists. [] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0ca1e95a37..71ab15c289 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -68,6 +68,7 @@ ("Repo forking failed because import failed", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. fork to complete: Forking of project [^ ]+ .ID: [0-9]+. failed with import status: failed"), ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), + ("Repo templating failed when updating file on github.com because 504", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error forking project .*: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. 
fork to complete: context deadline exceeded"), From 070a7f9db6fc9d4ac4de410891c46ae026f08f10 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 09:39:21 +0200 Subject: [PATCH 193/321] feat: New error: Release failed because unauthorized when pushing artifact Discussing this here: https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1755761682198729 collected-data/jhutar-tenant/1/pod-managed-4cjdz-collect-data-pod-step-create-trusted-artifact.log Prepared artifact from /var/workdir/release (sha256:b630d892d14fe368d0c607f4b2d59c3360b91e7e53900165d811fe802893f987) Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading b630d892d14f sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map[] Command exited with non-zero status 1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 71ab15c289..26f4395ff6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -128,6 +128,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), + ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response 
from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), From 2109f3c0d4fb7526c3a8ac2bc1e5ce4f262bd036 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 09:55:32 +0200 Subject: [PATCH 194/321] feat: New error: Release failed because unauthorized when pulling policy This happened a few times yesterday morning on stone-stg-rh01 and a few hours after that another issue I added in the previous commit appeared.
collected-data/jhutar-tenant/1/pod-managed-2vtbp-verify-enterprise-contract-pod-step-validate.log Error: pulling policy: GET "https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:7ccce9b3fe91e37d11494c8b3eb1aefa90124ccddd5208914722bc432b9e822d": response status code 401: Unauthorized --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 26f4395ff6..ff4a98573c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -128,6 +128,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), + ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), From c20865f2af321fb987996783774f2b248351778f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 10:53:30 +0200 Subject: [PATCH 195/321] feat: New error: Enterprise contract results failed validation Output is pretty short here and hopefully the regexp will not be catching false positives. Anyway looking at the code, it seems to be intended that way: { "name": "assert", "image": "quay.io/conforma/cli@sha256:9275ec5f062399135ecb3f54f520ee5f20e818a846c34250e0d417dce9221459", "command": [ "jq" ], "args": [ "--argjson", "strict", "1", "-e", ".result == \"SUCCESS\" or .result == \"WARNING\" or ($strict | not)\n", "/tekton/results/TEST_OUTPUT" ], "computeResources": { "limits": { "memory": "256Mi" }, "requests": { "cpu": "100m", "memory": "256Mi" } } } Error from collected-data/jhutar-tenant/1/pod-managed-m5psc-verify-enterprise-contract-pod-step-assert.log: false --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ff4a98573c..7caf047877 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -110,6 +110,7 @@ ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for
repository \"build\": Usable URL not found"), + ("Enterprise contract results failed validation", r"^false $"), ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), ("Error allocating host because of insufficient free addresses in subnet", r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances."), ("Error allocating host because of provisioning error", r"Error allocating host: failed to provision host"), From f601b8103aaeaffa4eacd6e1bd533348b3c4ed40 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 11:02:09 +0200 Subject: [PATCH 196/321] feat: New error: Failed to pull container from registry.access.redhat.com because of remote tls error collected-data/jhutar-tenant/1/pod-jhutar-app-kzehx-comp-0-on-push-khscl-build-container-pod-step-build.log [2025-08-19T02:12:17,556933388+00:00] Update CA trust INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-08-19T02:12:18,491951854+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Preparing construction of content-sets.json to be placed at /usr/share/buildinfo/content-sets.json in the image Constructed the following: { "metadata": { "icm_version": 1, "icm_spec": "https://raw.githubusercontent.com/containerbuildsystem/atomic-reactor/master/atomic_reactor/schemas/content_manifest.json", "image_layer_index": 0 }, "from_dnf_hint": true, "content_sets": [] } Appending a COPY command to the Containerfile [2025-08-19T02:12:18,504543054+00:00] Prepare system (architecture: x86_64) Trying to pull registry.access.redhat.com/ubi10/nodejs-22:latest... 
Error: copying system image from manifest list: parsing image configuration: Get "https://cdn01.quay.io/quayio-production-s3/sha256/c5/c53ee82572a1a6e66c53d496f18d67bf257f98d0a5c3efbeefe48e0be9e8b9c3?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=...%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250819T021218Z&X-Amz-Expires=600&X-Amz-SignedHeaders=host&X-Amz-Signature=...®ion=us-east-1&namespace=redhat-prod&username=redhat-prod+registry_proxy&repo_name=ubi10----nodejs-22&akamai_signature=exp=1755570438~hmac=...": remote error: tls: internal error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7caf047877..1d4bdbad0a 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -123,6 +123,7 @@ ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), + ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ 
remote error: tls: internal error"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), From 915c01734b809d534469676e60727d7912ed3349 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 11:07:02 +0200 Subject: [PATCH 197/321] feat: New error: Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error collected-data/jhutar-tenant/1/pod-jhutar-app-zpvzf-comp-0-on-push-hn8br-build-container-pod-step-build.log [2025-08-19T06:42:25,540656598+00:00] Update CA trust INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-08-19T06:42:26,469598930+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. 
Preparing construction of content-sets.json to be placed at /usr/share/buildinfo/content-sets.json in the image Constructed the following: { "metadata": { "icm_version": 1, "icm_spec": "https://raw.githubusercontent.com/containerbuildsystem/atomic-reactor/master/atomic_reactor/schemas/content_manifest.json", "image_layer_index": 0 }, "from_dnf_hint": true, "content_sets": [] } Appending a COPY command to the Containerfile [2025-08-19T06:42:26,482126806+00:00] Prepare system (architecture: x86_64) Trying to pull registry.access.redhat.com/ubi10/nodejs-22:latest... Getting image source signatures Error: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/ubi10/nodejs-22@sha256=f08544f6186535805ff1124a8104024c24e6d9c9bb048dfc9fe072befc781390/signature-7: status 500 (Internal Server Error) --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1d4bdbad0a..bc20ae6ebe 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -121,6 +121,7 @@ ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), + ("Failed to pull container from 
registry.access.redhat.com because of 500 Internal Server Error", r"Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+: status 500 .Internal Server Error."), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), From 1bed5a68c2df85a0b1bf69fe47fd34f612c78776 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 21 Aug 2025 11:13:45 +0200 Subject: [PATCH 198/321] feat: New error: Failed to pull container from registry.access.redhat.com because of unauthorized collected-data/jhutar-tenant/1/pod-jhutar-app-yrtti-comp-0-on-push-txrvk-build-source-image-pod-step-build.log 2025-08-19 03:45:10,364:source-build:DEBUG:workspace directory /var/workdir 2025-08-19 03:45:10,364:source-build:DEBUG:working directory /var/workdir/source-build 2025-08-19 03:45:10,368:build-source.source-archive:DEBUG:Stashing any changes to working repo ['git', 'stash'] Saved working directory and index state WIP on (no branch): 6acc311 e2e test commit 
message 2025-08-19 03:45:10,379:build-source.source-archive:DEBUG:Collecting timestamp of the commit at HEAD ['git', 'show', '-s', '--format=%cI'] 2025-08-19 03:45:10,382:build-source.source-archive:DEBUG:Generate source repo file list ['git', 'ls-files', '--recurse-submodules', '-z'] 2025-08-19 03:45:10,384:build-source.source-archive:DEBUG:Generate source archive ['tar', 'caf', '/var/workdir/source-build/source_archive/nodejs-devfile-sample-undef-jhutar-6acc311bc5cdcfabeb7a196e39ee456edae88623.tar.gz', '--mtime', '2025-08-19T05:41:27+02:00', '--transform', 's,^,nodejs-devfile-sample-undef-jhutar-6acc311bc5cdcfabeb7a196e39ee456edae88623/,', '--null', '-T-'] 2025-08-19 03:45:10,390:build-source.source-archive:DEBUG:Popping any stashed changes to working repo ['git', 'stash', 'pop'] HEAD detached at 6acc311 Changes not staged for commit: (use "git add ..." to update what will be committed) (use "git restore ..." to discard changes in working directory) modified: package-lock.json no changes added to commit (use "git add" and/or "git commit -a") Dropped refs/stash@{0} (560f01376ee69000592473de33db201c70e14011) 2025-08-19 03:45:10,397:build-source.source-archive:INFO:add source archive directory to sources for bsi: /var/workdir/source-build/source_archive 2025-08-19 03:45:10,550:source-build:ERROR:command execution failure, status: 1, stderr: time="2025-08-19T03:45:10Z" level=fatal msg="Error parsing image name \"docker://registry.access.redhat.com/ubi10/nodejs-22-minimal@sha256:9bb64834bf2c1c846bdf13ad90b2c796974623b78093cbe538803fe5b958622b\": unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
Further instructions can be found here: https://access.redhat.com/RegistryAuthentication" Traceback (most recent call last): File "/opt/source_build/source_build.py", line 1137, in main build_result = build(build_args) ^^^^^^^^^^^^^^^^^ File "/opt/source_build/source_build.py", line 1101, in build source_image = resolve_source_image(base_image, args.registry_allowlist) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/source_build/source_build.py", line 504, in resolve_source_image return resolve_source_image_by_version_release(binary_image) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/source_build/source_build.py", line 465, in resolve_source_image_by_version_release image_config = fetch_image_config(f"{name}@{digest}") ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/source_build/source_build.py", line 189, in fetch_image_config return run(cmd, check=True, text=True, capture_output=True).stdout.strip() ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.11/subprocess.py", line 571, in run raise CalledProcessError(retcode, process.args, subprocess.CalledProcessError: Command '['skopeo', 'inspect', '--config', '--retry-times', '5', 'docker://registry.access.redhat.com/ubi10/nodejs-22-minimal@sha256:9bb64834bf2c1c846bdf13ad90b2c796974623b78093cbe538803fe5b958622b']' returned non-zero exit status 1. 
2025-08-19 03:45:10,551:source-build:INFO:build result {"status": "failure", "message": "Command '['skopeo', 'inspect', '--config', '--retry-times', '5', 'docker://registry.access.redhat.com/ubi10/nodejs-22-minimal@sha256:9bb64834bf2c1c846bdf13ad90b2c796974623b78093cbe538803fe5b958622b']' returned non-zero exit status 1."} 2025-08-19 03:45:10,551:source-build:INFO:write build result into file /tekton/results/BUILD_RESULT --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index bc20ae6ebe..9b2c2b510f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -125,6 +125,7 @@ ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), + ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
.* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), From f84deb68494d423c684b8b2a2fc3b99d5a024eef Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 08:22:38 +0200 Subject: [PATCH 199/321] feat(KONFLUX-9621): Add a cleanup of ImageRepository CRs in case they were detached from their Component Suggestion from Robert Cerven via Slack (edited): > One more thing based on https://issues.redhat.com/browse/KONFLUX-9621 those old imageRepositories. > When you create component with that "image.redhat.com/generate" annotation it will create ImageRepository for you. > Component is waiting for ImageRepository to create quay.io repository AND then update spec.containerImage. > But IF ImageRepository isn't yet processed by controller (overloaded cluster with infra issue or too many onboardings), > and it doesn't actually finishes, IT WON'T set component as the OWNER, so when you remove in that state component first, ImageRepository will be left there forever until you remove it manually.
> SO I suggest that when your timeout finishes AND component still has: `{"message":"waiting for spec.containerImage to be set by ImageRepository with annotation image-controller.appstudio.redhat.com/update-component-image"}` you should in your test explicitly remove that imageRepository for the component, IR will be named: `fmt.Sprintf("imagerepository-for-%s-%s", component.Spec.Application, component.Name)`. --- pkg/clients/has/components.go | 26 ++++++++++++++++++++ tests/load-tests/pkg/journey/handle_purge.go | 5 ++++ 2 files changed, 31 insertions(+) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index d6d8d68d8f..84536c5876 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -569,6 +569,32 @@ func (h *HasController) CheckImageRepositoryExists(namespace, componentName stri } } +// DeleteAllImageRepositoriesInASpecificNamespace removes all image repository CRs from a specific namespace. Useful when cleaning up a namespace and component cleanup did not clean its image repository +func (h *HasController) DeleteAllImageRepositoriesInASpecificNamespace(namespace string, timeout time.Duration) error { + // temporary logs + start := time.Now() + GinkgoWriter.Printf("Start to delete all image repositories in namespace '%s' at %s\n", namespace, start.String()) + + if err := h.KubeRest().DeleteAllOf(context.Background(), &imagecontroller.ImageRepository{}, rclient.InNamespace(namespace)); err != nil { + return fmt.Errorf("error deleting image repositories from the namespace %s: %+v", namespace, err) + } + + imageRepositoryList := &imagecontroller.ImageRepositoryList{} + + err := utils.WaitUntil(func() (done bool, err error) { + if err := h.KubeRest().List(context.Background(), imageRepositoryList, &rclient.ListOptions{Namespace: namespace}); err != nil { + return false, nil + } + return len(imageRepositoryList.Items) == 0, nil + }, timeout) + + // temporary logs + deletionTime := time.Since(start).Minutes() +
GinkgoWriter.Printf("Finish to delete all image repositories in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339), deletionTime) + + return err +} + // Gets value of a specified annotation in a component func (h *HasController) GetComponentAnnotation(componentName, annotationKey, namespace string) (string, error) { component, err := h.GetComponent(componentName, namespace) diff --git a/tests/load-tests/pkg/journey/handle_purge.go b/tests/load-tests/pkg/journey/handle_purge.go index 5a1cf8dda7..f97d8ec680 100644 --- a/tests/load-tests/pkg/journey/handle_purge.go +++ b/tests/load-tests/pkg/journey/handle_purge.go @@ -22,6 +22,11 @@ func purgeStage(f *framework.Framework, namespace string) error { return fmt.Errorf("Error when deleting components in namespace %s: %v", namespace, err) } + err = f.AsKubeDeveloper.HasController.DeleteAllImageRepositoriesInASpecificNamespace(namespace, time.Minute*5) + if err != nil { + return fmt.Errorf("Error when deleting image repositories in namespace %s: %v", namespace, err) + } + err = f.AsKubeDeveloper.TektonController.DeleteAllPipelineRunsInASpecificNamespace(namespace) if err != nil { return fmt.Errorf("Error when deleting pipeline runs in namespace %s: %v", namespace, err) From c32ff5cb9de75e74d35e75f06b0fe02cf7f3fca7 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 08:54:13 +0200 Subject: [PATCH 200/321] feat: New error: Timeout onboarding component I0820 16:49:01.700948 11630 logging.go:30] DEBUG Creating component jhutar-app-unxkn-comp-0 in namespace jhutar-tenant I0820 16:49:01.779082 11630 logging.go:30] DEBUG Onboarding of a component jhutar-app-unxkn-comp-0 in namespace jhutar-tenant not started yet [...] 
I0820 17:04:41.777497 11630 logging.go:30] DEBUG Onboarding of a component jhutar-app-unxkn-comp-0 in namespace jhutar-tenant not started yet I0820 17:04:51.783258 11630 logging.go:30] DEBUG Onboarding of a component jhutar-app-unxkn-comp-0 in namespace jhutar-tenant not finished yet: {"message":"waiting for spec.containerImage to be set by ImageRepository with annotation image-controller.appstudio.redhat.com/update-component-image"} [...] I0820 17:19:01.782761 11630 logging.go:30] DEBUG Onboarding of a component jhutar-app-unxkn-comp-0 in namespace jhutar-tenant not finished yet: {"message":"waiting for spec.containerImage to be set by ImageRepository with annotation image-controller.appstudio.redhat.com/update-component-image"} I0820 17:19:01.782809 11630 logging.go:64] FAIL(65): Component failed onboarding: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 9b2c2b510f..3c05aafa97 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -93,6 +93,7 @@ # context deadline exceeded``` ("Timeout listing pipeline runs", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), ("Timeout listing pipeline runs", r"Repo-templating workflow component cleanup failed: Error deleting on-push merged PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), + ("Timeout onboarding component", r"Component failed onboarding: context deadline exceeded"), ("Timeout waiting for build pipeline to be created", r"Build Pipeline Run failed creation: context deadline exceeded"), ("Timeout waiting for integration test scenario to validate", r"Integration test scenario failed validation: context deadline exceeded"), ("Timeout waiting for 
release pipeline to be created", r"Release pipeline run failed creation: context deadline exceeded"), From 18f229361d9ef4893875753f5fb85dd77600d485 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 09:01:05 +0200 Subject: [PATCH 201/321] feat: New error: Failed component creation because it already exists I0821 15:10:26.638096 54942 logging.go:30] DEBUG Creating component jhutar-app-hroqx-comp-0 in namespace jhutar-tenant I0821 15:10:36.634304 54942 logging.go:64] FAIL(60): Component failed creation: Unable to create the Component jhutar-app-hroqx-comp-0: components.appstudio.redhat.com "jhutar-app-hroqx-comp-0" already exists --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3c05aafa97..409903e912 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -39,6 +39,7 @@ ("Error deleting on-pull-request default PipelineRun", r"Repo-templating workflow component cleanup failed: Error deleting on-pull-request default PipelineRun in namespace .*: Unable to list PipelineRuns for component .* in namespace .*: context deadline exceeded"), ("Error updating .tekton file in gitlab.cee.redhat.com", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+ in repo .*: Failed to update/create file: PUT https://gitlab.cee.redhat.com/api/v4/projects/[^ ]+/repository/files/.tekton/.*: 400 .message: A file with this name doesn't exist."), ("Failed application creation when calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application .*: Internal error occurred: failed calling webhook .*mapplication.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application.* no endpoints available for service .*application-service-webhook-service"), + ("Failed component creation 
because it already exists", r"Component failed creation: Unable to create the Component [^ ]+: components.appstudio.redhat.com \"[^ ]+\" already exists"), ("Failed component creation because resource quota evaluation timed out", r"Component failed creation: Unable to create the Component .*: Internal error occurred: resource quota evaluation timed out"), ("Failed component creation when calling mcomponent.kb.io webhook", r"Component failed creation: Unable to create the Component .*: Internal error occurred: failed calling webhook .*mcomponent.kb.io.*: failed to call webhook: Post .*https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-component.* no endpoints available for service .*application-service-webhook-service.*"), ("Failed creating integration test scenario because admission webhook dintegrationtestscenario.kb.io could not find application", r"Integration test scenario failed creation: Unable to create the Integration Test Scenario [^ ]+: admission webhook \"dintegrationtestscenario.kb.io\" denied the request: could not find application '[^ ]+' in namespace '[^ ]+'"), From 4e74bbd257a1c083c228dda274def5fec8c23e62 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:11:15 +0200 Subject: [PATCH 202/321] feat: New error: Failed to pull container from registry.access.redhat.com because of unauthorized collected-data/jhutar-tenant/1/pod-jhutar-app-bpbnf-comp-0-on-push-z9twv-calculate-deps-s390x-pod-step-mock-build.log DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8966336/s390x/toplink/vol/rhel-10/packages/zstd/1.5.5/9.el10/s390x/zstd-1.5.5-9.el10.s390x.rpm HTTP/1.1" 200 522354 Directory walk started Directory walk done - 184 packages Temporary output repo path: /results/buildroot_repo/.repodata/ Pool started (with 5 workers) Pool finished INFO:__main__:Pulling like: podman pull --arch s390x 
registry.access.redhat.com/ubi9/ubi@sha256:85b679a3547575692ed765a4ad005955da4b1d65d886ce9beb0727ffe06af565 Trying to pull registry.access.redhat.com/ubi9/ubi@sha256:85b679a3547575692ed765a4ad005955da4b1d65d886ce9beb0727ffe06af565... Error: internal error: unable to copy from source docker://registry.access.redhat.com/ubi9/ubi@sha256:85b679a3547575692ed765a4ad005955da4b1d65d886ce9beb0727ffe06af565: initializing source docker://registry.access.redhat.com/ubi9/ubi@sha256:85b679a3547575692ed765a4ad005955da4b1d65d886ce9beb0727ffe06af565: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. Further instructions can be found here: https://access.redhat.com/RegistryAuthentication Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 151, in _main() ~~~~~^^ File "/usr/bin/mock-hermetic-repo", line 146, in _main prepare_image(data["config"]["bootstrap_image"], data["bootstrap"], ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ options.output_repo) ^^^^^^^^^^^^^^^^^^^^ File "/usr/bin/mock-hermetic-repo", line 115, in prepare_image subprocess.check_output(pull_cmd) ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 472, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, ~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ **kwargs).stdout ^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 577, in run raise CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) subprocess.CalledProcessError: Command '['podman', 'pull', '--arch', 's390x', 'registry.access.redhat.com/ubi9/ubi@sha256:85b679a3547575692ed765a4ad005955da4b1d65d886ce9beb0727ffe06af565']' returned non-zero exit status 125. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 409903e912..96f5e5ee0a 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -127,6 +127,7 @@ ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), + ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...podman....pull.*registry.access.redhat.com/.* returned non-zero exit status 125"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
.* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 From 513a0b1a3ab36410642f54085fd7733e9b2bc3cb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:15:20 +0200 Subject: [PATCH 203/321] feat: New error: Failed to pull container from registry.access.redhat.com because of unauthorized collected-data/konflux-perfscale-tenant/1/pod-konflux-perfscaa50b01befc2005089e510f93214c885544a020ae92d2-pod-step-build.log [2025-08-18T03:21:48,970379280+00:00] Validate context path [2025-08-18T03:21:48,972497344+00:00] Update CA trust [2025-08-18T03:21:48,973256875+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Could not find prefetched sbom. 
No content_sets found for ICM [2025-08-18T03:21:48,977668129+00:00] Prepare system (architecture: x86_64) [2025-08-18T03:21:48,987812566+00:00] Setup prefetched [2025-08-18T03:21:48,989914779+00:00] Register sub-man Adding the entitlement to the build [2025-08-18T03:21:48,992571739+00:00] Add secrets [2025-08-18T03:21:48,998381390+00:00] Run buildah build [2025-08-18T03:21:48,999058106+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-08-18T03:21:48 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=4bc81dd6e951237c2809052ce2e8b1fa50c316b6 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Dockerfile.mI892S -t quay.io/konflux-fedora/konflux-perfscale-tenant/konflux-perfscale-app-kbuzm-comp-0:on-pr-4bc81dd6e951237c2809052ce2e8b1fa50c316b6 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi10/nodejs-22:latest Trying to pull registry.access.redhat.com/ubi10/nodejs-22:latest... Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi10/nodejs-22:latest: initializing source docker://registry.access.redhat.com/ubi10/nodejs-22:latest: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
Further instructions can be found here: https://access.redhat.com/RegistryAuthentication --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 96f5e5ee0a..c9801b49d0 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -127,6 +127,7 @@ ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), + ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials."), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
.* subprocess.CalledProcessError: Command ...podman....pull.*registry.access.redhat.com/.* returned non-zero exit status 125"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), From 5f9560a76908c9a9de2b23d2693b68fe68ac2961 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:25:51 +0200 Subject: [PATCH 204/321] feat: New error: Couldnt get task via buldles resolver from quay.io due to 404 I0817 14:04:19.818394 7046 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-agelz-comp-0 in namespace jhutar-tenant to finish I0817 14:04:19.858267 7046 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-agelz-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-17 14:04:06 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/jhutar-app-agelz-comp-0-on-pull-request-lcmfz can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type bundles\nname = init\n": error requesting remote resource: error getting "bundleresolver" "jhutar-tenant/bundles-7028908229c23db1f4e8e2394b002e35": cannot retrieve the oci image: GET 
https://quay.io/v2/konflux-ci/tekton-catalog/task-init/manifests/sha256:08e18a4dc5f947c1d20e8353a19d013144bea87b72f67236b165dd4778523951: unexpected status code 404 Not Found: 404 page not found --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c9801b49d0..dccc2f2720 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -26,6 +26,7 @@ ("Couldnt get pipeline via bundles resolver from quay.io due to 429", r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests"), ("Couldnt get pipeline via git resolver from gitlab.cee due to 429", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), + ("Couldnt get task via buldles resolver from quay.io due to 404", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 404 Not Found"), ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", 
r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), From 009bcdd82f2d724a4889cfd5e890f95702e6c991 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:31:16 +0200 Subject: [PATCH 205/321] feat: New error: Failed to pull container from quay.io due to 404 collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-dfjky-comp-0-on-push-8jj44-rpmbuild-s390x-pod-step-create-trusted-artifact.log Prepared artifact from /var/workdir/results (sha256:476ba0983eada2554772129e83d73877fbb6a5f39712902f576748c17bf89689) Using token for quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-dfjky-comp-0 Uploading 476ba0983ead rpmbuild-artifact Uploaded 476ba0983ead rpmbuild-artifact Error response from registry: recognizable error message not found: PUT "https://quay.io/v2/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-dfjky-comp-0/manifests/5e79489d301047963f6a84fa67b8327ae1b09109.rpmbuild-s390x": response status code 404: Not Found Command exited with non-zero status 1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index dccc2f2720..3fef93b2d9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -130,6 +130,7 @@ ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal 
credentials."), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...podman....pull.*registry.access.redhat.com/.* returned non-zero exit status 125"), + ("Failed to pull container from quay.io due to 404", r"Error response from registry: recognizable error message not found: PUT .https://quay.io/[^ ]+.: response status code 404: Not Found Command exited with non-zero status 1"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 From 4716fb4fe98d685140a62bfc72eb8952efe35ed0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:37:24 +0200 Subject: [PATCH 206/321] feat: New error: Failed to push to quai.io due to 404 collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-dfjky-comp-0-on-push-8jj44-rpmbuild-s390x-pod-step-create-trusted-artifact.log Prepared artifact from /var/workdir/results (sha256:476ba0983eada2554772129e83d73877fbb6a5f39712902f576748c17bf89689) Using token for 
quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-dfjky-comp-0 Uploading 476ba0983ead rpmbuild-artifact Uploaded 476ba0983ead rpmbuild-artifact Error response from registry: recognizable error message not found: PUT "https://quay.io/v2/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-dfjky-comp-0/manifests/5e79489d301047963f6a84fa67b8327ae1b09109.rpmbuild-s390x": response status code 404: Not Found Command exited with non-zero status 1 --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3fef93b2d9..699387d030 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -124,15 +124,16 @@ ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), + ("Failed to pull container from quay.io due to 404", r"Error response from registry: recognizable error message not found: PUT .https://quay.io/[^ ]+.: response status code 404: Not Found Command exited with non-zero status 1"), ("Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: copying system image from manifest list: reading 
signatures: reading signature from https://access.redhat.com/[^ ]+: status 500 .Internal Server Error."), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Trying to pull registry.access.redhat.com/[^ ]+ Error: copying system image from manifest list: parsing image configuration: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials."), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
.* subprocess.CalledProcessError: Command ...podman....pull.*registry.access.redhat.com/.* returned non-zero exit status 125"), - ("Failed to pull container from quay.io due to 404", r"Error response from registry: recognizable error message not found: PUT .https://quay.io/[^ ]+.: response status code 404: Not Found Command exited with non-zero status 1"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), + ("Failed to push to quai.io due to 404", r"Error response from registry: recognizable error message not found: PUT \"https://quay.io/[^ ]+\": response status code 404"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), From 0142fefcf99b2a87212bafa56f1f51c999fc1593 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:42:39 +0200 Subject: [PATCH 207/321] feat: New error: Back-off 
pulling task run image from registry.redhat.io {"lastTransitionTime": "2025-08-19T01:02:14Z", "message": "the step \"upload-sbom-to-quay\" in TaskRun \"jhutar-1-app-bpdvy-comp-0-on-push-4skgv-upload-to-quay\" failed to pull the image \"\". The pod errored with the message: \"Back-off pulling image \"registry.redhat.io/rhtas/cosign-rhel9:1.1.1@sha256:3cd261cd4fed03688c6fd3c6161ae1ec69e908bbb6593ec279415414c7422535\".\"", "reason": "TaskRunImagePullFailed", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 699387d030..c81bbdfb23 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -154,6 +154,7 @@ ("SKIP", r"\"message\": \".* exited with code 255.*\""), # Another special error to avoid printing 'Unknown error:' message ("Back-off pulling task run image from quay.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"quay.io/.*"), ("Back-off pulling task run image from registry.access.redhat.com", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"registry.access.redhat.com/.*"), + ("Back-off pulling task run image from registry.redhat.io", r"the step .* in TaskRun .* failed to pull the image .*. The pod errored with the message: \\\"Back-off pulling image \\\"registry.redhat.io/.*"), ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), From 03ed6f15464959f266bc74c59e670685792f0a38 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:45:09 +0200 Subject: [PATCH 208/321] feat: New error: Post-test data collection failed --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index c81bbdfb23..d249cb1bdb 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -57,6 +57,7 @@ ("Failed validating release condition", r"Release .* in namespace .* failed: .*Message:Release validation failed.*"), ("GitLab token used by test expired", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*error: invalid_token.*error_description: Token is expired. You can either do re-authorization or token refresh"), ("Pipeline failed", r"Build Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Post-test data collection failed", r"Failed to collect application JSONs"), ("Post-test data collection failed", r"Failed to collect pipeline run JSONs"), ("Post-test data collection failed", r"Failed to collect release related JSONs"), ("Release failed in progress without error given", r"Release failed: Release .* in namespace .* failed: .Type:Released Status:False .* Reason:Progressing Message:.$"), From 9fb5d05f06702303b56de6b343bf581955109a4d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 10:49:04 +0200 Subject: [PATCH 209/321] feat: New error: Failed downloading rpms for hermetic builds collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-undzr-comp-0-o8d9a52bd5a236c8a9db457eb4ac3a226-pod-step-mock-build.log + remote_cmd podman run -v /home/u-4bcc0429056d4868ade24ff93df7/results:/results --privileged --rm -ti 
quay.io/redhat-user-workloads/rpm-build-pipeline-tenant/environment:1590e34b2c99590a6a86c14243eb641a1fe8168e mock-hermetic-repo --lockfile /results/buildroot_lock.json --output-repo /results/buildroot_repo + ssh -o StrictHostKeyChecking=no u-4bcc0429056d4868ade24ff93df7@10.207.4.133 podman run -v /home/u-4bcc0429056d4868ade24ff93df7/results:/results --privileged --rm -ti quay.io/redhat-user-workloads/rpm-build-pipeline-tenant/environment:1590e34b2c99590a6a86c14243eb641a1fe8168e mock-hermetic-repo --lockfile /results/buildroot_lock.json --output-repo /results/buildroot_repo [...] DEBUG:urllib3.connectionpool:https://d2lzkl7pfhq30w.cloudfront.net:443 "GET /pub/fedora/linux/development/rawhide/Everything/x86_64/os/Packages/z/zstd-1.5.7-2.fc43.x86_64.rpm HTTP/1.1" 200 497529 ERROR:__main__:RPM deps downloading failed --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index d249cb1bdb..231e1b2f45 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -122,6 +122,7 @@ ("Failed because of quay.io returned 502", r"level=fatal msg=.Error parsing image name .*docker://quay.io/.* Requesting bearer token: invalid status code from registry 502 .Bad Gateway."), ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), + ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), ("Failed to provision MPC VM due to resource quota 
evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 51e5b1975ca0a7cd53a9e020bcf4d1b801e772b6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 22 Aug 2025 11:11:44 +0200 Subject: [PATCH 210/321] feat: New error: Failed to prefetch dependencies due to download timeout collected-data/jhutar-tenant/1/pod-jhutar-app-gjqxc-comp-0-on-push-5zxr7-prefetch-dependencies-pod-step-prefetch-dependencies.log 2025-08-21 17:47:35,622 ERROR Unsuccessful download: https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz 2025-08-21 17:47:35,627 ERROR FetchError: exception_name: TimeoutError, details: Error: FetchError: exception_name: TimeoutError, details: The error might be intermittent, please try again. If the issue seems to be on the cachi2 side, please contact the maintainers. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 231e1b2f45..3f844273fa 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -123,6 +123,7 @@ ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), + ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 
f564bdcfde750aed6fcde546c0a99a08c8aed11d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 25 Aug 2025 09:34:34 +0200 Subject: [PATCH 211/321] feat: New error: Repo forking failed as GitLab CEE says 405 Method Not Allowed I0825 05:15:39.442373 14190 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/nodejs-devfile-sample7 with suffix undef-konflux-perfscale to jhutar I0825 05:15:39.589317 14190 logging.go:64] FAIL(80): Repo forking failed: Error deleting project jhutar/nodejs-devfile-sample7-undef-konflux-perfscale: DELETE https://gitlab.cee.redhat.com/api/v4/projects/jhutar/nodejs-devfile-sample7-undef-konflux-perfscale: 405 {message: Non GET methods are not allowed for moved projects} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3f844273fa..98342be0bc 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -64,6 +64,7 @@ ("Release failure: PipelineRun not created", r"couldn't find PipelineRun in managed namespace '%s' for a release '%s' in '%s' namespace"), ("Release Pipeline failed", r"Release pipeline run failed:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Repo forking failed as GitLab CEE says 401 Unauthorized", r"Repo forking failed: Error deleting project .*: DELETE https://gitlab.cee.redhat.com/.*: 401 .*message: 401 Unauthorized.*"), + ("Repo forking failed as GitLab CEE says 405 Method Not Allowed", r"Repo forking failed: Error deleting project [^ ]+: DELETE https://gitlab.cee.redhat.com/[^ ]+: 405 .message: Non GET methods are not allowed for moved projects."), ("Repo forking failed as GitLab CEE says 500 Internal Server Error", r"Repo forking failed: Error deleting project .*: GET https://gitlab.cee.redhat.com/.*: 500 failed to parse unknown error format.*500: We're sorry, something went wrong on our end"), ("Repo forking failed as the target is still being deleted", r"Repo forking failed: Error 
forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted"), ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), From 243359c16307dd8f805db67aab50e9f5c6ad3312 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 25 Aug 2025 09:42:40 +0200 Subject: [PATCH 212/321] feat: New error: Timeout forking the repo before the actual test I0823 15:39:34.584694 59092 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/libecpg-gitlab-fork with suffix rhelp01-jhutar to jhutar I0823 15:44:35.742776 59092 logging.go:64] FAIL(80): Repo forking failed: context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 98342be0bc..6a80d5e7a9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -74,6 +74,7 @@ ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), ("Repo templating failed when updating file on github.com because 504", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 504 We couldn't respond to your request in time. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists."), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Timeout forking the repo before the actual test", r"Repo forking failed: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error forking project .*: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. fork to complete: context deadline exceeded"), ("Timeout getting build service account", r"Component build SA not present: Component build SA .* not present: context deadline exceeded"), From 34f025ca0bfda62cc266f53fa693451c6efcbac4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 25 Aug 2025 10:04:28 +0200 Subject: [PATCH 213/321] feat(KONFLUX-9835): When repo got moved, that means it was deleted Some info from Inderpreet: In the past (I think before GitLab 16.0), deleting a project was a much more immediate and final action. The problem with this approach was the risk of accidental data loss. Restoring a deleted project often required a complex and time-consuming process. In later versions Gitlab added a kind of grace-period to keep the project with different name for some days so that user can recover it in case it was an accidental deletion. I think how it works now is as below: First Deletion: Navigate to your project's Settings > General > Advanced. Under the "Delete project" section, click the Delete project button. This will change the project's status to "pending deletion" and rename the repository to allow you to immediately create a new project with the same name. Second Deletion: After the first step, the "Delete project" button will become available again. Clicking it a second time and confirming will permanently and immediately delete the project. 
--- pkg/clients/gitlab/git.go | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 6f6086ebf6..904e412b4e 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -258,6 +258,22 @@ func (gc *GitlabClient) GetCommitStatusConclusion(statusName, projectID, commitS // DeleteRepositoryIfExists deletes a GitLab repository if it exists. // Returns an error if the deletion fails except for project not being found (404). func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { + getProj, getResp, getErr := gc.client.Projects.GetProject(projectID, nil) + if getErr != nil { + if getResp != nil && getResp.StatusCode == http.StatusNotFound { + return nil + } else { + return fmt.Errorf("Error getting project %s: %v", projectID, getErr) + } + } + if getProj.PathWithNamespace != projectID && strings.Contains(getProj.PathWithNamespace, projectID + "-deleted-") { + // We asked for repo like "jhutar/nodejs-devfile-sample7-ocpp01v1-konflux-perfscale" + // and got "jhutar/nodejs-devfile-sample7-ocpp01v1-konflux-perfscale-deleted-138805" + // and that means repo was moved by being deleted for a first + // time, entering a grace period. 
+ return nil + } + resp, err := gc.client.Projects.DeleteProject(projectID) if err != nil { @@ -272,7 +288,7 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { } err = utils.WaitUntilWithInterval(func() (done bool, err error) { - _, getResp, getErr := gc.client.Projects.GetProject(projectID, nil) + getProj, getResp, getErr := gc.client.Projects.GetProject(projectID, nil) if getErr != nil { if getResp != nil && getResp.StatusCode == http.StatusNotFound { return true, nil @@ -280,6 +296,11 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { return false, getErr } } + if getProj.PathWithNamespace != projectID && strings.Contains(getProj.PathWithNamespace, projectID + "-deleted-") { + return true, nil + } + + fmt.Printf("Repo %s still exists: %v\n", projectID, getResp) return false, nil }, time.Second * 10, time.Minute * 5) From ba01f3c2721669c53cf1d9a18acab97ef96f5714 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 25 Aug 2025 15:45:01 +0200 Subject: [PATCH 214/321] feat: New error: Failed to push SBOM to quay.io collected-data/jhutar-tenant/1/pod-jhutar-app-ihswh-comp-0-on-push-2mr9s-build-container-pod-step-upload-sbom.log [2025-08-25T10:12:17,072351892+00:00] Upload SBOM INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Using token for quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-ihswh-comp-0 WARNING: SBOM attachments are deprecated and support will be removed in a Cosign release soon after 2024-02-22 (see https://github.com/sigstore/cosign/issues/2755). Instead, please use SBOM attestations. WARNING: Attaching SBOMs this way does not sign them. To sign them, use 'cosign attest --predicate sbom.json --key '. 
Uploading SBOM file for [quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-ihswh-comp-0@sha256:8fd905b770234c6890300085b1bda48737839572e8ed97d91f1c062ab907f7da] to [quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-ihswh-comp-0:sha256-8fd905b770234c6890300085b1bda48737839572e8ed97d91f1c062ab907f7da.sbom] with mediaType [text/spdx+json]. Error: Get "https://quay.io/v2/": dial tcp [2600:1f18:483:cf01:807a:273b:44df:d4f7]:443: connect: network is unreachable main.go:74: error during command execution: Get "https://quay.io/v2/": dial tcp [2600:1f18:483:cf01:807a:273b:44df:d4f7]:443: connect: network is unreachable --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 6a80d5e7a9..de90405fb0 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -138,6 +138,7 @@ ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...podman....pull.*registry.access.redhat.com/.* returned non-zero exit status 125"), ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. 
.* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), + ("Failed to push SBOM to quay.io", r"Uploading SBOM file for [^ ]+ to [^ ]+ with mediaType [^ ]+. Error: Get .https://quay.io/v2/.: dial tcp .[0-9a-f:]+.:443: connect: network is unreachable [^ ]+: error during command execution: Get .https://quay.io/v2/.: dial tcp .[0-9a-f:]+.:443: connect: network is unreachable"), ("Failed to push to quai.io due to 404", r"Error response from registry: recognizable error message not found: PUT \"https://quay.io/[^ ]+\": response status code 404"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), From d96daa2242995b0ee94b01ac054289c5285ce5b6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 26 Aug 2025 09:18:38 +0200 Subject: [PATCH 215/321] feat(KONFLUX-9835): Delete GL CEE repos for second time --- go.mod | 2 +- go.sum | 2 ++ pkg/clients/gitlab/git.go | 30 +++++++++++++++++++++++++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index d1ff501b5d..424dd5bac5 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/stretchr/testify v1.10.0 github.com/tektoncd/cli v0.33.0 github.com/tektoncd/pipeline v0.68.0 - github.com/xanzy/go-gitlab v0.104.1 + github.com/xanzy/go-gitlab v0.110.0 
golang.org/x/crypto v0.36.0 golang.org/x/oauth2 v0.25.0 golang.org/x/tools v0.28.0 diff --git a/go.sum b/go.sum index 234bd8f542..8cbb4dffa0 100644 --- a/go.sum +++ b/go.sum @@ -1939,6 +1939,8 @@ github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAh github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/xanzy/go-gitlab v0.104.1 h1:g/liXIPJH0jsTwVuzTAUMiKdTf6Qup3u2XZq5Rp90Wc= github.com/xanzy/go-gitlab v0.104.1/go.mod h1:ETg8tcj4OhrB84UEgeE8dSuV/0h4BBL1uOV/qK0vlyI= +github.com/xanzy/go-gitlab v0.110.0 h1:hsFIFp01v/0D0sdUXoZfRk6CROzZbHQplk6NzKSFKhc= +github.com/xanzy/go-gitlab v0.110.0/go.mod h1:wKNKh3GkYDMOsGmnfuX+ITCmDuSDWFO0G+C4AygL9RY= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 904e412b4e..7e6a0e2e90 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -69,7 +69,7 @@ func (gc *GitlabClient) CreateGitlabNewBranch(projectID, branchName, sha, baseBr // If sha is not provided, get the latest commit from the base branch if sha == "" { - commit, _, err := gc.client.Commits.GetCommit(projectID, baseBranch) + commit, _, err := gc.client.Commits.GetCommit(projectID, baseBranch, &gitlab.GetCommitOptions{}) if err != nil { return fmt.Errorf("failed to get latest commit from base branch: %v", err) } @@ -271,10 +271,14 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { // and got "jhutar/nodejs-devfile-sample7-ocpp01v1-konflux-perfscale-deleted-138805" // and that means repo was moved by being deleted for a first // time, entering a grace period. 
- return nil + + // Now we need to delete the repository for a second time to limit + // number of repos we keep behind as per request in INC3755661 + err := gc.DeleteRepositoryReally(getProj.PathWithNamespace) + return err } - resp, err := gc.client.Projects.DeleteProject(projectID) + resp, err := gc.client.Projects.DeleteProject(projectID, &gitlab.DeleteProjectOptions{}) if err != nil { if resp != nil && resp.StatusCode == http.StatusNotFound { @@ -289,6 +293,7 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { err = utils.WaitUntilWithInterval(func() (done bool, err error) { getProj, getResp, getErr := gc.client.Projects.GetProject(projectID, nil) + if getErr != nil { if getResp != nil && getResp.StatusCode == http.StatusNotFound { return true, nil @@ -296,7 +301,12 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { return false, getErr } } + if getProj.PathWithNamespace != projectID && strings.Contains(getProj.PathWithNamespace, projectID + "-deleted-") { + errDel := gc.DeleteRepositoryReally(getProj.PathWithNamespace) + if errDel != nil { + return false, errDel + } return true, nil } @@ -307,6 +317,20 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { return err } +// GitLab have a concept of two deletes. First one just renames the repo, +// and only second one really deletes it. DeleteRepositoryReally is meant for +// the second deletition. +func (gc *GitlabClient) DeleteRepositoryReally(projectID string) error { + opts := &gitlab.DeleteProjectOptions{ + PermanentlyRemove: gitlab.Ptr(true), + } + _, err := gc.client.Projects.DeleteProject(projectID, opts) + if err != nil { + return fmt.Errorf("Error on permanently deleting project %s: %w", projectID, err) + } + return nil +} + // ForkRepository forks a source GitLab repository to a target repository. // Returns the newly forked repository and an error if the operation fails. 
func (gc *GitlabClient) ForkRepository(sourceOrgName, sourceName, targetOrgName, targetName string) (*gitlab.Project, error) { From 1f05d649f9960922477fc2dc1086005aa8b97ae6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 26 Aug 2025 10:25:23 +0200 Subject: [PATCH 216/321] feat: New error: Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed collected-data/jhutar-tenant/1/pod-jhutar-app-vdqek-comp-0-on-push-fqhcq-calculate-deps-s390x-pod-step-mock-build.log DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): download-01.beak-001.prod.iad2.dc.redhat.com:443 DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8969036/s390x/toplink/vol/rhel-10/packages/zstd/1.5.5/9.el10/s390x/zstd-1.5.5-9.el10.s390x.rpm HTTP/1.1" 200 522354 DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8969036/s390x/toplink/vol/rhel-10/packages/zlib-ng/2.2.3/2.el10/s390x/zlib-ng-compat-2.2.3-2.el10.s390x.rpm HTTP/1.1" 200 67398 Directory walk started Directory walk done - 184 packages Temporary output repo path: /results/buildroot_repo/.repodata/ Pool started (with 5 workers) Pool finished INFO:__main__:Pulling like: podman pull --arch s390x registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739 Trying to pull registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739... 
Error: internal error: unable to copy from source docker://registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739: determining manifest MIME type for docker://registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739: Manifest does not match provided manifest digest sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739 Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 151, in _main() ~~~~~^^ File "/usr/bin/mock-hermetic-repo", line 146, in _main prepare_image(data["config"]["bootstrap_image"], data["bootstrap"], ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ options.output_repo) ^^^^^^^^^^^^^^^^^^^^ File "/usr/bin/mock-hermetic-repo", line 115, in prepare_image subprocess.check_output(pull_cmd) ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 472, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, ~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ **kwargs).stdout ^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 577, in run raise CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) subprocess.CalledProcessError: Command '['podman', 'pull', '--arch', 's390x', 'registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739']' returned non-zero exit status 125. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index de90405fb0..94e8ce4383 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -149,6 +149,7 @@ ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), + ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' 
returned non-zero exit status 125"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), } From 778382f8e4340be7b9bc70c3b97bc28c7fe38254 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 27 Aug 2025 15:54:20 +0200 Subject: [PATCH 217/321] fix: Fix `full_path` is incorrect. You must enter the complete path for the project. --- pkg/clients/gitlab/git.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/clients/gitlab/git.go b/pkg/clients/gitlab/git.go index 7e6a0e2e90..41963ab571 100644 --- a/pkg/clients/gitlab/git.go +++ b/pkg/clients/gitlab/git.go @@ -322,6 +322,7 @@ func (gc *GitlabClient) DeleteRepositoryIfExists(projectID string) error { // the second deletition. func (gc *GitlabClient) DeleteRepositoryReally(projectID string) error { opts := &gitlab.DeleteProjectOptions{ + FullPath: gitlab.Ptr(projectID), PermanentlyRemove: gitlab.Ptr(true), } _, err := gc.client.Projects.DeleteProject(projectID, opts) From f01feeb51f117bd91cb3b6fe2024d4d16de91846 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:02:20 +0200 Subject: [PATCH 218/321] feat: New error: Couldnt get task via bundles resolver from quay.io due to digest mismatch I0825 19:51:14.258466 110662 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-nvtly-comp-0 in namespace jhutar-tenant to finish I0825 19:51:14.277522 110662 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-nvtly-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-25 19:51:04 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/jhutar-app-nvtly-comp-0-on-push-lf9f6 can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type bundles\nname = 
show-sbom\n": error requesting remote resource: error getting "bundleresolver" "jhutar-tenant/bundles-f10405a5456c436745c813da78be917f": cannot retrieve the oci image: manifest digest: "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" does not match requested digest: "sha256:002f7c8c1d2f9e09904035da414aba1188ae091df0ea9532cd997be05e73d594" for "quay.io/konflux-ci/tekton-catalog/task-show-sbom:0.1@sha256:002f7c8c1d2f9e09904035da414aba1188ae091df0ea9532cd997be05e73d594"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 94e8ce4383..3550c0815a 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -28,6 +28,7 @@ ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), ("Couldnt get task via buldles resolver from quay.io due to 404", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 404 Not Found"), ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), + ("Couldnt get task via bundles resolver from quay.io due to digest mismatch", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: manifest digest: [^ ]+ does not match requested digest: [^ ]+ for .quay.io/"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* 
Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Reason:CouldntGetTask Message:.*Couldn't retrieve Task .resolver type git.*https://gitlab.cee.redhat.com/.* error requesting remote resource: error getting .Git. .*: error resolving repository: git clone error: Cloning into .* error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429 fatal: expected 'packfile': exit status 128"), From bf8c35a7dc49fd4c017df825871340e7451d0f6b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:09:41 +0200 Subject: [PATCH 219/321] feat: New error: Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway collected-data/jhutar-tenant/1/pod-jhutar-app-051e62ca4b7511931a037be783a481f16daf188d94fe1df9-pod-step-rpms-signature-scan.log + set -o pipefail + rpm_verifier --image-url quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0:on-pr-921bd21e6d366a9f52348e9324d3c208aa548571 --image-digest sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1 --workdir /tmp Image: quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0@sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1 Error occurred: Warning: the default reading order of registry auth file will be changed from "${HOME}/.docker/config.json" to podman registry config locations in the future version of oc. "${HOME}/.docker/config.json" is deprecated, but can still be used for storing credentials as a fallback. 
See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations. error: unable to read image quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0@sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1: Get "https://quay.io/v2/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0/manifests/sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1": received unexpected HTTP status: 502 Bad Gateway {'error': 'Warning: the default reading order of registry auth file will be changed from "${HOME}/.docker/config.json" to podman registry config locations in the future version of oc. "${HOME}/.docker/config.json" is deprecated, but can still be used for storing credentials as a fallback. See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations.\nerror: unable to read image quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0@sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1: Get "https://quay.io/v2/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0/manifests/sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1": received unexpected HTTP status: 502 Bad Gateway\n'} ==================================== Final results: {"error": "Warning: the default reading order of registry auth file will be changed from \"${HOME}/.docker/config.json\" to podman registry config locations in the future version of oc. \"${HOME}/.docker/config.json\" is deprecated, but can still be used for storing credentials as a fallback. 
See https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md for the order of podman registry config locations.\nerror: unable to read image quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0@sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1: Get \"https://quay.io/v2/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0/manifests/sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1\": received unexpected HTTP status: 502 Bad Gateway\n"} Images processed: {"image": {"pullspec": "quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-wzgho-comp-0:on-pr-921bd21e6d366a9f52348e9324d3c208aa548571", "digests": ["sha256:9496b27f46723dce2856cceb3056c62131ebfa8e8cbc8600044fd89456fc58f1"]}} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 3550c0815a..cd1099a1d6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -153,6 +153,7 @@ ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' 
returned non-zero exit status 125"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), + ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), } FAILED_TR_ERRORS = { From 0d4b962497f586d731ca20db6cb1b783b06a5ba6 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:18:15 +0200 Subject: [PATCH 220/321] feat: New error: Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed collected-data/jhutar-tenant/1/pod-jhutar-app-vdqek-comp-0-on-push-fqhcq-calculate-deps-s390x-pod-step-mock-build.log DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8969036/s390x/toplink/vol/rhel-10/packages/zstd/1.5.5/9.el10/s390x/zstd-1.5.5-9.el10.s390x.rpm HTTP/1.1" 200 522354 DEBUG:urllib3.connectionpool:https://download-01.beak-001.prod.iad2.dc.redhat.com:443 "GET /brewroot/repos/rhel-10.1-build/8969036/s390x/toplink/vol/rhel-10/packages/zlib-ng/2.2.3/2.el10/s390x/zlib-ng-compat-2.2.3-2.el10.s390x.rpm HTTP/1.1" 200 67398 Directory walk started Directory walk done - 184 packages Temporary output repo path: /results/buildroot_repo/.repodata/ Pool started (with 5 workers) Pool finished INFO:__main__:Pulling like: podman pull --arch s390x registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739 Trying to pull registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739... 
Error: internal error: unable to copy from source docker://registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739: determining manifest MIME type for docker://registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739: Manifest does not match provided manifest digest sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739 Traceback (most recent call last): File "/usr/bin/mock-hermetic-repo", line 151, in _main() ~~~~~^^ File "/usr/bin/mock-hermetic-repo", line 146, in _main prepare_image(data["config"]["bootstrap_image"], data["bootstrap"], ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ options.output_repo) ^^^^^^^^^^^^^^^^^^^^ File "/usr/bin/mock-hermetic-repo", line 115, in prepare_image subprocess.check_output(pull_cmd) ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 472, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, ~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ **kwargs).stdout ^^^^^^^^^ File "/usr/lib64/python3.13/subprocess.py", line 577, in run raise CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) subprocess.CalledProcessError: Command '['podman', 'pull', '--arch', 's390x', 'registry.access.redhat.com/ubi9/ubi@sha256:8d3429261f9ed4a3c5fa57859425700290bc5469b512fd939fbac1fead8aa739']' returned non-zero exit status 125. --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index cd1099a1d6..8a2cf1fddc 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -151,6 +151,7 @@ ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), + ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' 
returned non-zero exit status 125"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), From 1ed79c16f0c87ba9a24ea71bf789dd6e6cb0303e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:25:42 +0200 Subject: [PATCH 221/321] feat: New error: Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-nkbet-comp-0-o037913360d27af63e179d0e8617335e3-pod-step-build.log [2025-08-27T07:18:50,970721543+00:00] Validate context path [2025-08-27T07:18:50,973555651+00:00] Update CA trust [2025-08-27T07:18:50,974602263+00:00] Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-08-27T07:18:51,911208507+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Could not find prefetched sbom. 
No content_sets found for ICM [2025-08-27T07:18:51,917635709+00:00] Prepare system (architecture: x86_64) [2025-08-27T07:18:52,094335710+00:00] Setup prefetched [2025-08-27T07:18:52,097494505+00:00] Register sub-man Adding the entitlement to the build [2025-08-27T07:18:52,101592319+00:00] Add secrets [2025-08-27T07:18:52,110350888+00:00] Run buildah build [2025-08-27T07:18:52,111396210+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-08-27T07:18:52 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=ca37b4f0163e755549c6a5ce0864d86f921abe39 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Dockerfile.rJk5HM -t quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-nkbet-comp-0:on-pr-ca37b4f0163e755549c6a5ce0864d86f921abe39 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi8/nodejs-18:latest Trying to pull registry.access.redhat.com/ubi8/nodejs-18:latest... 
Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18:latest: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/ubi8/nodejs-18@sha256=3a895f2b85ffeda82b2d50ce1ae554bc5bc62448aba48b3fd56ce94b694b3b2a/signature-7: received unexpected HTTP status: 500 Internal Server Error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8a2cf1fddc..dd9a0b7d1b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -131,6 +131,7 @@ ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), ("Failed to pull container from quay.io due to 404", r"Error response from registry: recognizable error message not found: PUT .https://quay.io/[^ ]+.: response status code 404: Not Found Command exited with non-zero status 1"), + ("Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading signatures: reading signature 
from https://access.redhat.com/[^ ]+ received unexpected HTTP status: 500 Internal Server Error"), ("Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+: status 500 .Internal Server Error."), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), From 37b527deb2e0940ba7325b4a85592a69ed6e6d3e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:30:40 +0200 Subject: [PATCH 222/321] feat: New error: Failed to connect to MPC VM collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-boasl-comp-0-on-push-zclsr-rpmbuild-x86-64-pod-step-mock-build.log + test linux/amd64 = localhost + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + '[' -e /ssh/otp ']' ++ cat /ssh/otp-server + curl --cacert /ssh/otp-ca -XPOST -d @/ssh/otp https://multi-platform-otp-server.multi-platform-controller.svc.cluster.local/otp % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 2654 100 2634 100 20 122k 952 --:--:-- --:--:-- --:--:-- 129k + echo '' + arch=x86_64 + case linux/amd64 in + chmod 0400 /root/.ssh/id_rsa ++ cat /ssh/host + 
export SSH_HOST=u-e12c1960119fd8b233a0ac3f70ea@10.207.4.165 + SSH_HOST=u-e12c1960119fd8b233a0ac3f70ea@10.207.4.165 ++ cat /ssh/user-dir + export HOMEDIR=/home/u-e12c1960119fd8b233a0ac3f70ea + HOMEDIR=/home/u-e12c1960119fd8b233a0ac3f70ea + export 'SSH_ARGS=-o StrictHostKeyChecking=no' + SSH_ARGS='-o StrictHostKeyChecking=no' + '[' u-e12c1960119fd8b233a0ac3f70ea@10.207.4.165 == localhost ']' + workdir=/var/workdir + remote_cmd echo 'Hello from the other side!' + ssh -o StrictHostKeyChecking=no u-e12c1960119fd8b233a0ac3f70ea@10.207.4.165 echo 'Hello from the other side!' ssh: connect to host 10.207.4.165 port 22: Connection timed out --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index dd9a0b7d1b..53aa680621 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -126,6 +126,7 @@ ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), + ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat 
/ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), From 38567497963dd34ac90d5a7b604746e5df1bbc42 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:36:29 +0200 Subject: [PATCH 223/321] feat: New error: Pod stuck in incorrect status collected-data/konflux-perfscale-tenant/1/collected-taskrun-konflux-perfsc5a0ab08625f2a31b8c093a955565a592-clone-repository.json "status": { "conditions": [ { "type": "Succeeded", "status": "Unknown", "lastTransitionTime": "2025-08-28T04:16:02Z", "reason": "Pending", "message": "pod status \"PodReadyToStartContainers\":\"False\"; message: \"\"" } ], "podName": "konflux-perfsc5a0ab08625f2a62748da405242895eed0686a97c4d968-pod", "startTime": "2025-08-28T04:15:55Z", --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 53aa680621..dab031c663 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -170,6 +170,7 @@ ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. 
Maybe missing or invalid Task .*"), ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), + ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), } def message_to_reason(reasons_and_errors: set, msg: str) -> str: From 08c4da089bf8b32eabb458cb4da361f2e19d9c19 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:41:40 +0200 Subject: [PATCH 224/321] style: Apply some flake8 recommendations to errors.py --- tests/load-tests/errors.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index dab031c663..4a99552fe0 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -1,13 +1,13 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- +import collections import csv import json +import os import re import sys -import collections -import os -import time +import yaml # Column indexes in input data @@ -173,6 +173,7 @@ ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), } + def message_to_reason(reasons_and_errors: set, msg: str) -> str: """ Classifies an error message using regular expressions and returns the error name. 
@@ -269,6 +270,7 @@ def find_first_failed_build_plr(data_dir, plr_type): return data + def find_trs(plr): try: for tr in plr["status"]["childReferences"]: @@ -276,6 +278,7 @@ def find_trs(plr): except KeyError: return + def check_failed_taskrun(data_dir, ns, tr_name): datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") data = load(datafile) @@ -306,18 +309,20 @@ def find_failed_containers(data_dir, ns, tr_name): except KeyError: return + def load_container_log(data_dir, ns, pod_name, cont_name): datafile = os.path.join(data_dir, ns, "1", "pod-" + pod_name + "-" + cont_name + ".log") print(f"Checking errors in {datafile}") with open(datafile, "r") as fd: return fd.read() + def investigate_failed_plr(dump_dir, plr_type="build"): reasons = [] try: plr = find_first_failed_build_plr(dump_dir, plr_type) - if plr == None: + if plr is None: return ["SORRY PLR not found"] plr_ns = plr["metadata"]["namespace"] @@ -348,6 +353,7 @@ def investigate_failed_plr(dump_dir, plr_type="build"): reasons.sort() # sort reasons return reasons + def main(): input_file = sys.argv[1] timings_file = sys.argv[2] From b589f6c07e8e65074f292d2e90cc52afb48c0c8d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 11:53:47 +0200 Subject: [PATCH 225/321] feat: Hide 'Post-test data collection failed' from simple error as it is not related to actual error and just increases variability of errors --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4a99552fe0..9207d0b5e6 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -412,7 +412,7 @@ def main(): data = { "error_by_code": error_by_code, "error_by_reason": error_by_reason, - "error_reasons_simple": "; ".join([f"{v}x {k}" for k, v in error_by_reason.items()]), + "error_reasons_simple": "; ".join([f"{v}x {k}" for k, v in error_by_reason.items() if k != "Post-test data collection 
failed"]), "error_messages": error_messages, } From ebec323abdb4234e4415d738b5187218ba0eeba5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 15:11:20 +0200 Subject: [PATCH 226/321] feat: New error: Application creation failed because resourcequota object has been modified Unknown error: FAIL(30): Application failed creation: Unable to create the Application test-rhtap-1-app-ulkcu: Operation cannot be fulfilled on resourcequotas "konflux": the object has been modified; please apply your changes to the latest version and try again --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 9207d0b5e6..16308d1574 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -19,6 +19,7 @@ ERRORS = { ("Application creation failed because it already exists", r"Application failed creation: Unable to create the Application .*: applications.appstudio.redhat.com .* already exists"), ("Application creation failed because of TLS handshake timeout", r"Application failed creation: Unable to create the Application .*: failed to get API group resources: unable to retrieve the complete list of server APIs: appstudio.redhat.com/v1alpha1: Get .*: net/http: TLS handshake timeout"), + ("Application creation failed because resourcequota object has been modified", r"Application failed creation: Unable to create the Application [^ ]+: Operation cannot be fulfilled on resourcequotas [^ ]+: the object has been modified; please apply your changes to the latest version and try again"), ("Application creation timed out waiting for quota evaluation", r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out"), ("Build Pipeline Run was cancelled", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled"), ("Component creation 
timed out waiting for image-controller annotations", r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component"), # obsolete From c0f83dac42bdb49518f02419dc45491ad044d590 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 28 Aug 2025 15:39:19 +0200 Subject: [PATCH 227/321] feat: New error: Component creation failed because resourcequota object has been modified Unknown error: FAIL(60): Component failed creation: Unable to create the Component test-rhtap-1-app-dbkjb-comp-0: Operation cannot be fulfilled on resourcequotas "konflux": the object has been modified; please apply your changes to the latest version and try again --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 16308d1574..ddeee3f81b 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -22,6 +22,7 @@ ("Application creation failed because resourcequota object has been modified", r"Application failed creation: Unable to create the Application [^ ]+: Operation cannot be fulfilled on resourcequotas [^ ]+: the object has been modified; please apply your changes to the latest version and try again"), ("Application creation timed out waiting for quota evaluation", r"Application failed creation: Unable to create the Application .*: Internal error occurred: resource quota evaluation timed out"), ("Build Pipeline Run was cancelled", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:Cancelled.*Message:PipelineRun .* was cancelled"), + ("Component creation failed because resourcequota object has been modified", r"Component failed creation: Unable to create the Component [^ ]+: Operation cannot be fulfilled on resourcequotas [^ ]+: the object has been modified; please apply your changes to the latest version and try again"), ("Component creation timed out waiting for 
image-controller annotations", r"Component failed creation: Unable to create the Component .* timed out when waiting for image-controller annotations to be updated on component"), # obsolete ("Component creation timed out waiting for image repository to be ready", r"Component failed creation: Unable to create the Component .* timed out waiting for image repository to be ready for component .* in namespace .*: context deadline exceeded"), ("Couldnt get pipeline via bundles resolver from quay.io due to 429", r"Message:Error retrieving pipeline for pipelinerun .*bundleresolver.* cannot retrieve the oci image: GET https://quay.io/v2/.*unexpected status code 429 Too Many Requests"), From b106a278c507e3271e668cae95d28dbbe85d3b75 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 13:37:09 +0200 Subject: [PATCH 228/321] feat: New error: buildah build failed to pull container from registry.access.redhat.com because digest mismatch collected-data/jhutar-tenant/1/pod-jhutar-app-bnmkt-comp-0-on-87c457b5bae3483dd0ed470b92fe0b85-pod-step-build.log [2025-08-29T01:40:15,351568368+00:00] Validate context path [2025-08-29T01:40:15,354388245+00:00] Update CA trust [2025-08-29T01:40:15,355427246+00:00] Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-08-29T01:40:16,291529407+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Could not find prefetched sbom. 
No content_sets found for ICM [2025-08-29T01:40:16,298840580+00:00] Prepare system (architecture: x86_64) [2025-08-29T01:40:16,439900957+00:00] Setup prefetched [2025-08-29T01:40:16,443040332+00:00] Register sub-man Adding the entitlement to the build [2025-08-29T01:40:16,447690818+00:00] Add secrets [2025-08-29T01:40:16,456548152+00:00] Run buildah build [2025-08-29T01:40:16,457589953+00:00] buildah build --volume /tmp/entitlement:/etc/pki/entitlement --security-opt=unmask=/proc/interrupts --label build-date=2025-08-29T01:40:16 --label architecture=x86_64 --label vcs-type=git --label vcs-ref=9f8e8be25c3d37633b627d158eaa1e9cf8010d62 --label quay.expires-after=5d --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Dockerfile.ajLBtU -t quay.io/redhat-user-workloads-stage/jhutar-tenant/jhutar-app-bnmkt-comp-0:on-pr-9f8e8be25c3d37633b627d158eaa1e9cf8010d62 . [1/2] STEP 1/3: FROM registry.access.redhat.com/ubi8/nodejs-18:latest Trying to pull registry.access.redhat.com/ubi8/nodejs-18:latest... 
Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18:latest: copying system image from manifest list: parsing image configuration: Download config.json digest sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 does not match expected sha256:ab55058334c5c32969b838cea2db23ee8c69c1b742006a75c031ed4f1b286bc5 --- tests/load-tests/errors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index ddeee3f81b..0a8b594633 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -114,7 +114,9 @@ FAILED_PLR_ERRORS = { ("SKIP", r"Skipping step because a previous step failed"), # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code ("Bad Gateway when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 502 Bad Gateway "), - ("buildah build failed creating build container: registry.access.redhat.com returned 403", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403"), + ("buildah build failed to pull container from registry.access.redhat.com because digest mismatch", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: copying system image from manifest list: parsing image configuration: Download config.json digest [^ ]+ does not match expected [^ ]+"), + ("buildah build failed to pull container from 
registry.access.redhat.com because of 403", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403"), + ("buildah build failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+ received unexpected HTTP status: 500 Internal Server Error"), ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), @@ -134,7 +136,6 @@ ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in 
name resolution"), ("Failed to pull container from quay.io due to 404", r"Error response from registry: recognizable error message not found: PUT .https://quay.io/[^ ]+.: response status code 404: Not Found Command exited with non-zero status 1"), - ("Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+ received unexpected HTTP status: 500 Internal Server Error"), ("Failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+: status 500 .Internal Server Error."), ("Failed to pull container from registry.access.redhat.com because of DNS error", r"Error: initializing source docker://registry.access.redhat.com/.* pinging container registry registry.access.redhat.com: Get \"https://registry.access.redhat.com/v2/\": dial tcp: lookup registry.access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from registry.access.redhat.com because of remote tls error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading blob [^ ]+: Get .https://cdn[0-9]+.quay.io/[^ ]+ remote error: tls: internal error"), From 877189b63c0052cd1194cc1f53dc41f0883df579 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 13:44:30 +0200 Subject: [PATCH 229/321] feat: New error: Couldnt get task via bundles 
resolver because control characters in yaml I0829 17:04:49.034619 2914 logging.go:30] DEBUG Waiting for build pipeline run for component jhutar-app-kxfnm-comp-0 in namespace jhutar-tenant to finish I0829 17:04:49.060482 2914 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component jhutar-app-kxfnm-comp-0 in namespace jhutar-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-08-29 17:04:46 +0000 UTC} Reason:CouldntGetTask Message:Pipeline jhutar-tenant/jhutar-app-kxfnm-comp-0-on-pull-request-k8jnd can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type bundles\nname = clair-scan\n": invalid runtime object: yaml: control characters are not allowed} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0a8b594633..bab04fa810 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -30,6 +30,7 @@ ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), ("Couldnt get task via buldles resolver from quay.io due to 404", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 404 Not Found"), ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), + ("Couldnt get task via bundles resolver because control characters in yaml", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .* invalid runtime object: yaml: control characters are not allowed"), ("Couldnt get task via bundles resolver from quay.io 
due to digest mismatch", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: manifest digest: [^ ]+ does not match requested digest: [^ ]+ for .quay.io/"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: unexpected end of JSON input"), ("Couldnt get task via git resolver from gitlab.cee due to 429", r"Message:.*Couldn't retrieve Task .*resolver type git.*https://gitlab.cee.redhat.com/.* status code: 429"), From 757cf0f7612303a4e4ce1ee6b15b56ca9b618ffe Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 14:20:07 +0200 Subject: [PATCH 230/321] feat: New error: No podman installed on a MPC VM https://issues.redhat.com/browse/KONFLUX-9944 pod-jhutar-app-ygzrq-comp-0-on-push-q5mnr-rpmbuild-ppc64le-pod-step-mock-build.log + test linux/ppc64le = localhost + mkdir -p /root/.ssh + '[' -e /ssh/error ']' + '[' -e /ssh/otp ']' ++ cat /ssh/otp-server + curl --cacert /ssh/otp-ca -XPOST -d @/ssh/otp https://multi-platform-otp-server.multi-platform-controller.svc.cluster.local/otp % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 2641 100 2621 100 20 159k 1250 --:--:-- --:--:-- --:--:-- 161k + echo '' + arch=x86_64 + case linux/ppc64le in + arch=ppc64le + chmod 0400 /root/.ssh/id_rsa ++ cat /ssh/host + 
export SSH_HOST=u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 + SSH_HOST=u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 ++ cat /ssh/user-dir + export HOMEDIR=/home/u-82f02fdfeb1b1dff89d58b56885d + HOMEDIR=/home/u-82f02fdfeb1b1dff89d58b56885d + export 'SSH_ARGS=-o StrictHostKeyChecking=no' + SSH_ARGS='-o StrictHostKeyChecking=no' + '[' u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 == localhost ']' + workdir=/var/workdir + remote_cmd echo 'Hello from the other side!' + ssh -o StrictHostKeyChecking=no u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 echo 'Hello from the other side!' Warning: Permanently added '10.130.75.6' (ED25519) to the list of known hosts. Hello from the other side! + remote_cmd mkdir /home/u-82f02fdfeb1b1dff89d58b56885d/results /home/u-82f02fdfeb1b1dff89d58b56885d/source + ssh -o StrictHostKeyChecking=no u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 mkdir /home/u-82f02fdfeb1b1dff89d58b56885d/results /home/u-82f02fdfeb1b1dff89d58b56885d/source + remote_cmd podman unshare setfacl -m g:135:r-x -m default:g:135:r-x /home/u-82f02fdfeb1b1dff89d58b56885d/source + ssh -o StrictHostKeyChecking=no u-82f02fdfeb1b1dff89d58b56885d@10.130.75.6 podman unshare setfacl -m g:135:r-x -m default:g:135:r-x /home/u-82f02fdfeb1b1dff89d58b56885d/source bash: line 1: podman: command not found --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index bab04fa810..7a284f53e8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -152,6 +152,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete 
.docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), + ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), From a5dcfbc25ad18a90b9d13b383cf1303af8fd38cf Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 14:26:44 +0200 Subject: [PATCH 231/321] feat: New error: Pod creation failed because resource quota evaluation timed out collected-data/jhutar-tenant/1/collected-taskrun-jhutar-app-gluus-comp-0-on-push-zpblt-calculate-deps-ppc64le.json "status": { "conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-08-31T19:25:25Z", "reason": "PodCreationFailed", "message": "failed to create task run pod \"jhutar-app-gluus-comp-0-on-push-zpblt-calculate-deps-ppc64le\": Internal error occurred: resource quota evaluation timed out. 
Maybe missing or invalid Task jhutar-tenant/" } ], "podName": "", "startTime": "2025-08-31T19:25:07Z", "completionTime": "2025-08-31T19:25:25Z", --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7a284f53e8..f746d9ec44 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -175,6 +175,7 @@ ("Build failed for unspecified reasons", r"build failed for unspecified reasons."), ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), + ("Pod creation failed because resource quota evaluation timed out", r".message.: .failed to create task run pod [^ ]+: Internal error occurred: resource quota evaluation timed out. Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), } From 5428eac223fab4f7d72ede00f39c7f579e0ea3b5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 14:43:34 +0200 Subject: [PATCH 232/321] feat: New error: Invalid reference when processing SBOM collected-data/jhutar-tenant/1/pod-jhutar-app-xewou-comp-0-on-push-6xx4d-upload-to-quay-pod-step-upload-sbom-to-quay.log WARNING: SBOM attachments are deprecated and support will be removed in a Cosign release soon after 2024-02-22 (see https://github.com/sigstore/cosign/issues/2755). 
Instead, please use SBOM attestations. WARNING: Attaching SBOMs this way does not sign them. To sign them, use 'cosign attest --predicate results/oras-staging/sbom-merged.json --key '. Error: could not parse reference: quay.io/redhat-user-workloads/jhutar-tenant/jhutar-app-xewou-comp-0:a349a7739b89f37118909ebe54e49dc85f72bb3f.nvr-libecpg-16.1-16.el10_1@ error during command execution: could not parse reference: quay.io/redhat-user-workloads/jhutar-tenant/jhutar-app-xewou-comp-0:a349a7739b89f37118909ebe54e49dc85f72bb3f.nvr-libecpg-16.1-16.el10_1@ --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f746d9ec44..bea4e7938f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -152,6 +152,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), + ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/redhat-user-workloads/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET 
.https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), From f1d9ed57c72765d3f821cfbe2f97d9115cd139ca Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 1 Sep 2025 14:48:21 +0200 Subject: [PATCH 233/321] feat: New error: Failed to push SBOM to quay.io collected-data/jhutar-1-tenant/1/pod-jhutar-1-app-brsuo-comp-0-on-push-w4dh8-upload-to-quay-pod-step-upload-sbom-to-quay.log WARNING: SBOM attachments are deprecated and support will be removed in a Cosign release soon after 2024-02-22 (see https://github.com/sigstore/cosign/issues/2755). Instead, please use SBOM attestations. WARNING: Attaching SBOMs this way does not sign them. To sign them, use 'cosign attest --predicate results/oras-staging/sbom-merged.json --key '. Uploading SBOM file for [quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-brsuo-comp-0@sha256:e935c2dfb4e5383077f1019474d11d67f47f85d3c85c8cbadc62969160fb46e8] to [quay.io/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-brsuo-comp-0:sha256-e935c2dfb4e5383077f1019474d11d67f47f85d3c85c8cbadc62969160fb46e8.sbom] with mediaType [text/spdx+json]. 
Error: PUT https://quay.io/v2/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-brsuo-comp-0/blobs/uploads/ab0767c0-8d92-49d9-ba26-ca0f7f31c949?digest=sha256%3A4179b49b7a904e7ee96a10530e4aa1d6b265e5df9d37fa8ffd73c8cdeca0ee6f: unexpected status code 200 OK main.go:74: error during command execution: PUT https://quay.io/v2/redhat-user-workloads/jhutar-1-tenant/jhutar-1-app-brsuo-comp-0/blobs/uploads/ab0767c0-8d92-49d9-ba26-ca0f7f31c949?digest=sha256%3A4179b49b7a904e7ee96a10530e4aa1d6b265e5df9d37fa8ffd73c8cdeca0ee6f: unexpected status code 200 OK --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index bea4e7938f..9acd3a8d43 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -146,6 +146,7 @@ ("Failed to pull container from registry.access.redhat.com because of unauthorized", r"unable to retrieve auth token: invalid username/password: unauthorized: Please login to the Red Hat Registry using your Customer Portal credentials. .* subprocess.CalledProcessError: Command ...skopeo....inspect.*docker://registry.access.redhat.com/.* returned non-zero exit status 1"), ("Failed to pull container from registry.fedoraproject.org", r"Error: internal error: unable to copy from source docker://registry.fedoraproject.org/[^ ]+: initializing source docker://registry.fedoraproject.org/[^ ]+: pinging container registry registry.fedoraproject.org: Get \"https://registry.fedoraproject.org/v2/\": dial tcp [^ ]+: connect: connection refused"), ("Failed to push SBOM to quay.io", r"Uploading SBOM file for [^ ]+ to [^ ]+ with mediaType [^ ]+. Error: Get .https://quay.io/v2/.: dial tcp .[0-9a-f:]+.:443: connect: network is unreachable [^ ]+: error during command execution: Get .https://quay.io/v2/.: dial tcp .[0-9a-f:]+.:443: connect: network is unreachable"), + ("Failed to push SBOM to quay.io", r"Uploading SBOM file for [^ ]+ to [^ ]+ with mediaType [^ ]+. 
Error: PUT https://quay.io/v2/[^ ]+: unexpected status code 200 OK [^ ]+: error during command execution: PUT https://quay.io/v2/[^ ]+: unexpected status code 200 OK"), ("Failed to push to quai.io due to 404", r"Error response from registry: recognizable error message not found: PUT \"https://quay.io/[^ ]+\": response status code 404"), ("Failed to ssh to remote MPC VM", r"[^ ]+@[0-9.]+: Permission denied .publickey,gssapi-keyex,gssapi-with-mic..\s*$"), # KONFLUX-9742 ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), From 36d1ebce25f4c2765710603d192fa78ee7b67ab4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 2 Sep 2025 08:01:28 +0200 Subject: [PATCH 234/321] feat: New error: Timeout creating application calling mapplication.kb.io webhook I0902 00:52:04.181482 101988 logging.go:64] FAIL(30): Application failed creation: Unable to create the Application test-rhtap-52-app-pqttr: Internal error occurred: failed calling webhook "mapplication.kb.io": failed to call webhook: Post "https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application?timeout=10s": context deadline exceeded --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 9acd3a8d43..7e3d93f7b9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -78,6 +78,7 @@ ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) 
due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), ("Repo templating failed when updating file on github.com because 504", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), + ("Timeout creating application calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application [^ ]+: Internal error occurred: failed calling webhook .mapplication.kb.io.: failed to call webhook: Post .https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application[^ ]+.: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error forking project .*: context deadline exceeded"), ("Timeout forking the repo before the actual test", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. 
fork to complete: context deadline exceeded"), From 09bc429d1703219663171543b8d6a51d316f771f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 8 Sep 2025 13:22:18 +0200 Subject: [PATCH 235/321] feat: New error: Couldnt get task via buldles resolver from quay.io due to manifest unknown https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1757329053716889 I0908 10:16:03.724810 11586 logging.go:30] DEBUG Waiting for build pipeline run for component konflux-perfscale-app-mgyoa-comp-0 in namespace konflux-perfscale-tenant to finish I0908 10:16:23.768166 11586 logging.go:64] FAIL(71): Build Pipeline Run failed run: PipelineRun for component konflux-perfscale-app-mgyoa-comp-0 in namespace konflux-perfscale-tenant failed: {Type:Succeeded Status:False Severity: LastTransitionTime:{Inner:2025-09-08 10:16:08 +0000 UTC} Reason:CouldntGetTask Message:Pipeline konflux-perfscale-tenant/konflux-perfscale-app-mgyoa-comp-0-on-pull-request-5sl5v can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task "resolver type bundles\nname = rpms-signature-scan\n": error requesting remote resource: error getting "bundleresolver" "konflux-perfscale-tenant/bundles-5789edae118d251a2d0d38c0ca3c4634": cannot retrieve the oci image: GET https://quay.io/v2/konflux-ci/konflux-vanguard/task-rpms-signature-scan/manifests/sha256:49ff6d117c3e9dc3966d1244e118e168b3501742ec14c3a4161a276ff48d04d5: MANIFEST_UNKNOWN: manifest unknown; map[]} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7e3d93f7b9..7cb2340b69 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -30,6 +30,7 @@ ("Couldnt get pipeline via http resolver from gitlab.cee", r"Message:.*resolver failed to get Pipeline.*error requesting remote resource.*Http.*https://gitlab.cee.redhat.com/.* is not found"), ("Couldnt get task via buldles resolver from quay.io due to 404", r"Message:.*Couldn't retrieve Task 
.*resolver type bundles.*https://quay.io/.* status code 404 Not Found"), ("Couldnt get task via buldles resolver from quay.io due to 429", r"Message:.*Couldn't retrieve Task .*resolver type bundles.*https://quay.io/.* status code 429 Too Many Requests"), + ("Couldnt get task via buldles resolver from quay.io due to manifest unknown", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .* cannot retrieve the oci image: GET https://quay.io/[^ ]+: MANIFEST_UNKNOWN: manifest unknown"), ("Couldnt get task via bundles resolver because control characters in yaml", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .* invalid runtime object: yaml: control characters are not allowed"), ("Couldnt get task via bundles resolver from quay.io due to digest mismatch", r"Build Pipeline Run failed run: PipelineRun for component [^ ]+ in namespace [^ ]+ failed: .* Reason:CouldntGetTask Message:Pipeline [^ ]+ can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the oci image: manifest digest: [^ ]+ does not match requested digest: [^ ]+ for .quay.io/"), ("Couldnt get task via bundles resolver from quay.io due to unexpected end of JSON input", r"Build Pipeline Run failed run: PipelineRun for component .* in namespace .* failed: .* Reason:CouldntGetTask Message:Pipeline .* can't be Run; it contains Tasks that don't exist: Couldn't retrieve Task .resolver type bundles.*name = .*: error requesting remote resource: error getting \"bundleresolver\" .*: cannot retrieve the 
oci image: unexpected end of JSON input"), From 419e35e9103a84bd35c3e81666edf0bfb301ffc3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 8 Sep 2025 15:00:19 +0200 Subject: [PATCH 236/321] feat: New error: DNF failed to download repodata from Download Devel because timeout collected-data/jhutar-tenant/1/pod-jhutar-app-cmppv-comp-0-on-push-hbqml-calculate-deps-x86-64-pod-step-mock-build.log [...] 2025-09-05T14:31:49+0000 DEBUG error: Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30001 ms: Timeout was reached] (https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml). 2025-09-05T14:32:19+0000 DEBUG error: Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30001 ms: Timeout was reached] (https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml). 2025-09-05T14:32:19+0000 WARNING Errors during downloading metadata for repository 'build': - Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30001 ms: Timeout was reached] 2025-09-05T14:32:19+0000 DDEBUG Cleaning up. 2025-09-05T14:32:19+0000 DDEBUG Plugins were unloaded. 
2025-09-05T14:32:19+0000 SUBDEBUG Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 574, in load ret = self._repo.load() ^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.12/site-packages/libdnf/repo.py", line 467, in load return _repo.Repo_load(self) ^^^^^^^^^^^^^^^^^^^^^ libdnf._error.Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 67, in main return _main(base, args, cli_class, option_parser_class) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 106, in _main return cli_run(cli, base) ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 122, in cli_run cli.run() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 1090, in run self._process_demands() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 779, in _process_demands self.base.fill_sack( File "/usr/lib/python3.12/site-packages/dnf/base.py", line 413, in fill_sack self._add_repo_to_sack(r) File "/usr/lib/python3.12/site-packages/dnf/base.py", line 141, in _add_repo_to_sack repo.load() File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 581, in load raise dnf.exceptions.RepoError(str(e)) dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried 2025-09-05T14:32:19+0000 CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried results/chroot_scan/var/lib/mock/rhel-10.1-build-repo_8973590/root/var/log/dnf.librepo.log 2025-09-05T14:25:58+0000 INFO Librepo version: 1.18.0 with CURL_GLOBAL_ACK_EINTR support 
(libcurl/8.9.1 OpenSSL/3.2.2 zlib/1.3.1.zlib-ng libidn2/2.3.7 nghttp2/1.64.0) 2025-09-05T14:25:58+0000 INFO Downloading: https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml 2025-09-05T14:26:28+0000 INFO Serious error - Curl code (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30000 ms: Timeout was reached] 2025-09-05T14:26:28+0000 INFO Error during transfer: Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30000 ms: Timeout was reached] [...] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7cb2340b69..7ccb6e7fda 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -123,6 +123,7 @@ ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), + ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download.devel.redhat.com"), 
("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found"), ("Enterprise contract results failed validation", r"^false $"), ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), From bc7d1b876b2546b2415c557714a0d5497e5061a5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 8 Sep 2025 15:10:46 +0200 Subject: [PATCH 237/321] feat: New error: DNF failed to download repodata from Download Devel because timeout collected-data/jhutar-tenant/1/pod-jhutar-app-xloen-comp-0-on-push-b2kk5-calculate-deps-x86-64-pod-step-mock-build.log [...] 2025-09-05T12:49:33+0000 DEBUG error: Curl error (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30001 ms: Timeout was reached] (https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml). 2025-09-05T12:50:03+0000 DEBUG error: Curl error (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30002 ms: Timeout was reached] (https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml). 
2025-09-05T12:50:03+0000 WARNING Errors during downloading metadata for repository 'build': - Curl error (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30001 ms: Timeout was reached] - Curl error (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30002 ms: Timeout was reached] 2025-09-05T12:50:03+0000 DDEBUG Cleaning up. 2025-09-05T12:50:03+0000 DDEBUG Plugins were unloaded. 2025-09-05T12:50:03+0000 SUBDEBUG Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 574, in load ret = self._repo.load() ^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.12/site-packages/libdnf/repo.py", line 467, in load return _repo.Repo_load(self) ^^^^^^^^^^^^^^^^^^^^^ libdnf._error.Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 67, in main return _main(base, args, cli_class, option_parser_class) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 106, in _main return cli_run(cli, base) ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 122, in cli_run cli.run() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 1090, in run self._process_demands() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 779, in _process_demands self.base.fill_sack( File "/usr/lib/python3.12/site-packages/dnf/base.py", line 413, in fill_sack 
self._add_repo_to_sack(r) File "/usr/lib/python3.12/site-packages/dnf/base.py", line 141, in _add_repo_to_sack repo.load() File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 581, in load raise dnf.exceptions.RepoError(str(e)) dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried 2025-09-05T12:50:03+0000 CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried results/chroot_scan/var/lib/mock/rhel-10.1-build-repo_8973590/root/var/log/dnf.librepo.log 2025-09-05T12:43:40+0000 INFO Librepo version: 1.18.0 with CURL_GLOBAL_ACK_EINTR support (libcurl/8.9.1 OpenSSL/3.2.2 zlib/1.3.1.zlib-ng libidn2/2.3.7 nghttp2/1.64.0) 2025-09-05T12:43:40+0000 INFO Downloading: https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml 2025-09-05T12:44:10+0000 INFO Serious error - Curl code (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30000 ms: Timeout was reached] 2025-09-05T12:44:10+0000 INFO Error during transfer: Curl error (28): Timeout was reached for https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/repos/rhel-10.1-build/8973590/x86_64/repodata/repomd.xml [Failed to connect to download-01.beak-001.prod.iad2.dc.redhat.com port 443 after 30000 ms: Timeout was reached] [...] 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 7ccb6e7fda..0bbd217d8c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -123,6 +123,7 @@ ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), + ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download-[0-9]+.beak-[0-9]+.prod.iad2.dc.redhat.com"), ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download.devel.redhat.com"), ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found"), ("Enterprise contract results failed validation", r"^false $"), From 
e26b6fb3cdd1451633ee24434019db9ab80a53ef Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 8 Sep 2025 15:20:07 +0200 Subject: [PATCH 238/321] feat: New error: Pod creation failed with reason error collected-taskrun-konflux-perfsc9667a7adfa6167427d72d8b537e95c3d-clone-repository.json "status": { "conditions": [ { "type": "Succeeded", "status": "False", "lastTransitionTime": "2025-09-05T04:16:02Z", "reason": "Failed", "message": "\"step-clone\" exited with code 2: Error" } ], "podName": "konflux-perfsc9667a7adfa616bed4d883be01ea5fa7a1b70e2167ac6b-pod", "startTime": "2025-09-05T04:15:52Z", "completionTime": "2025-09-05T04:16:02Z", --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0bbd217d8c..de22c514e8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -182,6 +182,7 @@ ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), ("Pod creation failed because resource quota evaluation timed out", r".message.: .failed to create task run pod [^ ]+: Internal error occurred: resource quota evaluation timed out. 
Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), + ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), } From 9c30faa48844f26f76ed2c07aa5d6e0df1dd2775 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 9 Sep 2025 08:26:46 +0200 Subject: [PATCH 239/321] feat(KONFLUX-9513): Make it possible to configure namespace --- pkg/clients/kubernetes/client.go | 4 +- pkg/framework/framework.go | 40 ++----------------- pkg/sandbox/sandbox.go | 14 +------ pkg/utils/util.go | 29 +++++--------- tests/load-tests/loadtest.go | 2 +- .../handle_integration_test_scenarios.go | 4 +- .../pkg/journey/handle_repo_templating.go | 4 +- tests/load-tests/pkg/journey/handle_users.go | 20 +++++----- tests/load-tests/pkg/journey/journey.go | 2 +- .../load-tests/pkg/loadtestutils/userutils.go | 19 ++++++--- tests/load-tests/pkg/options/options.go | 2 +- tests/load-tests/run-stage.sh | 2 +- 12 files changed, 47 insertions(+), 95 deletions(-) diff --git a/pkg/clients/kubernetes/client.go b/pkg/clients/kubernetes/client.go index c8b556a31d..a41cd044c8 100644 --- a/pkg/clients/kubernetes/client.go +++ b/pkg/clients/kubernetes/client.go @@ -111,7 +111,7 @@ func (c *CustomClient) DynamicClient() dynamic.Interface { // Creates Kubernetes clients: // 1. Will create a kubernetes client from default kubeconfig as kubeadmin // 2. 
Will create a sandbox user and will generate a client using user token a new client to create resources in RHTAP like a normal user -func NewDevSandboxProxyClient(userName string, isSA bool, options utils.Options) (*K8SClient, error) { +func NewDevSandboxProxyClient(userName string, options utils.Options) (*K8SClient, error) { var err error var sandboxController *sandbox.SandboxController var proxyAuthInfo *sandbox.SandboxUserAuthInfo @@ -121,7 +121,7 @@ func NewDevSandboxProxyClient(userName string, isSA bool, options utils.Options) if err != nil { return nil, err } - proxyAuthInfo, err = sandboxController.ReconcileUserCreationStage(userName, options.ToolchainApiUrl, options.KeycloakUrl, options.OfflineToken, isSA) + proxyAuthInfo, err = sandboxController.ReconcileUserCreationStage(userName, options.ApiUrl, options.Token) if err != nil { return nil, err } diff --git a/pkg/framework/framework.go b/pkg/framework/framework.go index 58d55ea56f..64f38ac5be 100644 --- a/pkg/framework/framework.go +++ b/pkg/framework/framework.go @@ -50,20 +50,7 @@ func NewFramework(userName string, stageConfig ...utils.Options) (*Framework, er return NewFrameworkWithTimeout(userName, time.Second*60, stageConfig...) } -// This periodically refreshes framework for Stage user because of Keycloak access token expires in 15 minutes -func refreshFrameworkStage(currentFramework *Framework, userName string, timeout time.Duration, options ...utils.Options) { - for { - time.Sleep(time.Minute * 10) - fw, err := newFrameworkWithTimeout(userName, timeout, options...) 
- if err != nil { - fmt.Printf("ERROR: Failed refreshing framework for user %s: %+v\n", userName, err) - return - } - *currentFramework = *fw - } -} - -func newFrameworkWithTimeout(userName string, timeout time.Duration, options ...utils.Options) (*Framework, error) { +func NewFrameworkWithTimeout(userName string, timeout time.Duration, options ...utils.Options) (*Framework, error) { var err error var k *kubeCl.K8SClient var clusterAppDomain, openshiftConsoleHost string @@ -73,11 +60,11 @@ func newFrameworkWithTimeout(userName string, timeout time.Duration, options ... if userName == "" { return nil, fmt.Errorf("userName cannot be empty when initializing a new framework instance") } - isStage, isSA, err := utils.CheckOptions(options) + isStage, err := utils.CheckOptions(options) if err != nil { return nil, err } - if len(options) == 1 { + if isStage { option = options[0] } else { option = utils.Options{} @@ -89,7 +76,7 @@ func newFrameworkWithTimeout(userName string, timeout time.Duration, options ... // Just try several times to get the user kubeconfig err = retry.Do( func() error { - if k, err = kubeCl.NewDevSandboxProxyClient(userName, isSA, option); err != nil { + if k, err = kubeCl.NewDevSandboxProxyClient(userName, option); err != nil { GinkgoWriter.Printf("error when creating dev sandbox proxy client: %+v\n", err) } return err @@ -182,25 +169,6 @@ func newFrameworkWithTimeout(userName string, timeout time.Duration, options ... }, nil } -func NewFrameworkWithTimeout(userName string, timeout time.Duration, options ...utils.Options) (*Framework, error) { - isStage, isSA, err := utils.CheckOptions(options) - if err != nil { - return nil, err - } - - if isStage && !isSA { - options[0].ToolchainApiUrl = fmt.Sprintf("%s/workspaces/%s", options[0].ToolchainApiUrl, userName) - } - - fw, err := newFrameworkWithTimeout(userName, timeout, options...) - - if isStage && !isSA { - go refreshFrameworkStage(fw, userName, timeout, options...) 
- } - - return fw, err -} - func InitControllerHub(cc *kubeCl.CustomClient) (*ControllerHub, error) { // Initialize Common controller commonCtrl, err := common.NewSuiteController(cc) diff --git a/pkg/sandbox/sandbox.go b/pkg/sandbox/sandbox.go index ff08477932..e92a5f65df 100644 --- a/pkg/sandbox/sandbox.go +++ b/pkg/sandbox/sandbox.go @@ -155,24 +155,14 @@ func (lrt LoggingRoundTripper) RoundTrip(req *http.Request) (res *http.Response, } // ReconcileUserCreation create a user in sandbox and return a valid kubeconfig for user to be used for the tests -func (s *SandboxController) ReconcileUserCreationStage(userName, toolchainApiUrl, keycloakUrl, offlineToken string, isSA bool) (*SandboxUserAuthInfo, error) { +func (s *SandboxController) ReconcileUserCreationStage(userName, apiUrl, token string) (*SandboxUserAuthInfo, error) { wd, err := os.Getwd() if err != nil { return nil, err } kubeconfigPath := utils.GetEnv(constants.USER_KUBE_CONFIG_PATH_ENV, fmt.Sprintf("%s/tmp/%s.kubeconfig", wd, userName)) - var userToken string - if isSA { - userToken = offlineToken - } else { - userToken, err = s.GetKeycloakTokenStage(userName, keycloakUrl, offlineToken) - if err != nil { - return nil, err - } - } - - return s.GetKubeconfigPathForSpecificUser(true, toolchainApiUrl, userName, kubeconfigPath, userToken) + return s.GetKubeconfigPathForSpecificUser(true, apiUrl, userName, kubeconfigPath, token) } // ReconcileUserCreation create a user in sandbox and return a valid kubeconfig for user to be used for the tests diff --git a/pkg/utils/util.go b/pkg/utils/util.go index 167484fe1b..afbd3121dc 100644 --- a/pkg/utils/util.go +++ b/pkg/utils/util.go @@ -41,40 +41,31 @@ import ( ) type Options struct { - ToolchainApiUrl string - KeycloakUrl string - OfflineToken string + ApiUrl string + Token string } // check options are valid or not -func CheckOptions(optionsArr []Options) (bool, bool, error) { +func CheckOptions(optionsArr []Options) (bool, error) { if len(optionsArr) == 0 { - return 
false, false, nil + return false, nil } if len(optionsArr) > 1 { - return true, false, fmt.Errorf("options array contains more than 1 object") + return true, fmt.Errorf("options array contains more than 1 object") } options := optionsArr[0] - if options.ToolchainApiUrl == "" { - return true, false, fmt.Errorf("ToolchainApiUrl field is empty") + if options.ApiUrl == "" { + return true, fmt.Errorf("ApiUrl field is empty") } - if options.KeycloakUrl == "" { - return true, false, fmt.Errorf("KeycloakUrl field is empty") + if options.Token == "" { + return true, fmt.Errorf("Token field is empty") } - if options.OfflineToken == "" { - return true, false, fmt.Errorf("OfflineToken field is empty") - } - - if options.KeycloakUrl == "DIRECT" { - return true, true, nil - } else { - return true, false, nil - } + return true, nil } // CheckIfEnvironmentExists return true/false if the environment variable exists diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index b805713e1b..432d805889 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -40,7 +40,7 @@ func init() { rootCmd.Flags().StringVar(&opts.ComponentContainerContext, "component-repo-container-context", "/", "the context for image build") rootCmd.Flags().StringVar(&opts.ForkTarget, "fork-target", "", "the target namespace (GitLab) or organization (GitHub) to fork component repository to (if empty, will use MY_GITHUB_ORG env variable)") rootCmd.Flags().StringVar(&opts.QuayRepo, "quay-repo", "redhat-user-workloads-stage", "the target quay repo for PaC templated image pushes") - rootCmd.Flags().StringVar(&opts.UsernamePrefix, "username", "testuser", "identifier used for prefix of usersignup names and as suffix when forking repo") + rootCmd.Flags().StringVar(&opts.RunPrefix, "runprefix", "testuser", "identifier used for prefix of usersignup names and as suffix when forking repo") rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on 
stage") rootCmd.Flags().BoolVarP(&opts.Purge, "purge", "p", false, "purge all users or resources (on stage) after test is done") rootCmd.Flags().BoolVarP(&opts.PurgeOnly, "purge-only", "u", false, "do not run test, only purge resources (this implies --purge)") diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index b23ed50860..57b18438e1 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -8,8 +8,6 @@ import ( framework "github.com/konflux-ci/e2e-tests/pkg/framework" - util "github.com/devfile/library/v2/pkg/util" - utils "github.com/konflux-ci/e2e-tests/pkg/utils" ) @@ -41,7 +39,7 @@ func HandleIntegrationTestScenario(ctx *PerApplicationContext) error { var err error - name := fmt.Sprintf("%s-its-%s", ctx.ParentContext.Username, util.GenerateRandomString(5)) + name := fmt.Sprintf("%s-its", ctx.ApplicationName) logging.Logger.Debug("Creating integration test scenario %s for application %s in namespace %s", name, ctx.ApplicationName, ctx.ParentContext.Namespace) _, err = logging.Measure( diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index dafa6b231c..3b0f80bc33 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -233,9 +233,9 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so func HandleRepoForking(ctx *MainContext) error { var suffix string if ctx.Opts.Stage { - suffix = ctx.Opts.UsernamePrefix + "-" + ctx.Username + suffix = ctx.Opts.RunPrefix + "-" + ctx.Namespace } else { - suffix = ctx.Username + suffix = ctx.Namespace } logging.Logger.Debug("Forking repository %s with suffix %s to %s", ctx.Opts.ComponentRepoUrl, suffix, ctx.Opts.ForkTarget) diff --git 
a/tests/load-tests/pkg/journey/handle_users.go b/tests/load-tests/pkg/journey/handle_users.go index c580a8cae0..9d9e3736b5 100644 --- a/tests/load-tests/pkg/journey/handle_users.go +++ b/tests/load-tests/pkg/journey/handle_users.go @@ -2,6 +2,7 @@ package journey import "fmt" import "time" +import "strings" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" @@ -14,17 +15,16 @@ func HandleUser(ctx *MainContext) error { // TODO E.g. when token is incorrect, timeout does not work as expected if ctx.Opts.Stage { user := (*ctx.StageUsers)[ctx.ThreadIndex] - ctx.Username = user.Username + ctx.Username = strings.TrimSuffix(user.Namespace, "-tenant") ctx.Framework, err = framework.NewFrameworkWithTimeout( ctx.Username, time.Minute*60, utils.Options{ - ToolchainApiUrl: user.APIURL, - KeycloakUrl: user.SSOURL, - OfflineToken: user.Token, + ApiUrl: user.APIURL, + Token: user.Token, }) } else { - ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.UsernamePrefix, ctx.ThreadIndex) + ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.RunPrefix, ctx.ThreadIndex) ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.Username, time.Minute*60) } @@ -47,9 +47,8 @@ func HandleNewFrameworkForComp(ctx *PerComponentContext) error { ctx.ParentContext.ParentContext.Username, time.Minute*60, utils.Options{ - ToolchainApiUrl: user.APIURL, - KeycloakUrl: user.SSOURL, - OfflineToken: user.Token, + ApiUrl: user.APIURL, + Token: user.Token, }) } else { ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.ParentContext.ParentContext.Username, time.Minute*60) @@ -72,9 +71,8 @@ func HandleNewFrameworkForApp(ctx *PerApplicationContext) error { ctx.ParentContext.Username, time.Minute*60, utils.Options{ - ToolchainApiUrl: user.APIURL, - KeycloakUrl: user.SSOURL, - OfflineToken: user.Token, + ApiUrl: user.APIURL, + Token: user.Token, }) } else { ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.ParentContext.Username, time.Minute*60) diff --git 
a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 41ca2ffac0..f3eddc52a5 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -128,7 +128,7 @@ func PerApplicationSetup(fn func(*PerApplicationContext), parentContext *MainCon PerApplicationWG: perApplicationWG, ApplicationIndex: applicationIndex, ParentContext: parentContext, - ApplicationName: fmt.Sprintf("%s-app-%s", parentContext.Username, util.GenerateRandomString(5)), + ApplicationName: fmt.Sprintf("%s-app-%s", parentContext.Opts.RunPrefix, util.GenerateRandomString(5)), } parentContext.PerApplicationContexts = append(parentContext.PerApplicationContexts, perApplicationCtx) diff --git a/tests/load-tests/pkg/loadtestutils/userutils.go b/tests/load-tests/pkg/loadtestutils/userutils.go index 9b2c01450e..6a509df5c9 100644 --- a/tests/load-tests/pkg/loadtestutils/userutils.go +++ b/tests/load-tests/pkg/loadtestutils/userutils.go @@ -1,17 +1,16 @@ package loadtestutils import "encoding/json" +import "fmt" import "os" import "path/filepath" // Represents a user in the list of precreated users (e.g. 
Stage 'users.json') type User struct { - Username string `json:"username"` - Password string `json:"password"` - Token string `json:"token"` - SSOURL string `json:"ssourl"` - APIURL string `json:"apiurl"` - Verified bool `json:"verified"` + Namespace string `json:"namespace"` + Token string `json:"token"` + APIURL string `json:"apiurl"` + Verified bool `json:"verified"` } // Load 'users.json' into a slice of User structs @@ -27,5 +26,13 @@ func LoadStageUsers(filePath string) ([]User, error) { if err != nil { return nil, err } + + // Some sanity checks + if len(users) == 0 { + return nil, fmt.Errorf("Loaded %s but no users in there", filePath) + } + if users[0].APIURL == "" || users[0].Token == "" || users[0].Namespace == "" { + return nil, fmt.Errorf("Loaded %s but some expected field missing in first user", filePath) + } return users, nil } diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index e072180af1..b20c34c47c 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -38,11 +38,11 @@ type Opts struct { ReleasePipelineServiceAccount string ReleasePipelineUrl string ReleasePolicy string + RunPrefix string Stage bool TestScenarioGitURL string TestScenarioPathInRepo string TestScenarioRevision string - UsernamePrefix string WaitIntegrationTestsPipelines bool WaitPipelines bool WaitRelease bool diff --git a/tests/load-tests/run-stage.sh b/tests/load-tests/run-stage.sh index 51c04d05fd..1a8b13439e 100755 --- a/tests/load-tests/run-stage.sh +++ b/tests/load-tests/run-stage.sh @@ -34,7 +34,7 @@ go run loadtest.go \ --release-pipeline-revision "${RELEASE_PIPELINE_REVISION:-production}" \ --release-pipeline-path "${RELEASE_PIPELINE_PATH:-pipelines/managed/e2e/e2e.yaml}" \ --release-pipeline-service-account "${RELEASE_PIPELINE_SERVICE_ACCOUNT:-release-serviceaccount}" \ - --username "${USER_PREFIX:-undef}" \ + --runprefix "${USER_PREFIX:-undef}" \ 
--waitintegrationtestspipelines="${WAIT_INTEGRATION_TESTS:-true}" \ --waitpipelines="${WAIT_PIPELINES:-true}" \ --waitrelease="${WAIT_RELEASE:-true}" \ From 90360db1f9c1700d680d4f73f34436a2ea991f8d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 9 Sep 2025 08:59:10 +0200 Subject: [PATCH 240/321] refactor: Simplify user frameworks creation --- tests/load-tests/pkg/journey/handle_users.go | 82 ++++++++++---------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_users.go b/tests/load-tests/pkg/journey/handle_users.go index 9d9e3736b5..33770121e7 100644 --- a/tests/load-tests/pkg/journey/handle_users.go +++ b/tests/load-tests/pkg/journey/handle_users.go @@ -5,57 +5,53 @@ import "time" import "strings" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadtestutils" import "github.com/konflux-ci/e2e-tests/pkg/framework" import "github.com/konflux-ci/e2e-tests/pkg/utils" -func HandleUser(ctx *MainContext) error { +// Returns framework, namespace (and error) +func provisionFramework(stageUsers []loadtestutils.User, threadIndex int, username string, isStage bool) (*framework.Framework, string, error) { + var f *framework.Framework var err error - // TODO E.g. 
when token is incorrect, timeout does not work as expected - if ctx.Opts.Stage { - user := (*ctx.StageUsers)[ctx.ThreadIndex] - ctx.Username = strings.TrimSuffix(user.Namespace, "-tenant") - ctx.Framework, err = framework.NewFrameworkWithTimeout( - ctx.Username, + if isStage { + user := stageUsers[threadIndex] + f, err = framework.NewFrameworkWithTimeout( + username, time.Minute*60, utils.Options{ ApiUrl: user.APIURL, Token: user.Token, }) } else { - ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.RunPrefix, ctx.ThreadIndex) - ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.Username, time.Minute*60) + f, err = framework.NewFrameworkWithTimeout(username, time.Minute*60) } if err != nil { - return logging.Logger.Fail(10, "Unable to provision user %s: %v", ctx.Username, err) + return nil, "", err } - ctx.Namespace = ctx.Framework.UserNamespace - - return nil + return f, f.UserNamespace, nil } -func HandleNewFrameworkForComp(ctx *PerComponentContext) error { +func HandleUser(ctx *MainContext) error { var err error - // TODO This framework generation code is duplicate to above - if ctx.ParentContext.ParentContext.Opts.Stage { - user := (*ctx.ParentContext.ParentContext.StageUsers)[ctx.ParentContext.ParentContext.ThreadIndex] - ctx.Framework, err = framework.NewFrameworkWithTimeout( - ctx.ParentContext.ParentContext.Username, - time.Minute*60, - utils.Options{ - ApiUrl: user.APIURL, - Token: user.Token, - }) + if ctx.Opts.Stage { + ctx.Username = strings.TrimSuffix((*ctx.StageUsers)[ctx.ThreadIndex].Namespace, "-tenant") } else { - ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.ParentContext.ParentContext.Username, time.Minute*60) + ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.RunPrefix, ctx.ThreadIndex) } + ctx.Framework, ctx.Namespace, err = provisionFramework( + *ctx.StageUsers, + ctx.ThreadIndex, + ctx.Username, + ctx.Opts.Stage, + ) if err != nil { - return logging.Logger.Fail(11, "Unable to provision framework for user %s: %v", 
ctx.ParentContext.ParentContext.Username, err) + return logging.Logger.Fail(10, "Unable to provision user %s: %v", ctx.Username, err) } return nil @@ -64,22 +60,30 @@ func HandleNewFrameworkForComp(ctx *PerComponentContext) error { func HandleNewFrameworkForApp(ctx *PerApplicationContext) error { var err error - // TODO This framework generation code is duplicate to above - if ctx.ParentContext.Opts.Stage { - user := (*ctx.ParentContext.StageUsers)[ctx.ParentContext.ThreadIndex] - ctx.Framework, err = framework.NewFrameworkWithTimeout( - ctx.ParentContext.Username, - time.Minute*60, - utils.Options{ - ApiUrl: user.APIURL, - Token: user.Token, - }) - } else { - ctx.Framework, err = framework.NewFrameworkWithTimeout(ctx.ParentContext.Username, time.Minute*60) + ctx.Framework, _, err = provisionFramework( + *ctx.ParentContext.StageUsers, + ctx.ParentContext.ThreadIndex, + ctx.ParentContext.Username, + ctx.ParentContext.Opts.Stage, + ) + if err != nil { + return logging.Logger.Fail(11, "Unable to provision framework for user %s: %v", ctx.ParentContext.Username, err) } + return nil +} + +func HandleNewFrameworkForComp(ctx *PerComponentContext) error { + var err error + + ctx.Framework, _, err = provisionFramework( + *ctx.ParentContext.ParentContext.StageUsers, + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ParentContext.Username, + ctx.ParentContext.ParentContext.Opts.Stage, + ) if err != nil { - return logging.Logger.Fail(12, "Unable to provision framework for user %s: %v", ctx.ParentContext.Username, err) + return logging.Logger.Fail(12, "Unable to provision framework for user %s: %v", ctx.ParentContext.ParentContext.Username, err) } return nil From 52dab43173dfcb7f3c48602c6ef7f2f3aaa78ea9 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 10 Sep 2025 07:46:11 +0200 Subject: [PATCH 241/321] feat: Allow setting startup delay for threads --- tests/load-tests/loadtest.go | 8 ++++++ tests/load-tests/pkg/journey/journey.go | 33 
++++++++++++++++++++++--- tests/load-tests/pkg/options/options.go | 10 ++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 432d805889..2220d16c5c 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -42,6 +42,8 @@ func init() { rootCmd.Flags().StringVar(&opts.QuayRepo, "quay-repo", "redhat-user-workloads-stage", "the target quay repo for PaC templated image pushes") rootCmd.Flags().StringVar(&opts.RunPrefix, "runprefix", "testuser", "identifier used for prefix of usersignup names and as suffix when forking repo") rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on stage") + rootCmd.Flags().DurationVar(&opts.StartupDelay, "startup-delay", 0, "when starting per user/per application/per client treads, wait for this duration") + rootCmd.Flags().DurationVar(&opts.StartupJitter, "startup-jitter", 3*time.Second, "when applying startup delay, add or remove half of jitter with this maximum value") rootCmd.Flags().BoolVarP(&opts.Purge, "purge", "p", false, "purge all users or resources (on stage) after test is done") rootCmd.Flags().BoolVarP(&opts.PurgeOnly, "purge-only", "u", false, "do not run test, only purge resources (this implies --purge)") rootCmd.Flags().StringVar(&opts.TestScenarioGitURL, "test-scenario-git-url", "https://github.com/konflux-ci/integration-examples.git", "test scenario GIT URL (set to \"\" to disable creating these)") @@ -135,6 +137,8 @@ func main() { func perUserThread(threadCtx *journey.MainContext) { defer threadCtx.ThreadsWG.Done() + time.Sleep(threadCtx.StartupPause) + var err error //watchCtx := context.Background() @@ -249,6 +253,8 @@ func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { } }() + time.Sleep(perApplicationCtx.StartupPause) + var err error // Create framework so we do not have to share framework with parent thread @@ -297,6 +303,8 @@ func 
perComponentThread(perComponentCtx *journey.PerComponentContext) { } }() + time.Sleep(perComponentCtx.StartupPause) + var err error // Create framework so we do not have to share framework with parent thread diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index f3eddc52a5..35d0499e8a 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -2,6 +2,8 @@ package journey import "fmt" import "sync" +import "time" +import "math/rand" import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" @@ -17,6 +19,7 @@ var MainContexts []*MainContext type MainContext struct { ThreadsWG *sync.WaitGroup ThreadIndex int + StartupPause time.Duration JourneyRepeatsCounter int Opts *options.Opts StageUsers *[]loadtestutils.User @@ -41,6 +44,19 @@ func initUserThread(threadCtx *MainContext) { } } +// Helper function to compute duration to delay startup of some threads based on StartupDelay and StartupJitter command-line options +// If this is a first thread, delay will be skipped as it would not help +func computeStartupPause(index int, delay, jitter time.Duration) time.Duration { + if index == 0 || delay == 0 { + return time.Duration(0) + } else { + // For delay = 10s and jitter = 3s, this computes random number from 8.5 to 11.5 seconds + jitterSec := rand.Float64() * jitter.Seconds() - jitter.Seconds() / 2 + jitterDur := time.Duration(jitterSec) * time.Second + return delay + jitterDur + } +} + // Start all the user journey threads // TODO split this to two functions and get PurgeOnly code out func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { @@ -58,11 +74,14 @@ func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { // Initialize all user thread contexts for threadIndex := 0; threadIndex < opts.Concurrency; threadIndex++ { - logging.Logger.Info("Initiating 
thread %d", threadIndex) + startupPause := computeStartupPause(threadIndex, opts.StartupDelay, opts.StartupJitter) + + logging.Logger.Info("Initiating per user thread %d with pause %v", threadIndex, startupPause) threadCtx := &MainContext{ ThreadsWG: threadsWG, ThreadIndex: threadIndex, + StartupPause: startupPause, Opts: opts, StageUsers: &stageUsers, Username: "", @@ -109,6 +128,7 @@ func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { type PerApplicationContext struct { PerApplicationWG *sync.WaitGroup ApplicationIndex int + StartupPause time.Duration Framework *framework.Framework ParentContext *MainContext ApplicationName string @@ -122,11 +142,14 @@ func PerApplicationSetup(fn func(*PerApplicationContext), parentContext *MainCon perApplicationWG.Add(parentContext.Opts.ApplicationsCount) for applicationIndex := 0; applicationIndex < parentContext.Opts.ApplicationsCount; applicationIndex++ { - logging.Logger.Info("Initiating per application thread %d-%d", parentContext.ThreadIndex, applicationIndex) + startupPause := computeStartupPause(applicationIndex, parentContext.Opts.StartupDelay, parentContext.Opts.StartupJitter) + + logging.Logger.Info("Initiating per application thread %d-%d with pause %v", parentContext.ThreadIndex, applicationIndex, startupPause) perApplicationCtx := &PerApplicationContext{ PerApplicationWG: perApplicationWG, ApplicationIndex: applicationIndex, + StartupPause: startupPause, ParentContext: parentContext, ApplicationName: fmt.Sprintf("%s-app-%s", parentContext.Opts.RunPrefix, util.GenerateRandomString(5)), } @@ -145,6 +168,7 @@ func PerApplicationSetup(fn func(*PerApplicationContext), parentContext *MainCon type PerComponentContext struct { PerComponentWG *sync.WaitGroup ComponentIndex int + StartupPause time.Duration Framework *framework.Framework ParentContext *PerApplicationContext ComponentName string @@ -159,11 +183,14 @@ func PerComponentSetup(fn func(*PerComponentContext), parentContext *PerApplicat 
perComponentWG.Add(parentContext.ParentContext.Opts.ComponentsCount) for componentIndex := 0; componentIndex < parentContext.ParentContext.Opts.ComponentsCount; componentIndex++ { - logging.Logger.Info("Initiating per component thread %d-%d-%d", parentContext.ParentContext.ThreadIndex, parentContext.ApplicationIndex, componentIndex) + startupPause := computeStartupPause(componentIndex, parentContext.ParentContext.Opts.StartupDelay, parentContext.ParentContext.Opts.StartupJitter) + + logging.Logger.Info("Initiating per component thread %d-%d-%d with pause %s", parentContext.ParentContext.ThreadIndex, parentContext.ApplicationIndex, componentIndex, startupPause) perComponentCtx := &PerComponentContext{ PerComponentWG: perComponentWG, ComponentIndex: componentIndex, + StartupPause: startupPause, ParentContext: parentContext, ComponentName: fmt.Sprintf("%s-comp-%d", parentContext.ApplicationName, componentIndex), } diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index b20c34c47c..bf3154d501 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -40,6 +40,8 @@ type Opts struct { ReleasePolicy string RunPrefix string Stage bool + StartupDelay time.Duration + StartupJitter time.Duration TestScenarioGitURL string TestScenarioPathInRepo string TestScenarioRevision string @@ -83,6 +85,14 @@ func (o *Opts) ProcessOptions() error { } } + // If startup delay specified, make sure jitter is not bigger than 2 * delay + if o.StartupDelay != 0 { + if o.StartupJitter > o.StartupDelay * 2 { + fmt.Print("Warning: Lowering startup jitter as it was bigger than delay\n") + o.StartupJitter = o.StartupDelay * 2 + } + } + // Convert options struct to pretty JSON jsonOptions, err2 := json.MarshalIndent(o, "", " ") if err2 != nil { From aa16f81e708e9c15df2679bddb9850446b79769e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 10 Sep 2025 07:58:05 +0200 Subject: [PATCH 242/321] feat: Make sure all 
times from pkg/clients/has/components.go are displayed in same format --- pkg/clients/has/components.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index 84536c5876..48a157a2d1 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -369,7 +369,7 @@ func (h *HasController) ScaleComponentReplicas(component *appservice.Component, func (h *HasController) DeleteComponent(name string, namespace string, reportErrorOnNotFound bool) error { // temporary logs start := time.Now() - GinkgoWriter.Printf("Start to delete component '%s' at %s\n", name, start.Format(time.RFC3339)) + GinkgoWriter.Printf("Start to delete component '%s' at %s\n", name, start.Format(time.RFC3339Nano)) component := appservice.Component{ ObjectMeta: metav1.ObjectMeta{ @@ -388,7 +388,7 @@ func (h *HasController) DeleteComponent(name string, namespace string, reportErr // temporary logs deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete component '%s' at %s. It took '%f' minutes\n", name, time.Now().Format(time.RFC3339), deletionTime) + GinkgoWriter.Printf("Finish to delete component '%s' at %s. 
It took '%f' minutes\n", name, time.Now().Format(time.RFC3339Nano), deletionTime) return err } @@ -397,7 +397,7 @@ func (h *HasController) DeleteComponent(name string, namespace string, reportErr func (h *HasController) DeleteAllComponentsInASpecificNamespace(namespace string, timeout time.Duration) error { // temporary logs start := time.Now() - GinkgoWriter.Printf("Start to delete all components in namespace '%s' at %s\n", namespace, start.String()) + GinkgoWriter.Printf("Start to delete all components in namespace '%s' at %s\n", namespace, start.Format(time.RFC3339Nano)) if err := h.KubeRest().DeleteAllOf(context.Background(), &appservice.Component{}, rclient.InNamespace(namespace)); err != nil { return fmt.Errorf("error deleting components from the namespace %s: %+v", namespace, err) @@ -414,7 +414,7 @@ func (h *HasController) DeleteAllComponentsInASpecificNamespace(namespace string // temporary logs deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete all components in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339), deletionTime) + GinkgoWriter.Printf("Finish to delete all components in namespace '%s' at %s. 
It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339Nano), deletionTime) return err } @@ -573,7 +573,7 @@ func (h *HasController) CheckImageRepositoryExists(namespace, componentName stri func (h *HasController) DeleteAllImageRepositoriesInASpecificNamespace(namespace string, timeout time.Duration) error { // temporary logs start := time.Now() - GinkgoWriter.Printf("Start to delete all image repositories in namespace '%s' at %s\n", namespace, start.String()) + GinkgoWriter.Printf("Start to delete all image repositories in namespace '%s' at %s\n", namespace, start.Format(time.RFC3339Nano)) if err := h.KubeRest().DeleteAllOf(context.Background(), &imagecontroller.ImageRepository{}, rclient.InNamespace(namespace)); err != nil { return fmt.Errorf("error deleting image repositories from the namespace %s: %+v", namespace, err) @@ -590,7 +590,7 @@ func (h *HasController) DeleteAllImageRepositoriesInASpecificNamespace(namespace // temporary logs deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete all image repositories in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339), deletionTime) + GinkgoWriter.Printf("Finish to delete all image repositories in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339Nano), deletionTime) return err } From ca3853c2b91ab1c62acc0e57f0ebba1391b74ef8 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 11 Sep 2025 12:31:01 +0200 Subject: [PATCH 243/321] feat: Updated error: DNF failed to download repodata from Download Devel because timeout 2025-09-11T07:44:47+0000 DEBUG error: Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30000 ms: Timeout was reached] (https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml). 
2025-09-11T07:44:47+0000 WARNING Errors during downloading metadata for repository 'build': - Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30001 ms: Timeout was reached] - Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30000 ms: Timeout was reached] 2025-09-11T07:44:47+0000 DDEBUG Cleaning up. 2025-09-11T07:44:47+0000 DDEBUG Plugins were unloaded. 2025-09-11T07:44:47+0000 SUBDEBUG Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 574, in load ret = self._repo.load() ^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.12/site-packages/libdnf/repo.py", line 467, in load return _repo.Repo_load(self) ^^^^^^^^^^^^^^^^^^^^^ libdnf._error.Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 67, in main return _main(base, args, cli_class, option_parser_class) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 106, in _main return cli_run(cli, base) ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/dnf/cli/main.py", line 122, in cli_run cli.run() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 1090, in run self._process_demands() File "/usr/lib/python3.12/site-packages/dnf/cli/cli.py", line 779, in _process_demands self.base.fill_sack( File "/usr/lib/python3.12/site-packages/dnf/base.py", line 413, in fill_sack self._add_repo_to_sack(r) File "/usr/lib/python3.12/site-packages/dnf/base.py", line 141, 
in _add_repo_to_sack repo.load() File "/usr/lib/python3.12/site-packages/dnf/repo.py", line 581, in load raise dnf.exceptions.RepoError(str(e)) dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried 2025-09-11T07:44:47+0000 CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried 2025-09-11T07:44:57+0000 INFO --- logging initialized --- 2025-09-11T07:44:57+0000 DDEBUG timer: config: 1 ms 2025-09-11T07:44:57+0000 WARNING No matches found for the following disable plugin patterns: local, spacewalk, versionlock 2025-09-11T07:44:57+0000 DEBUG Loaded plugins: builddep, changelog, config-manager, copr, debuginfo-install, download, generate_completion_cache, groups-manager, needs-restarting, playground, product-id, repoclosure, repodiff, repograph, repomanage, reposync, subscription-manager, system-upgrade, uploadprofile 2025-09-11T07:44:57+0000 INFO Updating Subscription Management repositories. 2025-09-11T07:44:57+0000 INFO Unable to read consumer identity 2025-09-11T07:44:57+0000 INFO This system is not registered with an entitlement server. You can use subscription-manager to register. 
2025-09-11T07:44:57+0000 DEBUG DNF version: 4.20.0 2025-09-11T07:44:57+0000 DDEBUG Command: dnf-3 --installroot /var/lib/mock/rhel-10.1-build-repo_8976169/root/ --setopt=deltarpm=False --setopt=allow_vendor_change=yes --allowerasing --disableplugin=local --disableplugin=spacewalk --disableplugin=versionlock install @build 2025-09-11T07:44:57+0000 DDEBUG Installroot: /var/lib/mock/rhel-10.1-build-repo_8976169/root/ 2025-09-11T07:44:57+0000 DDEBUG Releasever: None 2025-09-11T07:44:57+0000 DEBUG cachedir: /var/lib/mock/rhel-10.1-build-repo_8976169/root/var/cache/yum 2025-09-11T07:44:57+0000 DDEBUG Base command: install 2025-09-11T07:44:57+0000 DDEBUG Extra commands: ['--installroot', '/var/lib/mock/rhel-10.1-build-repo_8976169/root/', '--setopt=deltarpm=False', '--setopt=allow_vendor_change=yes', '--allowerasing', '--disableplugin=local', '--disableplugin=spacewalk', '--disableplugin=versionlock', 'install', '@build'] 2025-09-11T07:44:57+0000 DEBUG User-Agent: constructed: 'libdnf (Red Hat Enterprise Linux 10.0; generic; Linux.s390x)' 2025-09-11T07:44:57+0000 DEBUG repo: downloading from remote: build 2025-09-11T07:45:27+0000 DEBUG error: Curl error (28): Timeout was reached for https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml [Failed to connect to download.devel.redhat.com port 443 after 30001 ms: Timeout was reached] (https://download.devel.redhat.com/brewroot/repos/rhel-10.1-build/8976169/s390x/repodata/repomd.xml). 
--- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index de22c514e8..456e15bd69 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -124,7 +124,7 @@ ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download-[0-9]+.beak-[0-9]+.prod.iad2.dc.redhat.com"), - ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download.devel.redhat.com"), + ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .*/mock/.*Failed to connect to 
download.devel.redhat.com"), ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found"), ("Enterprise contract results failed validation", r"^false $"), ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), From 88fb2cd2f7c6c056fc687967212bf93333cbbfc1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 11 Sep 2025 13:13:29 +0200 Subject: [PATCH 244/321] feat: New error: Repo templating failed when updating file on github.com because 500 I0911 03:08:18.882657 35140 logging.go:30] DEBUG Repo-templating workflow: Cleaned up (second cleanup) for jhutar-tenant/undef-app-thxgw/undef-app-thxgw-comp-0 I0911 03:08:19.619273 35140 logging.go:64] FAIL(64): Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/undef-app-thxgw-comp-0-pull-request.yaml in repo nodejs-devfile-sample-undef-jhutar-tenant revision main: error when updating a file on github: PUT https://api.github.com/repos/rhtap-perf-test/nodejs-devfile-sample-undef-jhutar-tenant/contents/.tekton/undef-app-thxgw-comp-0-pull-request.yaml: 500 [] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 456e15bd69..297af9768e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -77,6 +77,7 @@ ("Repo forking failed because import failed", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. fork to complete: Forking of project [^ ]+ .ID: [0-9]+. failed with import status: failed"), ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. 
Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), + ("Repo templating failed when updating file on github.com because 500", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 500"), ("Repo templating failed when updating file on github.com because 504", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Timeout creating application calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application [^ ]+: Internal error occurred: failed calling webhook .mapplication.kb.io.: failed to call webhook: Post .https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application[^ ]+.: context deadline exceeded"), From b66da69f37f7ebde0f6c380e57de2d6caf9b6340 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 11 Sep 2025 13:18:15 +0200 Subject: [PATCH 245/321] feat: New error: Repo forking failed as we got TLS handshake timeout talking to GitLab CEE I0910 13:42:29.558797 22743 logging.go:30] DEBUG Forking repository https://gitlab.cee.redhat.com/jhutar/nodejs-devfile-sample5 with suffix undef-fork-jhutar-tenant to jhutar I0910 13:42:39.894089 22743 
logging.go:64] FAIL(80): Repo forking failed: Error getting project jhutar/nodejs-devfile-sample5-undef-fork-jhutar-tenant: Get "https://gitlab.cee.redhat.com/api/v4/projects/jhutar%2Fnodejs-devfile-sample5-undef-fork-jhutar-tenant": net/http: TLS handshake timeout --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 297af9768e..5418b6f0d3 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -73,6 +73,7 @@ ("Repo forking failed as GitLab CEE says 500 Internal Server Error", r"Repo forking failed: Error deleting project .*: GET https://gitlab.cee.redhat.com/.*: 500 failed to parse unknown error format.*500: We're sorry, something went wrong on our end"), ("Repo forking failed as the target is still being deleted", r"Repo forking failed: Error forking project .* POST https://gitlab.cee.redhat.com.* 409 .*Project namespace name has already been taken, The project is still being deleted"), ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error deleting project .*: Delete \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), + ("Repo forking failed as we got TLS handshake timeout talking to GitLab CEE", r"Repo forking failed: Error getting project [^ ]+: Get \"https://gitlab.cee.redhat.com/api/v4/projects/.*\": net/http: TLS handshake timeout"), ("Repo forking failed because gitlab.com returned 503", r"Repo forking failed: Error checking repository .*: GET https://api.github.com/repos/.*: 503 No server is currently available to service your request. Sorry about that. Please try resubmitting your request and contact us if the problem persists.*"), ("Repo forking failed because import failed", r"Repo forking failed: Error waiting for project [^ ]+ .ID: [0-9]+. fork to complete: Forking of project [^ ]+ .ID: [0-9]+. 
failed with import status: failed"), ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), From eb13fedf7106386941c1d454664d000273b882bb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 11 Sep 2025 13:22:50 +0200 Subject: [PATCH 246/321] feat: Updated error: Invalid reference when processing SBOM collected-data/konflux-perfscale-tenant/1/pod-undef-app-axfli-comp-0-on-push-gdn99-upload-to-quay-pod-step-upload-sbom-to-quay.log Error: could not parse reference: quay.io/konflux-fedora/konflux-perfscale-tenant/undef-app-axfli-comp-0:on-pr-e2a47b93fdfdcc161967c73303b30365904523b0.nvr-libecpg-16.4-3.fc44@ main.go:74: error during command execution: could not parse reference: quay.io/konflux-fedora/konflux-perfscale-tenant/undef-app-axfli-comp-0:on-pr-e2a47b93fdfdcc161967c73303b30365904523b0.nvr-libecpg-16.4-3.fc44@ --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 5418b6f0d3..0f270baa29 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -159,7 +159,7 @@ ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected 
end of JSON input\""), - ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/redhat-user-workloads/[^ ]+"), + ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. 
Command exited with non-zero status 1"), From 8e922d37fdd4f0f37836a8ab07165da797bca6ed Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 15 Sep 2025 09:50:37 +0200 Subject: [PATCH 247/321] feat: Also show number of successful runs when computing KPI --- tests/load-tests/evaluate.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 8d16b1a4d6..4c6efb2310 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -41,6 +41,11 @@ "validateReleaseCondition", ] +# These metrics will be ignored if running on non-CI cluster +METRICS_CI = [ + "HandleUser", +] + # These metrics will be ignored if ITS was skipped METRICS_ITS = [ "createIntegrationTestScenario", @@ -118,6 +123,9 @@ def main(): # Determine what metrics we need to skip based on options METRICS_to_skip = [] + if options["Stage"]: + print("NOTE: Ignoring CI cluster related metrics because running against non-CI cluster") + METRICS_to_skip += METRICS_CI if options["TestScenarioGitURL"] == "": print("NOTE: Ignoring ITS related metrics because they were disabled at test run") METRICS_to_skip += METRICS_ITS @@ -154,6 +162,7 @@ def main(): stats = {} kpi_mean = 0.0 + kpi_successes = sys.maxsize kpi_errors = 0 for m in [m for m in METRICS if m not in METRICS_to_skip]: @@ -173,6 +182,9 @@ def main(): else: kpi_mean += stats[m]["pass"]["duration"]["mean"] + if stats[m]["pass"]["duration"]["samples"] < kpi_successes: + kpi_successes = stats[m]["pass"]["duration"]["samples"] + if stats[m]["pass"]["duration"]["samples"] == 0: if kpi_errors == 0: kpi_errors += 1 @@ -187,12 +199,14 @@ def main(): stats["KPI"] = {} stats["KPI"]["mean"] = kpi_mean + stats["KPI"]["successes"] = kpi_successes stats["KPI"]["errors"] = kpi_errors #print("Final stats:") #print(json.dumps(stats, indent=4)) print(f"KPI mean: {stats['KPI']['mean']}") + print(f"KPI successes: {stats['KPI']['successes']}") print(f"KPI errors: 
{stats['KPI']['errors']}") with open(output_file, "w") as fp: From 6b08d058fba2c3d19bfbdbb71aa30ffa5aeb8b6d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 15 Sep 2025 09:57:59 +0200 Subject: [PATCH 248/321] feat: New error: Repo templating failed when updating file on github.com because 502 I0913 14:45:30.932512 46416 logging.go:30] DEBUG Repo-templating workflow: Cleaned up (second cleanup) for jhutar-1-tenant/undef-app-rcinq/undef-app-rcinq-comp-0 I0913 14:45:41.367355 46416 logging.go:64] FAIL(64): Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/undef-app-rcinq-comp-0-pull-request.yaml in repo libecpg-srcfedora-fork-undef-jhutar-1-tenant revision main: error when updating a file on github: PUT https://api.github.com/repos/rhtap-perf-test/libecpg-srcfedora-fork-undef-jhutar-1-tenant/contents/.tekton/undef-app-rcinq-comp-0-pull-request.yaml: 502 Server Error [] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 0f270baa29..54b49ba3b3 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -79,6 +79,7 @@ ("Repo forking failed when deleting target repo on github.com because 504", r"Repo forking failed: Error deleting repository .*: DELETE https://api.github.com/repos/.*: 504 We couldn't respond to your request in time. Sorry about that. Please try resubmitting your request and contact us if the problem persists."), ("Repo forking failed when deleting target repo on gitlab.com (not CEE!) 
due unathorized", r"Repo forking failed: Error deleting project .* DELETE https://gitlab.com/.* 401 .* Unauthorized"), ("Repo templating failed when updating file on github.com because 500", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 500"), + ("Repo templating failed when updating file on github.com because 502", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 502 Server Error"), ("Repo templating failed when updating file on github.com because 504", r"Repo-templating workflow component cleanup failed: Error templating PaC files: Failed to update file .tekton/[^ ]+.yaml in repo [^ ]+ revision main: error when updating a file on github: PUT https://api.github.com/repos/[^ ]+: 504 We couldn't respond to your request in time. Sorry about that. 
Please try resubmitting your request and contact us if the problem persists."), ("Test Pipeline failed", r"Test Pipeline Run failed run:.*Message:Tasks Completed: [0-9]+ \(Failed: [1-9]+,"), ("Timeout creating application calling mapplication.kb.io webhook", r"Application failed creation: Unable to create the Application [^ ]+: Internal error occurred: failed calling webhook .mapplication.kb.io.: failed to call webhook: Post .https://application-service-webhook-service.application-service.svc:443/mutate-appstudio-redhat-com-v1alpha1-application[^ ]+.: context deadline exceeded"), From 939718dc93cf9814a3cc97c14e9cba575a177081 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 07:46:12 +0200 Subject: [PATCH 249/321] refactor(KONFLUX-10143): break import cycle between journey and logging The `logging.Measure()` function requires access to the `ThreadIndex` field of the `MainContext` struct for more detailed logging. Attempting to add a type assertion for `*journey.MainContext` within the `logging` package created an import cycle, as the `journey` package already depends on `logging`. To resolve this, a new `pkg/types` package has been introduced to hold shared context structs (`MainContext`, `PerApplicationContext`, `PerComponentContext`). Both the `journey` and `logging` packages now import `types`, breaking the circular dependency. All references to these context structs have been updated throughout the codebase to use the new `types` package. 
Generated-By: Gemini --- tests/load-tests/loadtest.go | 7 ++- .../pkg/journey/handle_applications.go | 3 +- .../pkg/journey/handle_collections.go | 10 ++-- .../pkg/journey/handle_component.go | 3 +- .../handle_integration_test_scenarios.go | 3 +- .../journey/handle_persistent_volume_claim.go | 3 +- .../load-tests/pkg/journey/handle_pipeline.go | 3 +- .../pkg/journey/handle_releases_run.go | 4 +- .../pkg/journey/handle_releases_setup.go | 4 +- .../pkg/journey/handle_repo_templating.go | 3 +- .../load-tests/pkg/journey/handle_test_run.go | 3 +- tests/load-tests/pkg/journey/handle_users.go | 7 ++- tests/load-tests/pkg/journey/journey.go | 58 +++---------------- tests/load-tests/pkg/logging/time_and_log.go | 10 +++- tests/load-tests/pkg/types/types.go | 48 +++++++++++++++ 15 files changed, 95 insertions(+), 74 deletions(-) create mode 100644 tests/load-tests/pkg/types/types.go diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 2220d16c5c..36f5f1c85b 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -6,6 +6,7 @@ import "time" import journey "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/journey" import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import cobra "github.com/spf13/cobra" import klog "k8s.io/klog/v2" @@ -134,7 +135,7 @@ func main() { } // Single user journey -func perUserThread(threadCtx *journey.MainContext) { +func perUserThread(threadCtx *types.MainContext) { defer threadCtx.ThreadsWG.Done() time.Sleep(threadCtx.StartupPause) @@ -244,7 +245,7 @@ func perUserThread(threadCtx *journey.MainContext) { } // Single application journey (there can be multiple parallel apps per user) -func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { +func perApplicationThread(perApplicationCtx 
*types.PerApplicationContext) { defer perApplicationCtx.PerApplicationWG.Done() defer func() { _, err := logging.Measure(journey.HandlePerApplicationCollection, perApplicationCtx) @@ -294,7 +295,7 @@ func perApplicationThread(perApplicationCtx *journey.PerApplicationContext) { } // Single component journey (there can be multiple parallel comps per app) -func perComponentThread(perComponentCtx *journey.PerComponentContext) { +func perComponentThread(perComponentCtx *types.PerComponentContext) { defer perComponentCtx.PerComponentWG.Done() defer func() { _, err := logging.Measure(journey.HandlePerComponentCollection, perComponentCtx) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 672b55f376..eb14c5c0bf 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -4,6 +4,7 @@ import "fmt" import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import utils "github.com/konflux-ci/e2e-tests/pkg/utils" @@ -34,7 +35,7 @@ func validateApplication(f *framework.Framework, name, namespace string) error { return err } -func HandleApplication(ctx *PerApplicationContext) error { +func HandleApplication(ctx *types.PerApplicationContext) error { var err error logging.Logger.Debug("Creating application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 31386be6f7..4e9edfbed1 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -8,12 +8,12 @@ import ( "path/filepath" logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + types 
"github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" framework "github.com/konflux-ci/e2e-tests/pkg/framework" -) - -import k8s_api_errors "k8s.io/apimachinery/pkg/api/errors" + k8s_api_errors "k8s.io/apimachinery/pkg/api/errors" +) func getDirName(baseDir, namespace, iteration string) string { return filepath.Join(baseDir, "collected-data", namespace, iteration) + "/" @@ -283,7 +283,7 @@ func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appN return nil } -func HandlePerApplicationCollection(ctx *PerApplicationContext) error { +func HandlePerApplicationCollection(ctx *types.PerApplicationContext) error { if ctx.ApplicationName == "" { logging.Logger.Debug("Application name not populated, so skipping per-application collections in %s", ctx.ParentContext.Namespace) return nil @@ -311,7 +311,7 @@ func HandlePerApplicationCollection(ctx *PerApplicationContext) error { return nil } -func HandlePerComponentCollection(ctx *PerComponentContext) error { +func HandlePerComponentCollection(ctx *types.PerComponentContext) error { if ctx.ComponentName == "" { logging.Logger.Debug("Component name not populated, so skipping per-component collections in %s", ctx.ParentContext.ParentContext.Namespace) return nil diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index b71e0761f8..dfdb027dcc 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -9,6 +9,7 @@ import ( "time" logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" constants "github.com/konflux-ci/e2e-tests/pkg/constants" @@ -316,7 +317,7 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap return nil } -func HandleComponent(ctx *PerComponentContext) error { +func HandleComponent(ctx *types.PerComponentContext) error { var err error 
logging.Logger.Debug("Creating component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index 57b18438e1..a4a6c3be41 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -5,6 +5,7 @@ import ( "time" logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" framework "github.com/konflux-ci/e2e-tests/pkg/framework" @@ -31,7 +32,7 @@ func createIntegrationTestScenario(f *framework.Framework, namespace, name, appN return nil } -func HandleIntegrationTestScenario(ctx *PerApplicationContext) error { +func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { if ctx.ParentContext.Opts.TestScenarioGitURL == "" { logging.Logger.Debug("Integration Test Scenario GIT not provided, not creating it") return nil diff --git a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go index 4f6fa1dbb5..1a2262dde6 100644 --- a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go +++ b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go @@ -4,6 +4,7 @@ import "context" import "fmt" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -25,7 +26,7 @@ func collectPersistentVolumeClaims(f *framework.Framework, namespace string) err return nil } -func HandlePersistentVolumeClaim(ctx *MainContext) error { +func HandlePersistentVolumeClaim(ctx *types.MainContext) error { if !ctx.Opts.WaitPipelines { return 
nil // if build pipeline runs are not done yet, it does not make sense to collect PV timings } diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index acef97c5a5..08964dfda2 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -6,6 +6,7 @@ import ( "time" logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" + types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" framework "github.com/konflux-ci/e2e-tests/pkg/framework" @@ -109,7 +110,7 @@ func validatePipelineRunSignature(f *framework.Framework, namespace, appName, co return err } -func HandlePipelineRun(ctx *PerComponentContext) error { +func HandlePipelineRun(ctx *types.PerComponentContext) error { if !ctx.ParentContext.ParentContext.Opts.WaitPipelines { return nil } diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index 8d51cbe89e..6d032c3c29 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -5,12 +5,12 @@ import "strings" import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import utils "github.com/konflux-ci/e2e-tests/pkg/utils" import pipeline "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" - // Wait for Release CR to be created func validateReleaseCreation(f *framework.Framework, namespace, snapshotName string) (string, error) { logging.Logger.Debug("Waiting for release for snapshot %s in namespace %s to be created", snapshotName, namespace) @@ -137,7 +137,7 @@ func validateReleaseCondition(f *framework.Framework, namespace, releaseName str } -func HandleReleaseRun(ctx *PerComponentContext) error { +func HandleReleaseRun(ctx 
*types.PerComponentContext) error { if ctx.ParentContext.ParentContext.Opts.ReleasePolicy == "" || !ctx.ParentContext.ParentContext.Opts.WaitRelease { logging.Logger.Info("Skipping waiting for releases because policy was not provided or waiting was disabled") return nil diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index abd71161fa..f5ddcb1a20 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -4,6 +4,7 @@ import "fmt" import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import meta "k8s.io/apimachinery/pkg/api/meta" @@ -12,7 +13,6 @@ import releaseApi "github.com/konflux-ci/release-service/api/v1alpha1" import tektonutils "github.com/konflux-ci/release-service/tekton/utils" import utils "github.com/konflux-ci/e2e-tests/pkg/utils" - // Create ReleasePlan CR func createReleasePlan(f *framework.Framework, namespace, appName string) (string, error) { name := appName + "-rp" @@ -129,7 +129,7 @@ func validateReleasePlanAdmission(f *framework.Framework, namespace, name string } -func HandleReleaseSetup(ctx *PerApplicationContext) error { +func HandleReleaseSetup(ctx *types.PerApplicationContext) error { if ctx.ParentContext.Opts.ReleasePolicy == "" { logging.Logger.Info("Skipping setting up releases because policy was not provided") return nil diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 3b0f80bc33..08a7091b36 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -6,6 +6,7 @@ import "regexp" import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" 
+import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import github "github.com/google/go-github/v44/github" @@ -230,7 +231,7 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so return shaMap, nil } -func HandleRepoForking(ctx *MainContext) error { +func HandleRepoForking(ctx *types.MainContext) error { var suffix string if ctx.Opts.Stage { suffix = ctx.Opts.RunPrefix + "-" + ctx.Namespace diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 55390f66f2..86bae69ffc 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -5,6 +5,7 @@ import "strings" import "time" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import appstudioApi "github.com/konflux-ci/application-api/api/v1alpha1" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" @@ -95,7 +96,7 @@ func validateTestPipelineRunCondition(f *framework.Framework, namespace, itsName return err } -func HandleTest(ctx *PerComponentContext) error { +func HandleTest(ctx *types.PerComponentContext) error { if !ctx.ParentContext.ParentContext.Opts.WaitPipelines || !ctx.ParentContext.ParentContext.Opts.WaitIntegrationTestsPipelines { return nil } diff --git a/tests/load-tests/pkg/journey/handle_users.go b/tests/load-tests/pkg/journey/handle_users.go index 33770121e7..d3c5f46128 100644 --- a/tests/load-tests/pkg/journey/handle_users.go +++ b/tests/load-tests/pkg/journey/handle_users.go @@ -5,6 +5,7 @@ import "time" import "strings" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadtestutils" 
import "github.com/konflux-ci/e2e-tests/pkg/framework" @@ -35,7 +36,7 @@ func provisionFramework(stageUsers []loadtestutils.User, threadIndex int, userna return f, f.UserNamespace, nil } -func HandleUser(ctx *MainContext) error { +func HandleUser(ctx *types.MainContext) error { var err error if ctx.Opts.Stage { @@ -57,7 +58,7 @@ func HandleUser(ctx *MainContext) error { return nil } -func HandleNewFrameworkForApp(ctx *PerApplicationContext) error { +func HandleNewFrameworkForApp(ctx *types.PerApplicationContext) error { var err error ctx.Framework, _, err = provisionFramework( @@ -73,7 +74,7 @@ func HandleNewFrameworkForApp(ctx *PerApplicationContext) error { return nil } -func HandleNewFrameworkForComp(ctx *PerComponentContext) error { +func HandleNewFrameworkForComp(ctx *types.PerComponentContext) error { var err error ctx.Framework, _, err = provisionFramework( diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 35d0499e8a..40cd2834cd 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -8,30 +8,15 @@ import "math/rand" import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadtestutils" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" -import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import util "github.com/devfile/library/v2/pkg/util" // Pointers to all user journey thread contexts -var MainContexts []*MainContext - -// Struct to hold user journey thread data -type MainContext struct { - ThreadsWG *sync.WaitGroup - ThreadIndex int - StartupPause time.Duration - JourneyRepeatsCounter int - Opts *options.Opts - StageUsers *[]loadtestutils.User - Framework *framework.Framework - Username string - Namespace string - ComponentRepoUrl string // overrides 
same value from Opts, needed when templating repos - PerApplicationContexts []*PerApplicationContext -} +var MainContexts []*types.MainContext // Just to create user -func initUserThread(threadCtx *MainContext) { +func initUserThread(threadCtx *types.MainContext) { defer threadCtx.ThreadsWG.Done() var err error @@ -59,7 +44,7 @@ func computeStartupPause(index int, delay, jitter time.Duration) time.Duration { // Start all the user journey threads // TODO split this to two functions and get PurgeOnly code out -func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { +func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { threadsWG := &sync.WaitGroup{} threadsWG.Add(opts.Concurrency) @@ -78,7 +63,7 @@ func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { logging.Logger.Info("Initiating per user thread %d with pause %v", threadIndex, startupPause) - threadCtx := &MainContext{ + threadCtx := &types.MainContext{ ThreadsWG: threadsWG, ThreadIndex: threadIndex, StartupPause: startupPause, @@ -124,20 +109,8 @@ func Setup(fn func(*MainContext), opts *options.Opts) (string, error) { return "", nil } -// Struct to hold data for thread to process each application -type PerApplicationContext struct { - PerApplicationWG *sync.WaitGroup - ApplicationIndex int - StartupPause time.Duration - Framework *framework.Framework - ParentContext *MainContext - ApplicationName string - IntegrationTestScenarioName string - PerComponentContexts []*PerComponentContext -} - // Start all the threads to process all applications per user -func PerApplicationSetup(fn func(*PerApplicationContext), parentContext *MainContext) (string, error) { +func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *types.MainContext) (string, error) { perApplicationWG := &sync.WaitGroup{} perApplicationWG.Add(parentContext.Opts.ApplicationsCount) @@ -146,7 +119,7 @@ func PerApplicationSetup(fn func(*PerApplicationContext), parentContext 
*MainCon logging.Logger.Info("Initiating per application thread %d-%d with pause %v", parentContext.ThreadIndex, applicationIndex, startupPause) - perApplicationCtx := &PerApplicationContext{ + perApplicationCtx := &types.PerApplicationContext{ PerApplicationWG: perApplicationWG, ApplicationIndex: applicationIndex, StartupPause: startupPause, @@ -164,21 +137,8 @@ func PerApplicationSetup(fn func(*PerApplicationContext), parentContext *MainCon return "", nil } -// Struct to hold data for thread to process each component -type PerComponentContext struct { - PerComponentWG *sync.WaitGroup - ComponentIndex int - StartupPause time.Duration - Framework *framework.Framework - ParentContext *PerApplicationContext - ComponentName string - SnapshotName string - MergeRequestNumber int - ReleaseName string -} - // Start all the threads to process all components per application -func PerComponentSetup(fn func(*PerComponentContext), parentContext *PerApplicationContext) (string, error) { +func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types.PerApplicationContext) (string, error) { perComponentWG := &sync.WaitGroup{} perComponentWG.Add(parentContext.ParentContext.Opts.ComponentsCount) @@ -187,7 +147,7 @@ func PerComponentSetup(fn func(*PerComponentContext), parentContext *PerApplicat logging.Logger.Info("Initiating per component thread %d-%d-%d with pause %s", parentContext.ParentContext.ThreadIndex, parentContext.ApplicationIndex, componentIndex, startupPause) - perComponentCtx := &PerComponentContext{ + perComponentCtx := &types.PerComponentContext{ PerComponentWG: perComponentWG, ComponentIndex: componentIndex, StartupPause: startupPause, diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index 9aba84f0db..bd1abecef2 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -11,11 +11,13 @@ import "os" import "encoding/csv" import "sync" 
+import "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" + var measurementsQueue chan MeasurementEntry // channel to send measurements to -var errorsQueue chan ErrorEntry // chanel to send failures to +var errorsQueue chan ErrorEntry // chanel to send failures to var measurementsOutput string // path to CSV where to save measurements -var errorsOutput string // path to CSV where to save measurements +var errorsOutput string // path to CSV where to save measurements var writerWaitGroup sync.WaitGroup @@ -47,7 +49,6 @@ func (e *ErrorEntry) GetSliceOfStrings() []string { return []string{e.Timestamp.Format(time.RFC3339Nano), fmt.Sprintf("%d", e.Code), e.Message} } - // Initialize channels and start functions that are processing records func MeasurementsStart(directory string) { batchSize = 3 @@ -183,6 +184,9 @@ func Measure(fn interface{}, params ...interface{}) (interface{}, error) { for i := 0; i < numParams; i++ { x := 1 key := fmt.Sprintf("%v", reflect.TypeOf(params[i])) + if casted, ok := params[i].(*types.MainContext); ok { + fmt.Printf(">>> %s --- %s --- %v \n", runtime.FuncForPC(funcValue.Pointer()).Name(), key, casted.ThreadIndex) + } value := fmt.Sprintf("%+v", reflect.ValueOf(params[i])) for { keyFull := key + fmt.Sprint(x) diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go new file mode 100644 index 0000000000..219d523178 --- /dev/null +++ b/tests/load-tests/pkg/types/types.go @@ -0,0 +1,48 @@ +package types + +import "sync" +import "time" + +import framework "github.com/konflux-ci/e2e-tests/pkg/framework" +import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadtestutils" +import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" + +// Struct to hold user journey thread data +type MainContext struct { + ThreadsWG *sync.WaitGroup + ThreadIndex int + StartupPause time.Duration + JourneyRepeatsCounter int + Opts *options.Opts + StageUsers *[]loadtestutils.User + 
Framework *framework.Framework + Username string + Namespace string + ComponentRepoUrl string // overrides same value from Opts, needed when templating repos + PerApplicationContexts []*PerApplicationContext +} + +// Struct to hold data for thread to process each application +type PerApplicationContext struct { + PerApplicationWG *sync.WaitGroup + ApplicationIndex int + StartupPause time.Duration + Framework *framework.Framework + ParentContext *MainContext + ApplicationName string + IntegrationTestScenarioName string + PerComponentContexts []*PerComponentContext +} + +// Struct to hold data for thread to process each component +type PerComponentContext struct { + PerComponentWG *sync.WaitGroup + ComponentIndex int + StartupPause time.Duration + Framework *framework.Framework + ParentContext *PerApplicationContext + ComponentName string + SnapshotName string + MergeRequestNumber int + ReleaseName string +} From 9fd488d00b12356a9912c7367a781b36a456f003 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 08:50:06 +0200 Subject: [PATCH 250/321] chore: Make git to ignore few more files --- tests/load-tests/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/load-tests/.gitignore b/tests/load-tests/.gitignore index 1666e0ae3e..841c3cd016 100644 --- a/tests/load-tests/.gitignore +++ b/tests/load-tests/.gitignore @@ -10,5 +10,8 @@ users*.json loadtest OLD/ +run-*/ videos/ collected-data/ +mystoneinst/ +secrets/ From 7a77aeb6d57c281066884a0e0da5cd846339a5e2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 08:52:39 +0200 Subject: [PATCH 251/321] feat: Get thread and repeats info from params, but looks like it is not sufficient --- .../journey/handle_persistent_volume_claim.go | 2 +- tests/load-tests/pkg/logging/time_and_log.go | 61 ++++++++++++++----- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go 
b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go index 1a2262dde6..4d2ec71c19 100644 --- a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go +++ b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go @@ -21,7 +21,7 @@ func collectPersistentVolumeClaims(f *framework.Framework, namespace string) err continue } waittime := (pv.ObjectMeta.CreationTimestamp.Time).Sub(pvc.ObjectMeta.CreationTimestamp.Time) - logging.LogMeasurement("PVC_to_PV_CreationTimestamp", map[string]string{"pv.Name": pv.Name}, waittime, "", nil) + logging.LogMeasurement("PVC_to_PV_CreationTimestamp", -1, -1, -1, -1, map[string]string{"pv.Name": pv.Name}, waittime, "", nil) } return nil } diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index bd1abecef2..11b965f713 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -25,16 +25,20 @@ var batchSize int // when we accumulate this many of records, we dump them to CS // Represents the data about measurement we want to store to CSV type MeasurementEntry struct { - Timestamp time.Time - Metric string - Duration time.Duration - Parameters string - Error error + Timestamp time.Time + PerUserId int + PerAppId int + PerCompId int + RepeatsCounter int + Metric string + Duration time.Duration + Parameters string + Error error } // Helper function to convert struct to slice of string which is needed when converting to CSV func (e *MeasurementEntry) GetSliceOfStrings() []string { - return []string{e.Timestamp.Format(time.RFC3339Nano), e.Metric, fmt.Sprintf("%f", e.Duration.Seconds()), e.Parameters, fmt.Sprintf("%v", e.Error)} + return []string{e.Timestamp.Format(time.RFC3339Nano), fmt.Sprintf("%d", e.PerUserId), fmt.Sprintf("%d", e.PerAppId), fmt.Sprintf("%d", e.PerCompId), fmt.Sprintf("%d", e.RepeatsCounter), e.Metric, fmt.Sprintf("%f", e.Duration.Seconds()), e.Parameters, fmt.Sprintf("%v", e.Error)} } // 
Represents the data about failure we want to store to CSV @@ -168,6 +172,10 @@ func errorsWriter() { // can be generalized completely, but it is good enough for our needs. func Measure(fn interface{}, params ...interface{}) (interface{}, error) { funcValue := reflect.ValueOf(fn) + perUserId := -1 + perAppId := -1 + perCompId := -1 + repeatsCounter := -1 // Construct arguments for the function call numParams := len(params) @@ -183,11 +191,28 @@ func Measure(fn interface{}, params ...interface{}) (interface{}, error) { paramsStorable := make(map[string]string) for i := 0; i < numParams; i++ { x := 1 - key := fmt.Sprintf("%v", reflect.TypeOf(params[i])) + + // If the parameter we are processing now is per user/app/comp + // context, extract additional metadata about this function call. if casted, ok := params[i].(*types.MainContext); ok { - fmt.Printf(">>> %s --- %s --- %v \n", runtime.FuncForPC(funcValue.Pointer()).Name(), key, casted.ThreadIndex) + perUserId = casted.ThreadIndex + repeatsCounter = casted.JourneyRepeatsCounter + } + if casted, ok := params[i].(*types.PerApplicationContext); ok { + perUserId = casted.ParentContext.ThreadIndex + perAppId = casted.ApplicationIndex + repeatsCounter = casted.ParentContext.JourneyRepeatsCounter + } + if casted, ok := params[i].(*types.PerComponentContext); ok { + perUserId = casted.ParentContext.ParentContext.ThreadIndex + perAppId = casted.ParentContext.ApplicationIndex + perCompId = casted.ComponentIndex + repeatsCounter = casted.ParentContext.ParentContext.JourneyRepeatsCounter } + + key := fmt.Sprintf("%v", reflect.TypeOf(params[i])) value := fmt.Sprintf("%+v", reflect.ValueOf(params[i])) + for { keyFull := key + fmt.Sprint(x) if _, ok := paramsStorable[keyFull]; !ok { @@ -208,7 +233,7 @@ func Measure(fn interface{}, params ...interface{}) (interface{}, error) { defer func() { elapsed := time.Since(startTime) - LogMeasurement(funcName, paramsStorable, elapsed, fmt.Sprintf("%+v", resultInterValue), errInterValue) + 
LogMeasurement(funcName, perUserId, perAppId, perCompId, repeatsCounter, paramsStorable, elapsed, fmt.Sprintf("%+v", resultInterValue), errInterValue) }() // Call the function with provided arguments @@ -228,7 +253,7 @@ func Measure(fn interface{}, params ...interface{}) (interface{}, error) { } // Store given measurement -func LogMeasurement(metric string, params map[string]string, elapsed time.Duration, result string, err error) { +func LogMeasurement(metric string, perUserId, perAppId, perCompId, repeatsCounter int, params map[string]string, elapsed time.Duration, result string, err error) { // Extract parameter keys into a slice so we can sort them var paramsKeys []string for k := range params { @@ -249,13 +274,17 @@ func LogMeasurement(metric string, params map[string]string, elapsed time.Durati } params_string = strings.TrimLeft(params_string, " ") - Logger.Trace("Measured function: %s, Duration: %s, Params: %s, Result: %s, Error: %v\n", metric, elapsed, params_string, result, err) + Logger.Trace("Measured function: %s, Thread: %d/%d/%d, Repeat: %d, Duration: %s, Params: %s, Result: %s, Error: %v\n", metric, perUserId, perAppId, perCompId, repeatsCounter, elapsed, params_string, result, err) data := MeasurementEntry{ - Timestamp: time.Now(), - Metric: metric, - Duration: elapsed, - Parameters: params_string, - Error: err, + Timestamp: time.Now(), + Metric: metric, + PerUserId: perUserId, + PerAppId: perAppId, + PerCompId: perCompId, + RepeatsCounter: repeatsCounter, + Duration: elapsed, + Parameters: params_string, + Error: err, } measurementsQueue <- data } From b3b292d8e950e359904089eb6609b4cc39d4a542 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 09:18:53 +0200 Subject: [PATCH 252/321] feat(KONFLUX-10143): Specify threads info when calling Measure function --- tests/load-tests/loadtest.go | 146 ++++++++++++++++-- .../pkg/journey/handle_applications.go | 8 + .../pkg/journey/handle_component.go | 20 +++ 
.../handle_integration_test_scenarios.go | 4 + .../load-tests/pkg/journey/handle_pipeline.go | 12 ++ .../pkg/journey/handle_releases_run.go | 16 ++ .../pkg/journey/handle_releases_setup.go | 16 ++ .../load-tests/pkg/journey/handle_test_run.go | 12 ++ tests/load-tests/pkg/journey/journey.go | 18 ++- tests/load-tests/pkg/logging/time_and_log.go | 26 +--- 10 files changed, 235 insertions(+), 43 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 36f5f1c85b..6b03b440fa 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -119,13 +119,27 @@ func main() { logging.MeasurementsStart(opts.OutputDir) // Start given number of `perUserThread()` threads using `journey.Setup()` and wait for them to finish - _, err = logging.Measure(journey.Setup, perUserThread, &opts) + _, err = logging.Measure( + -1, + -1, + -1, + -1, + journey.Setup, + perUserThread, + &opts, + ) if err != nil { logging.Logger.Fatal("Threads setup failed: %v", err) } // Cleanup resources - _, err = logging.Measure(journey.Purge) + _, err = logging.Measure( + -1, + -1, + -1, + -1, + journey.Purge, + ) if err != nil { logging.Logger.Error("Purging failed: %v", err) } @@ -222,7 +236,15 @@ func perUserThread(threadCtx *types.MainContext) { for threadCtx.JourneyRepeatsCounter = 1; threadCtx.JourneyRepeatsCounter <= threadCtx.Opts.JourneyRepeats; threadCtx.JourneyRepeatsCounter++ { // Start given number of `perApplicationThread()` threads using `journey.PerApplicationSetup()` and wait for them to finish - _, err = logging.Measure(journey.PerApplicationSetup, perApplicationThread, threadCtx) + _, err = logging.Measure( + threadCtx.ThreadIndex, + -1, + -1, + threadCtx.JourneyRepeatsCounter, + journey.PerApplicationSetup, + perApplicationThread, + threadCtx, + ) if err != nil { logging.Logger.Fatal("Per application threads setup failed: %v", err) } @@ -236,7 +258,14 @@ func perUserThread(threadCtx *types.MainContext) { } // Collect info about PVCs - _, 
err = logging.Measure(journey.HandlePersistentVolumeClaim, threadCtx) + _, err = logging.Measure( + threadCtx.ThreadIndex, + -1, + -1, + threadCtx.JourneyRepeatsCounter, + journey.HandlePersistentVolumeClaim, + threadCtx, + ) if err != nil { logging.Logger.Error("Thread failed: %v", err) return @@ -248,7 +277,14 @@ func perUserThread(threadCtx *types.MainContext) { func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { defer perApplicationCtx.PerApplicationWG.Done() defer func() { - _, err := logging.Measure(journey.HandlePerApplicationCollection, perApplicationCtx) + _, err := logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.HandlePerApplicationCollection, + perApplicationCtx, + ) if err != nil { logging.Logger.Error("Per application thread failed: %v", err) } @@ -259,35 +295,71 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { var err error // Create framework so we do not have to share framework with parent thread - _, err = logging.Measure(journey.HandleNewFrameworkForApp, perApplicationCtx) + _, err = logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.HandleNewFrameworkForApp, + perApplicationCtx, + ) if err != nil { logging.Logger.Error("Per application thread failed: %v", err) return } // Create application - _, err = logging.Measure(journey.HandleApplication, perApplicationCtx) + _, err = logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.HandleApplication, + perApplicationCtx, + ) if err != nil { logging.Logger.Error("Per application thread failed: %v", err) return } // Create integration test scenario - _, err = 
logging.Measure(journey.HandleIntegrationTestScenario, perApplicationCtx) + _, err = logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.HandleIntegrationTestScenario, + perApplicationCtx, + ) if err != nil { logging.Logger.Error("Per application thread failed: %v", err) return } // Create release plan and release plan admission - _, err = logging.Measure(journey.HandleReleaseSetup, perApplicationCtx) + _, err = logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.HandleReleaseSetup, + perApplicationCtx, + ) if err != nil { logging.Logger.Error("Per application thread failed: %v", err) return } // Start given number of `perComponentThread()` threads using `journey.PerComponentSetup()` and wait for them to finish - _, err = logging.Measure(journey.PerComponentSetup, perComponentThread, perApplicationCtx) + _, err = logging.Measure( + perApplicationCtx.ParentContext.ThreadIndex, + perApplicationCtx.ApplicationIndex, + -1, + perApplicationCtx.ParentContext.JourneyRepeatsCounter, + journey.PerComponentSetup, + perComponentThread, + perApplicationCtx, + ) if err != nil { logging.Logger.Fatal("Per component threads setup failed: %v", err) } @@ -298,7 +370,14 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { func perComponentThread(perComponentCtx *types.PerComponentContext) { defer perComponentCtx.PerComponentWG.Done() defer func() { - _, err := logging.Measure(journey.HandlePerComponentCollection, perComponentCtx) + _, err := logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandlePerComponentCollection, + 
perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) } @@ -309,35 +388,70 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { var err error // Create framework so we do not have to share framework with parent thread - _, err = logging.Measure(journey.HandleNewFrameworkForComp, perComponentCtx) + _, err = logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandleNewFrameworkForComp, + perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) return } // Create component - _, err = logging.Measure(journey.HandleComponent, perComponentCtx) + _, err = logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandleComponent, + perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) return } // Wait for build pipiline run - _, err = logging.Measure(journey.HandlePipelineRun, perComponentCtx) + _, err = logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandlePipelineRun, + perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) return } // Wait for test pipiline run - _, err = logging.Measure(journey.HandleTest, perComponentCtx) + _, err = logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + 
perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandleTest, + perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) return } // Wait for release to finish - _, err = logging.Measure(journey.HandleReleaseRun, perComponentCtx) + _, err = logging.Measure( + perComponentCtx.ParentContext.ParentContext.ThreadIndex, + perComponentCtx.ParentContext.ApplicationIndex, + perComponentCtx.ComponentIndex, + perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + journey.HandleReleaseRun, + perComponentCtx, + ) if err != nil { logging.Logger.Error("Per component thread failed: %v", err) return diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index eb14c5c0bf..926ee6fae0 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -41,6 +41,10 @@ func HandleApplication(ctx *types.PerApplicationContext) error { logging.Logger.Debug("Creating application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, createApplication, ctx.Framework, ctx.ParentContext.Namespace, @@ -52,6 +56,10 @@ func HandleApplication(ctx *types.PerApplicationContext) error { } _, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, validateApplication, ctx.Framework, ctx.ApplicationName, diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index dfdb027dcc..2ab6b3a3ae 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -324,6 +324,10 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Create component _, err = logging.Measure( 
+ ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, createComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -342,6 +346,10 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Validate component build service account created _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -354,6 +362,10 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Configure imagePullSecrets needed for component build task images if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, configurePipelineImagePullSecrets, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -367,6 +379,10 @@ func HandleComponent(ctx *types.PerComponentContext) error { var pullIface interface{} pullIface, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, getPaCPullNumber, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -399,6 +415,10 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Skip what we do not care about, merge PR, graft pipeline yamls _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, utilityRepoTemplatingComponentCleanup, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git 
a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index a4a6c3be41..455446065b 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -44,6 +44,10 @@ func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { logging.Logger.Debug("Creating integration test scenario %s for application %s in namespace %s", name, ctx.ApplicationName, ctx.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, createIntegrationTestScenario, ctx.Framework, ctx.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index 08964dfda2..4054cc5686 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -120,6 +120,10 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validatePipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -133,6 +137,10 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, 
validatePipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -146,6 +154,10 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be signed", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validatePipelineRunSignature, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index 6d032c3c29..c56a8d7355 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -148,6 +148,10 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { var err error iface, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateReleaseCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -163,6 +167,10 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateReleasePipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -173,6 +181,10 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateReleasePipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ 
-183,6 +195,10 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateReleaseCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index f5ddcb1a20..ff1cbf603f 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -142,6 +142,10 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { var err error iface, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, createReleasePlan, ctx.Framework, ctx.ParentContext.Namespace, @@ -157,6 +161,10 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, createReleasePlanAdmission, ctx.Framework, ctx.ParentContext.Namespace, @@ -177,6 +185,10 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, validateReleasePlan, ctx.Framework, ctx.ParentContext.Namespace, @@ -187,6 +199,10 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( + ctx.ParentContext.ThreadIndex, + ctx.ApplicationIndex, + -1, + ctx.ParentContext.JourneyRepeatsCounter, validateReleasePlanAdmission, ctx.Framework, ctx.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index 86bae69ffc..c335c07619 100644 --- 
a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -105,6 +105,10 @@ func HandleTest(ctx *types.PerComponentContext) error { var ok bool result1, err1 := logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateSnapshotCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -124,6 +128,10 @@ func HandleTest(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateTestPipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -137,6 +145,10 @@ func HandleTest(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( + ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ApplicationIndex, + ctx.ComponentIndex, + ctx.ParentContext.ParentContext.JourneyRepeatsCounter, validateTestPipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 40cd2834cd..8c6fb537f7 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -22,7 +22,14 @@ func initUserThread(threadCtx *types.MainContext) { var err error // Create user if needed - _, err = logging.Measure(HandleUser, threadCtx) + _, err = logging.Measure( + threadCtx.ThreadIndex, + -1, + -1, + threadCtx.JourneyRepeatsCounter, + 
HandleUser, + threadCtx, + ) if err != nil { logging.Logger.Error("Thread failed: %v", err) return @@ -91,7 +98,14 @@ func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { // Fork repositories sequentially as GitHub do not allow more than 3 running forks in parallel anyway for _, threadCtx := range MainContexts { - _, err = logging.Measure(HandleRepoForking, threadCtx) + _, err = logging.Measure( + threadCtx.ThreadIndex, + -1, + -1, + threadCtx.JourneyRepeatsCounter, + HandleRepoForking, + threadCtx, + ) if err != nil { return "", err } diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index 11b965f713..9a33723328 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -11,8 +11,6 @@ import "os" import "encoding/csv" import "sync" -import "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" - var measurementsQueue chan MeasurementEntry // channel to send measurements to var errorsQueue chan ErrorEntry // chanel to send failures to @@ -170,12 +168,8 @@ func errorsWriter() { // Measure duration of a given function run with given parameters and return what function returned // This only returns first (data) and last (error) returned value. Maybe this // can be generalized completely, but it is good enough for our needs. 
-func Measure(fn interface{}, params ...interface{}) (interface{}, error) { +func Measure(perUserId, perAppId, perCompId, repeatsCounter int, fn interface{}, params ...interface{}) (interface{}, error) { funcValue := reflect.ValueOf(fn) - perUserId := -1 - perAppId := -1 - perCompId := -1 - repeatsCounter := -1 // Construct arguments for the function call numParams := len(params) @@ -192,24 +186,6 @@ func Measure(fn interface{}, params ...interface{}) (interface{}, error) { for i := 0; i < numParams; i++ { x := 1 - // If the parameter we are processing now is per user/app/comp - // context, extract additional metadata about this function call. - if casted, ok := params[i].(*types.MainContext); ok { - perUserId = casted.ThreadIndex - repeatsCounter = casted.JourneyRepeatsCounter - } - if casted, ok := params[i].(*types.PerApplicationContext); ok { - perUserId = casted.ParentContext.ThreadIndex - perAppId = casted.ApplicationIndex - repeatsCounter = casted.ParentContext.JourneyRepeatsCounter - } - if casted, ok := params[i].(*types.PerComponentContext); ok { - perUserId = casted.ParentContext.ParentContext.ThreadIndex - perAppId = casted.ParentContext.ApplicationIndex - perCompId = casted.ComponentIndex - repeatsCounter = casted.ParentContext.ParentContext.JourneyRepeatsCounter - } - key := fmt.Sprintf("%v", reflect.TypeOf(params[i])) value := fmt.Sprintf("%+v", reflect.ValueOf(params[i])) From bc8ceeeb0fd1f1f5d4b8c7707e1e0c0ecd6c9bda Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 12:14:19 +0200 Subject: [PATCH 253/321] refactor(KONFLUX-10143): Compute KPI stats without guessing with the new data we have in timings csv --- tests/load-tests/evaluate.py | 144 ++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 44 deletions(-) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 4c6efb2310..4779816427 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -11,10 +11,14 @@ # Column 
indexes in input data COLUMN_WHEN = 0 -COLUMN_METRIC = 1 -COLUMN_DURATION = 2 -COLUMN_PARAMS = 3 -COLUMN_ERROR = 4 +COLUMN_PER_USER_T = 1 +COLUMN_PER_APP_T = 2 +COLUMN_PER_COMP_T = 3 +COLUMN_REPEATS_COUNTER = 4 +COLUMN_METRIC = 5 +COLUMN_DURATION = 6 +COLUMN_PARAMS = 7 +COLUMN_ERROR = 8 # Metrics we care about that together form KPI metric duration METRICS = [ @@ -66,6 +70,42 @@ ] +class SinglePass: + """Structure to record data about one specific pass through loadtest workload, identified by an identifier (tuple with loadtest's per user, per application and per component thread index and repeats counter).""" + + def __init__(self): + self._metrics = {} + + def add(self, metric, duration): + """Adds given metric to data about this pass.""" + assert metric not in self._metrics + self._metrics[metric] = duration + + def complete(self, expected_metrics): + """Checks if we have all expected metrics.""" + current = set(self._metrics.keys()) + return current == expected_metrics + + def total(self): + """Return total duration.""" + return sum(self._metrics.values()) + + @staticmethod + def i_matches(identifier1, identifier2): + """Check if first provided identifier matches second one.
When we have -1 instead of some value(s) in the first identifier, it acts as a wildcard.""" + if identifier1[3] == -1 or identifier1[3] == identifier2[3]: + if identifier1[2] == -1 or identifier1[2] == identifier2[2]: + if identifier1[1] == -1 or identifier1[1] == identifier2[1]: + if identifier1[0] == -1 or identifier1[0] == identifier2[0]: + return True + return False + + @staticmethod + def i_complete(identifier): + """Check this is complete identifier (does not contain wildcards).""" + return -1 not in identifier + + def str2date(date_str): if isinstance(date_str, datetime.datetime): return date_str @@ -122,18 +162,31 @@ def main(): options = json.load(fp) # Determine what metrics we need to skip based on options - METRICS_to_skip = [] + to_skip = [] if options["Stage"]: print("NOTE: Ignoring CI cluster related metrics because running against non-CI cluster") - METRICS_to_skip += METRICS_CI + to_skip += METRICS_CI if options["TestScenarioGitURL"] == "": print("NOTE: Ignoring ITS related metrics because they were disabled at test run") - METRICS_to_skip += METRICS_ITS + to_skip += METRICS_ITS if options["ReleasePolicy"] == "": print("NOTE: Ignoring Release related metrics because they were disabled at test run") - METRICS_to_skip += METRICS_RELEASE + to_skip += METRICS_RELEASE + + # When processing, only consider these metrics + expected_metrics = set(METRICS) - set(to_skip) stats_raw = {} + stats_passes = {} + + rows_incomplete = [] + + # Prepopulate stats_raw data structure + for m in expected_metrics: + stats_raw[m] = { + "pass": {"duration": [], "when": []}, + "fail": {"duration": [], "when": []}, + } with open(input_file, "r") as fp: csvreader = csv.reader(fp) @@ -142,30 +195,50 @@ def main(): continue when = str2date(row[COLUMN_WHEN]) - metric = row[COLUMN_METRIC] + per_user_t = int(row[COLUMN_PER_USER_T]) + per_app_t = int(row[COLUMN_PER_APP_T]) + per_comp_t = int(row[COLUMN_PER_COMP_T]) + repeats_counter = int(row[COLUMN_REPEATS_COUNTER]) + metric = 
row[COLUMN_METRIC].split(".")[-1] duration = float(row[COLUMN_DURATION]) error = row[COLUMN_ERROR] != "" - for m in METRICS: - if m not in stats_raw: - stats_raw[m] = { - "pass": {"duration": [], "when": []}, - "fail": {"duration": [], "when": []}, - } + if metric not in expected_metrics: + continue - if metric.endswith("." + m): - stats_raw[m]["fail" if error else "pass"]["duration"].append(duration) - stats_raw[m]["fail" if error else "pass"]["when"].append(when) + # First add this record to stats_raw that allows us to track stats per metric + stats_raw[metric]["fail" if error else "pass"]["duration"].append(duration) + stats_raw[metric]["fail" if error else "pass"]["when"].append(when) + + # Second add this record to stats_passes that allows us to track full completed passes + if not error: + identifier = (per_user_t, per_app_t, per_comp_t, repeats_counter) + + if SinglePass.i_complete(identifier): + if identifier not in stats_passes: + stats_passes[identifier] = SinglePass() + stats_passes[identifier].add(metric, duration) + else: + # Safe this metric for later once we have all passes + rows_incomplete.append((identifier, metric, duration)) + + # Now when we have data about all passes, add metrics that had incomplete identifiers (with wildcards) + for incomplete in rows_incomplete: + identifier, metric, duration = incomplete + found = [v for k, v in stats_passes.items() if SinglePass.i_matches(identifier, k)] + for i in found: + i.add(metric, duration) #print("Raw stats:") #print(json.dumps(stats_raw, indent=4, default=lambda o: '<' + str(o) + '>')) + #print(json.dumps({str(k): v for k, v in stats_passes.items()}, indent=4, default=lambda o: '<' + str(o._metrics) + '>')) stats = {} - kpi_mean = 0.0 - kpi_successes = sys.maxsize + kpi_mean_data = [] + kpi_successes = 0 kpi_errors = 0 - for m in [m for m in METRICS if m not in METRICS_to_skip]: + for m in expected_metrics: stats[m] = {"pass": {"duration": {"samples": 0}, "when": {}}, "fail": {"duration": 
{"samples": 0}, "when": {}}} if m in stats_raw: stats[m]["pass"]["duration"] = count_stats(stats_raw[m]["pass"]["duration"]) @@ -173,32 +246,15 @@ def main(): stats[m]["pass"]["when"] = count_stats_when(stats_raw[m]["pass"]["when"]) stats[m]["fail"]["when"] = count_stats_when(stats_raw[m]["fail"]["when"]) - if kpi_mean != -1: - # If we had 0 measurements in some metric, that means not a single - # build made it through this step, so kpi_mean metric does not make - # sense as it would not cover this part of the journey - if stats[m]["pass"]["duration"]["samples"] == 0: - kpi_mean = -1 - else: - kpi_mean += stats[m]["pass"]["duration"]["mean"] - - if stats[m]["pass"]["duration"]["samples"] < kpi_successes: - kpi_successes = stats[m]["pass"]["duration"]["samples"] - - if stats[m]["pass"]["duration"]["samples"] == 0: - if kpi_errors == 0: - kpi_errors += 1 - else: - kpi_errors += stats[m]["fail"]["duration"]["samples"] - - runs = stats[m]["pass"]["duration"]["samples"] + stats[m]["fail"]["duration"]["samples"] - if runs == 0: - stats[m]["error_rate"] = None + for k, v in stats_passes.items(): + if v.complete(expected_metrics): + kpi_successes += 1 + kpi_mean_data.append(v.total()) else: - stats[m]["error_rate"] = stats[m]["fail"]["duration"]["samples"] / runs + kpi_errors += 1 stats["KPI"] = {} - stats["KPI"]["mean"] = kpi_mean + stats["KPI"]["mean"] = sum(kpi_mean_data) / kpi_successes if kpi_successes > 0 else -1 stats["KPI"]["successes"] = kpi_successes stats["KPI"]["errors"] = kpi_errors From b44f3c6d00c1e29278abd9d0f306e1fd7575e7f5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 16 Sep 2025 12:59:23 +0200 Subject: [PATCH 254/321] refactor(KONFLUX-10143): Pass just one context to make the code shorter and bit more readable --- tests/load-tests/loadtest.go | 80 ++++--------------- .../pkg/journey/handle_applications.go | 10 +-- .../pkg/journey/handle_component.go | 25 ++---- .../handle_integration_test_scenarios.go | 5 +- 
.../load-tests/pkg/journey/handle_pipeline.go | 15 +--- .../pkg/journey/handle_releases_run.go | 20 +---- .../pkg/journey/handle_releases_setup.go | 20 +---- .../load-tests/pkg/journey/handle_test_run.go | 15 +--- tests/load-tests/pkg/journey/journey.go | 10 +-- tests/load-tests/pkg/logging/time_and_log.go | 25 +++++- 10 files changed, 64 insertions(+), 161 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 6b03b440fa..d7c698522a 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -120,10 +120,7 @@ func main() { // Start given number of `perUserThread()` threads using `journey.Setup()` and wait for them to finish _, err = logging.Measure( - -1, - -1, - -1, - -1, + nil, journey.Setup, perUserThread, &opts, @@ -134,10 +131,7 @@ func main() { // Cleanup resources _, err = logging.Measure( - -1, - -1, - -1, - -1, + nil, journey.Purge, ) if err != nil { @@ -237,10 +231,7 @@ func perUserThread(threadCtx *types.MainContext) { // Start given number of `perApplicationThread()` threads using `journey.PerApplicationSetup()` and wait for them to finish _, err = logging.Measure( - threadCtx.ThreadIndex, - -1, - -1, - threadCtx.JourneyRepeatsCounter, + threadCtx, journey.PerApplicationSetup, perApplicationThread, threadCtx, @@ -259,10 +250,7 @@ func perUserThread(threadCtx *types.MainContext) { // Collect info about PVCs _, err = logging.Measure( - threadCtx.ThreadIndex, - -1, - -1, - threadCtx.JourneyRepeatsCounter, + threadCtx, journey.HandlePersistentVolumeClaim, threadCtx, ) @@ -278,10 +266,7 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { defer perApplicationCtx.PerApplicationWG.Done() defer func() { _, err := logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.HandlePerApplicationCollection, perApplicationCtx, ) @@ -296,10 +281,7 @@ 
func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { // Create framework so we do not have to share framework with parent thread _, err = logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.HandleNewFrameworkForApp, perApplicationCtx, ) @@ -310,10 +292,7 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { // Create application _, err = logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.HandleApplication, perApplicationCtx, ) @@ -324,10 +303,7 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { // Create integration test scenario _, err = logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.HandleIntegrationTestScenario, perApplicationCtx, ) @@ -338,10 +314,7 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { // Create release plan and release plan admission _, err = logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.HandleReleaseSetup, perApplicationCtx, ) @@ -352,10 +325,7 @@ func perApplicationThread(perApplicationCtx *types.PerApplicationContext) { // Start given number of `perComponentThread()` threads using `journey.PerComponentSetup()` and wait for them to finish _, err = logging.Measure( - perApplicationCtx.ParentContext.ThreadIndex, - perApplicationCtx.ApplicationIndex, - -1, - perApplicationCtx.ParentContext.JourneyRepeatsCounter, + perApplicationCtx, journey.PerComponentSetup, 
perComponentThread, perApplicationCtx, @@ -371,10 +341,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { defer perComponentCtx.PerComponentWG.Done() defer func() { _, err := logging.Measure( - perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandlePerComponentCollection, perComponentCtx, ) @@ -389,10 +356,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { // Create framework so we do not have to share framework with parent thread _, err = logging.Measure( - perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandleNewFrameworkForComp, perComponentCtx, ) @@ -403,10 +367,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { // Create component _, err = logging.Measure( - perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandleComponent, perComponentCtx, ) @@ -417,10 +378,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { // Wait for build pipiline run _, err = logging.Measure( - perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandlePipelineRun, perComponentCtx, ) @@ -431,10 +389,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { // Wait for test pipiline run _, err = logging.Measure( - 
perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandleTest, perComponentCtx, ) @@ -445,10 +400,7 @@ func perComponentThread(perComponentCtx *types.PerComponentContext) { // Wait for release to finish _, err = logging.Measure( - perComponentCtx.ParentContext.ParentContext.ThreadIndex, - perComponentCtx.ParentContext.ApplicationIndex, - perComponentCtx.ComponentIndex, - perComponentCtx.ParentContext.ParentContext.JourneyRepeatsCounter, + perComponentCtx, journey.HandleReleaseRun, perComponentCtx, ) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 926ee6fae0..7455dbe5f4 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -41,10 +41,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { logging.Logger.Debug("Creating application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, createApplication, ctx.Framework, ctx.ParentContext.Namespace, @@ -56,10 +53,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { } _, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, validateApplication, ctx.Framework, ctx.ApplicationName, diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 2ab6b3a3ae..b565d72b3c 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -324,10 +324,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Create 
component _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, createComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -346,10 +343,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Validate component build service account created _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -362,10 +356,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Configure imagePullSecrets needed for component build task images if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, configurePipelineImagePullSecrets, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -379,10 +370,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { var pullIface interface{} pullIface, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, getPaCPullNumber, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -415,10 +403,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { // Skip what we do not care about, merge PR, graft pipeline yamls _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, utilityRepoTemplatingComponentCleanup, ctx.Framework, 
ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index 455446065b..92e8da37e4 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -44,10 +44,7 @@ func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { logging.Logger.Debug("Creating integration test scenario %s for application %s in namespace %s", name, ctx.ApplicationName, ctx.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, createIntegrationTestScenario, ctx.Framework, ctx.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_pipeline.go b/tests/load-tests/pkg/journey/handle_pipeline.go index 4054cc5686..20b7a05e07 100644 --- a/tests/load-tests/pkg/journey/handle_pipeline.go +++ b/tests/load-tests/pkg/journey/handle_pipeline.go @@ -120,10 +120,7 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validatePipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -137,10 +134,7 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - 
ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validatePipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -154,10 +148,7 @@ func HandlePipelineRun(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for build pipeline run for component %s in namespace %s to be signed", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validatePipelineRunSignature, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index c56a8d7355..fdc62d8098 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -148,10 +148,7 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { var err error iface, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateReleaseCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -167,10 +164,7 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateReleasePipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -181,10 +175,7 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, 
validateReleasePipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -195,10 +186,7 @@ func HandleReleaseRun(ctx *types.PerComponentContext) error { } _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateReleaseCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index ff1cbf603f..f312ed3014 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -142,10 +142,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { var err error iface, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, createReleasePlan, ctx.Framework, ctx.ParentContext.Namespace, @@ -161,10 +158,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, createReleasePlanAdmission, ctx.Framework, ctx.ParentContext.Namespace, @@ -185,10 +179,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, validateReleasePlan, ctx.Framework, ctx.ParentContext.Namespace, @@ -199,10 +190,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { } iface, err = logging.Measure( - ctx.ParentContext.ThreadIndex, - ctx.ApplicationIndex, - -1, - ctx.ParentContext.JourneyRepeatsCounter, + ctx, validateReleasePlanAdmission, ctx.Framework, ctx.ParentContext.Namespace, diff --git 
a/tests/load-tests/pkg/journey/handle_test_run.go b/tests/load-tests/pkg/journey/handle_test_run.go index c335c07619..3f280b7389 100644 --- a/tests/load-tests/pkg/journey/handle_test_run.go +++ b/tests/load-tests/pkg/journey/handle_test_run.go @@ -105,10 +105,7 @@ func HandleTest(ctx *types.PerComponentContext) error { var ok bool result1, err1 := logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateSnapshotCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -128,10 +125,7 @@ func HandleTest(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to be created", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateTestPipelineRunCreation, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, @@ -145,10 +139,7 @@ func HandleTest(ctx *types.PerComponentContext) error { logging.Logger.Debug("Waiting for test pipeline run for component %s in namespace %s to finish", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, err = logging.Measure( - ctx.ParentContext.ParentContext.ThreadIndex, - ctx.ParentContext.ApplicationIndex, - ctx.ComponentIndex, - ctx.ParentContext.ParentContext.JourneyRepeatsCounter, + ctx, validateTestPipelineRunCondition, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 8c6fb537f7..51154b2aa0 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -23,10 +23,7 @@ func initUserThread(threadCtx *types.MainContext) { // Create user if needed _, 
err = logging.Measure( - threadCtx.ThreadIndex, - -1, - -1, - threadCtx.JourneyRepeatsCounter, + threadCtx, HandleUser, threadCtx, ) @@ -99,10 +96,7 @@ func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { // Fork repositories sequentially as GitHub do not allow more than 3 running forks in parallel anyway for _, threadCtx := range MainContexts { _, err = logging.Measure( - threadCtx.ThreadIndex, - -1, - -1, - threadCtx.JourneyRepeatsCounter, + threadCtx, HandleRepoForking, threadCtx, ) diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index 9a33723328..f93ce61dc3 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -11,6 +11,8 @@ import "os" import "encoding/csv" import "sync" +import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" + var measurementsQueue chan MeasurementEntry // channel to send measurements to var errorsQueue chan ErrorEntry // chanel to send failures to @@ -168,8 +170,29 @@ func errorsWriter() { // Measure duration of a given function run with given parameters and return what function returned // This only returns first (data) and last (error) returned value. Maybe this // can be generalized completely, but it is good enough for our needs. -func Measure(perUserId, perAppId, perCompId, repeatsCounter int, fn interface{}, params ...interface{}) (interface{}, error) { +func Measure(ctx interface{}, fn interface{}, params ...interface{}) (interface{}, error) { funcValue := reflect.ValueOf(fn) + perUserId := -1 + perAppId := -1 + perCompId := -1 + repeatsCounter := -1 + + // Extract additional metadata about this function call from provided context. 
+ if casted, ok := ctx.(*types.MainContext); ok { + perUserId = casted.ThreadIndex + repeatsCounter = casted.JourneyRepeatsCounter + } + if casted, ok := ctx.(*types.PerApplicationContext); ok { + perUserId = casted.ParentContext.ThreadIndex + perAppId = casted.ApplicationIndex + repeatsCounter = casted.ParentContext.JourneyRepeatsCounter + } + if casted, ok := ctx.(*types.PerComponentContext); ok { + perUserId = casted.ParentContext.ParentContext.ThreadIndex + perAppId = casted.ParentContext.ApplicationIndex + perCompId = casted.ComponentIndex + repeatsCounter = casted.ParentContext.ParentContext.JourneyRepeatsCounter + } // Construct arguments for the function call numParams := len(params) From 14a0466bb47b7cf60f0e34c5e321d92e148d5de1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 17 Sep 2025 14:30:48 +0200 Subject: [PATCH 255/321] feat: Allow running this script against custom collected-data directory - WIP --- tests/load-tests/errors.py | 133 ++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 30 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 54b49ba3b3..8f15757fb4 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -236,19 +236,7 @@ def load(datafile): return data -def find_first_failed_build_plr(data_dir, plr_type): - """ This function is intended for jobs where we only run one concurrent - builds, so no more than one can failed: our load test probes. - - This is executed when test hits "Pipeline failed" error and this is - first step to identify task that failed so we can identify error in - the pod log. - - It goes through given data directory (probably "collected-data/") and - loads all files named "collected-pipelinerun-*" and checks that PLR is - a "build" PLR and it is failed one. 
- """ - +def find_all_failed_plrs(data_dir): for currentpath, folders, files in os.walk(data_dir): for datafile in files: if not datafile.startswith("collected-pipelinerun-"): @@ -257,26 +245,12 @@ def find_first_failed_build_plr(data_dir, plr_type): datafile = os.path.join(currentpath, datafile) data = load(datafile) - if plr_type == "build": - plr_type_label = "build" - elif plr_type == "release": - plr_type_label = "managed" - else: - raise Exception("Unknown PLR type") - - # Skip PLRs that do not have expected type - try: - if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != plr_type_label: - continue - except KeyError: - continue - # Skip PLRs that did not failed try: succeeded = True for c in data["status"]["conditions"]: if c["type"] == "Succeeded": - if c["status"] == "False": + if c["status"] == "False": # possibly switch this to `!= "True"` but that might be too big change for normal runs succeeded = False break if succeeded: @@ -284,7 +258,40 @@ def find_first_failed_build_plr(data_dir, plr_type): except KeyError: continue - return data + yield data + + +def find_first_failed_build_plr(data_dir, plr_type): + """ This function is intended for jobs where we only run one concurrent + builds, so no more than one can failed: our load test probes. + + This is executed when test hits "Pipeline failed" error and this is + first step to identify task that failed so we can identify error in + the pod log. + + It goes through given data directory (probably "collected-data/") and + loads all files named "collected-pipelinerun-*" and checks that PLR is + a "build" PLR and it is failed one. 
+ """ + + for data in find_all_failed_plrs(data_dir): + data = load(datafile) + + if plr_type == "build": + plr_type_label = "build" + elif plr_type == "release": + plr_type_label = "managed" + else: + raise Exception("Unknown PLR type") + + # Skip PLRs that do not have expected type + try: + if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != plr_type_label: + continue + except KeyError: + continue + + return data def find_trs(plr): @@ -442,5 +449,71 @@ def main(): print(f"Data dumped to {output_file}") +def investigate_all_failed_plr(dump_dir): + reasons = [] + + for plr in find_all_failed_plrs(dump_dir): + plr_ns = plr["metadata"]["namespace"] + + for tr_name in find_trs(plr): + tr_ok, tr_message = check_failed_taskrun(dump_dir, plr_ns, tr_name) + + if tr_ok: + try: + for pod_name, cont_name in find_failed_containers(dump_dir, plr_ns, tr_name): + log_lines = load_container_log(dump_dir, plr_ns, pod_name, cont_name) + reason = message_to_reason(FAILED_PLR_ERRORS, log_lines) + + if reason == "SKIP": + continue + + reasons.append(reason) + except FileNotFoundError as e: + print(f"Failed to locate required files: {e}") + + reason = message_to_reason(FAILED_TR_ERRORS, tr_message) + if reason != "SKIP": + reasons.append(reason) + + return sorted(reasons) + + +def main_custom(): + dump_dir = sys.argv[1] + output_file = os.path.join(dump_dir, "errors-output.json") + + error_messages = [] # list of error messages + error_by_code = collections.defaultdict( + lambda: 0 + ) # key: numeric error code, value: number of such errors + error_by_reason = collections.defaultdict( + lambda: 0 + ) # key: textual error reason, value: number of such errors + + reasons = investigate_all_failed_plr(dump_dir) + for r in reasons: + add_reason(error_messages, error_by_code, error_by_reason, r) + + data = { + "error_by_code": error_by_code, + "error_by_reason": error_by_reason, + "error_reasons_simple": "; ".join([f"{v}x {k}" for k, v in error_by_reason.items() if k != 
"Post-test data collection failed"]), + "error_messages": error_messages, + } + + print(f"Errors detected: {len(error_messages)}") + print("Errors by reason:") + for k, v in error_by_reason.items(): + print(f" {v}x {k}") + + with open(output_file, "w") as fp: + json.dump(data, fp, indent=4) + print(f"Data dumped to {output_file}") + + if __name__ == "__main__": - sys.exit(main()) + if len(sys.argv) == 2: + # When examining just custom collected-data directory + sys.exit(main_custom()) + else: + sys.exit(main()) From 75f2815dc1beb9739831fa69f30c8ba9faa05e42 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 17 Sep 2025 14:41:13 +0200 Subject: [PATCH 256/321] feat: New error: Failed reading signatures from access.redhat.com due to 500 collected-data/jhutar-tenant/1/pod-undef-fork-app-0d5f59e772f96a482a39836d5cc76027c99108f734ab-pod-step-build.log [2025-09-16T08:14:20,904508812+00:00] Validate context path [2025-09-16T08:14:20,907423720+00:00] Update CA trust [2025-09-16T08:14:20,908379914+00:00] Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-09-16T08:14:21,844797221+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. Could not find prefetched sbom. No content_sets found for ICM [2025-09-16T08:14:21,851410897+00:00] Prepare system (architecture: x86_64) [2025-09-16T08:14:44,810754929+00:00] Setup prefetched Trying to pull registry.access.redhat.com/ubi8/nodejs-18-minimal:latest... 
Getting image source signatures Error: internal error: unable to copy from source docker://registry.access.redhat.com/ubi8/nodejs-18-minimal:latest: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/ubi8/nodejs-18-minimal@sha256=a699a8a3a257f78ce0b264cac570320dffb1daeec0e0382d4afbed82a368c38d/signature-7: received unexpected HTTP status: 500 Internal Server Error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8f15757fb4..8b7eaa92f8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -138,6 +138,7 @@ ("Failed because registry.access.redhat.com returned 503 when reading manifest", r"source-build:ERROR:command execution failure, status: 1, stderr: time=.* level=fatal msg=.Error parsing image name .* reading manifest .* in registry.access.redhat.com/.* received unexpected HTTP status: 503 Service Unavailable"), ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), + ("Failed reading signatures from access.redhat.com due to 500", r"internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: 
exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 From 7858b3be2474147fd89530315d761a1919a7bd81 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 18 Sep 2025 09:20:08 +0200 Subject: [PATCH 257/321] feat: New error: TaskRun was cancelled as its PipelineRun was cancelled Found during KONFLUX-10175 investigation: {"lastTransitionTime": "2025-09-15T05:41:50Z", "message": "TaskRun \"ossm-3-1-f2f2f42cb7a6d2ca561900aaf94a9725-prefetch-dependencies\" was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled.", "reason": "TaskRunCancelled", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8b7eaa92f8..95c1845906 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -188,6 +188,7 @@ ("Pod creation failed because resource quota evaluation timed out", r".message.: .failed to create task run pod [^ ]+: Internal error occurred: resource quota evaluation timed out. Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), + ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. 
TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), } From bb948068b0fe48d2c63308b497a03a58e8638c9a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 18 Sep 2025 14:46:56 +0200 Subject: [PATCH 258/321] fix(KONFLUX-9051): If the release is still progressing, wait some more --- tests/load-tests/pkg/journey/handle_releases_run.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/load-tests/pkg/journey/handle_releases_run.go b/tests/load-tests/pkg/journey/handle_releases_run.go index fdc62d8098..0fd7c9d743 100644 --- a/tests/load-tests/pkg/journey/handle_releases_run.go +++ b/tests/load-tests/pkg/journey/handle_releases_run.go @@ -122,6 +122,9 @@ func validateReleaseCondition(f *framework.Framework, namespace, releaseName str // Check right condition status for _, condition := range release.Status.Conditions { + if condition.Type == "Released" && condition.Reason == "Progressing" { + return false, nil + } if condition.Type == "Released" && condition.Status == "False" { return false, fmt.Errorf("Release %s in namespace %s failed: %+v", releaseName, namespace, condition) } From 56704db76cafeaebbf07acb64c31e019e041a9c4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 08:09:20 +0200 Subject: [PATCH 259/321] feat: Do not traceback when TR file is missing --- tests/load-tests/errors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 95c1845906..76b54e7415 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -176,6 +176,7 @@ FAILED_TR_ERRORS = { ("Missing expected fields in TaskRun", r"Missing expected fields in TaskRun"), # This is special error, meaning everithing failed basically + ("Missing expected TaskRun file", r"Missing expected TaskRun file"), # Another special error, meaning everithing failed as well ("SKIP", r"\"message\": \"All Steps have completed executing\""), # Another special error to avoid printing 
'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 1.*\""), # Another special error to avoid printing 'Unknown error:' message ("SKIP", r"\"message\": \".* exited with code 255.*\""), # Another special error to avoid printing 'Unknown error:' message @@ -213,7 +214,6 @@ def message_to_reason(reasons_and_errors: set, msg: str) -> str: def add_reason(error_messages, error_by_code, error_by_reason, message, reason="", code=0): if reason == "": reason = message - print("Added", message, reason, code) error_messages.append(message) error_by_code[code] += 1 error_by_reason[reason] += 1 @@ -306,7 +306,11 @@ def find_trs(plr): def check_failed_taskrun(data_dir, ns, tr_name): datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") - data = load(datafile) + try: + data = load(datafile) + except FileNotFoundError as e: + print(f"ERROR: Missing file: {str(e)}") + return False, "Missing expected TaskRun file" try: pod_name = data["status"]["podName"] From e7289e785b8ce6a06d7ff2a95c20c943f6d8e6ab Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 08:09:37 +0200 Subject: [PATCH 260/321] feat: New error: Pod creation failed because serviceaccounts not found When investigating KONFLUX-10175 {"lastTransitionTime": "2025-09-10T11:19:56Z", "message": "failed to create task run pod \"ossm-3-2-must-gather-on-pull-request-7lj9z-init\": translating TaskSpec to Pod: serviceaccounts \"build-pipeline-ossm-3-2-must-gather\" not found. 
Maybe missing or invalid Task service-mesh-tenant/", "reason": "PodCreationFailed", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 76b54e7415..a2bb837a7f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -187,6 +187,7 @@ ("Failed to create task run pod because ISE on webhook proxy.operator.tekton.dev", r"failed to create task run pod .*: Internal error occurred: failed calling webhook \\\"proxy.operator.tekton.dev\\\": failed to call webhook: Post \\\"https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/defaulting.timeout=10s\\\": context deadline exceeded. Maybe missing or invalid Task .*"), ("Not enough nodes to schedule pod", r".message.: .pod status ..PodScheduled..:..False..; message: ..[0-9/]+ nodes are available: .*: [0-9]+ Preemption is not helpful for scheduling."), ("Pod creation failed because resource quota evaluation timed out", r".message.: .failed to create task run pod [^ ]+: Internal error occurred: resource quota evaluation timed out. Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), + ("Pod creation failed because serviceaccounts not found", r".message.: .failed to create task run pod [^ ]+: translating TaskSpec to Pod: serviceaccounts [^ ]+ not found. Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. 
TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), From 63e95454e903735ad0cbce12ab9ffb55cbd9ce67 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 08:22:22 +0200 Subject: [PATCH 261/321] feat: New error: TaskRun was cancelled as its PipelineRun timeouted When working on KONFLUX-10175 {"lastTransitionTime": "2025-09-03T12:57:12Z", "message": "TaskRun \"ossm-3-1-pilot-on-push-k2ggf-sast-snyk-check\" was cancelled. TaskRun cancelled as the PipelineRun it belongs to has timed out.", "reason": "TaskRunCancelled", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a2bb837a7f..53c41ac01d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -191,6 +191,7 @@ ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), + ("TaskRun was cancelled as its PipelineRun timeouted", r".message.: .TaskRun [^ ]+ was cancelled. 
TaskRun cancelled as the PipelineRun it belongs to has timed out.., .reason.: .TaskRunCancelled."), } From 833840c91fdc6ab1555df67db7859633fb23a1c5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 08:24:14 +0200 Subject: [PATCH 262/321] feat: New error: Script opm failed because catalog is missing When working on KONFLUX-10175 1/pod-maistra-fbc-v4-17-on-pull-r2ad250667ca59a28b893d1b9ff26c035-pod-step-run-opm-with-user-args.log Running OPM command in working directory: /var/workdir/source OPM Argument received: 'alpha' OPM Argument received: 'render-template' OPM Argument received: 'basic' OPM Argument received: '-o' OPM Argument received: 'yaml' OPM Argument received: 'catalog/ossm-2-6/catalog-prod.yaml' OPM Argument received: '--migrate-level=bundle-object-to-csv-metadata' Ensuring directory 'catalog' exists. Running opm alpha render-template basic -o yaml catalog/ossm-2-6/catalog-prod.yaml --migrate-level=bundle-object-to-csv-metadata > catalog/catalog-prod.yaml 2025/09/17 07:34:47 unable to open "catalog/ossm-2-6/catalog-prod.yaml": open catalog/ossm-2-6/catalog-prod.yaml: no such file or directory --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 53c41ac01d..effd6506a4 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -170,6 +170,7 @@ ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source 
docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), + ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ --migrate-level=bundle-object-to-csv-metadata . catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), } From 9c022aa7fc117fa9193265f9479123454a4253bd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 08:52:42 +0200 Subject: [PATCH 263/321] feat: New error: Failed to compile with clang When working on KONFLUX-10175 service-mesh-tenant/1/pod-ossm-3-2-proxy-debug-on-pull-request-dtcz6-build-images-0-pod-step-build.log [...] 
ERROR: /root/.cache/bazel/_bazel_root/530ed184f85848d5a11a3a8203c74c1b/external/org_brotli/BUILD.bazel:116:11: Compiling c/common/dictionary.c failed: (Exit 1): clang-19 failed: error executing CppCompile command (from target @@org_brotli//:brotlicommon) exec env - \ BAZEL_COMPILER=clang \ BAZEL_CXXOPTS='-stdlib=libc++' \ BAZEL_LINKLIBS=-l%:libc++.a:-l%:libc++abi.a \ BAZEL_LINKOPTS=-lm:-pthread \ CC=clang \ CXX=clang++ \ CXXFLAGS='-stdlib=libc++' \ Clang_DIR=/usr/lib64/llvm18 \ ENVOY_OPENSSL=1 \ LDFLAGS='-stdlib=libc++' \ PATH=/usr/lib/llvm/bin:/usr/local/bin:/bin:/usr/bin \ PWD=/proc/self/cwd \ /usr/bin/clang-19 -U_FORTIFY_SOURCE -fstack-protector -Wall -Wthread-safety -Wself-assign -Wunused-but-set-parameter -Wno-free-nonheap-object -fcolor-diagnostics -fno-omit-frame-pointer -g0 -O2 '-D_FORTIFY_SOURCE=1' -DNDEBUG -ffunction-sections -fdata-sections -MD -MF bazel-out/k8-opt/bin/external/org_brotli/_objs/brotlicommon/dictionary.d '-frandom-seed=bazel-out/k8-opt/bin/external/org_brotli/_objs/brotlicommon/dictionary.o' -gsplit-dwarf -g -iquote external/org_brotli -iquote bazel-out/k8-opt/bin/external/org_brotli -Ibazel-out/k8-opt/bin/external/org_brotli/_virtual_includes/brotli_inc '-DABSL_MIN_LOG_LEVEL=4' -Wno-deprecated-declarations -fdebug-types-section -fPIC -DNULL_PLUGIN '--gcc-toolchain=/usr' -g -fexceptions --pedantic-errors -Wall -Wconversion -Werror -Wextra -Wlong-long -Wmissing-declarations -Wmissing-prototypes -Wno-strict-aliasing -Wshadow -Wsign-compare -no-canonical-prefixes -Wno-builtin-macro-redefined '-D__DATE__="redacted"' '-D__TIMESTAMP__="redacted"' '-D__TIME__="redacted"' -c external/org_brotli/c/common/dictionary.c -o bazel-out/k8-opt/bin/external/org_brotli/_objs/brotlicommon/dictionary.o) Use --sandbox_debug to see verbose messages from the sandbox and retain the sandbox build root for debugging clang-19: error: argument unused during compilation: '--gcc-toolchain=/usr' [-Werror,-Wunused-command-line-argument] [...] 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index effd6506a4..8d963d78ef 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -139,6 +139,7 @@ ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), ("Failed reading signatures from access.redhat.com due to 500", r"internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), + ("Failed to compile with clang", r"ERROR: [^ ]+: Compiling [^ ]+.c failed: .Exit 1.: clang-[0-9]+ failed: error executing CppCompile command"), ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 From 304dde3a097957296cf356c5689b8896fbb61e48 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:01:54 +0200 Subject: [PATCH 264/321] feat: New error: Failed to prefetch dependencies due to go env error When working on KONFLUX-10175 
service-mesh-tenant/1/pod-ossm-3-2-963456391dabac376d995e320caa806de7b53e2cd673a7fb97-pod-step-prefetch-dependencies.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Registering with Red Hat subscription manager. Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 The system has been registered with ID: ba58a14c-333e-426c-9d26-ab42928c7248 The registered system name is: ossm-3-2-963456391dabac376d995e320caa806de7b53e2cd673a7fb97-pod Executing: git fetch --tags 2025-09-17 13:58:03,660 INFO Fetching the gomod dependencies at subpath proxy 2025-09-17 13:58:03,993 ERROR The command "go env GOWORK" failed 2025-09-17 13:58:03,994 ERROR STDERR: go: downloading go1.24 (linux/amd64) go: download go1.24 for linux/amd64: toolchain not available 2025-09-17 13:58:10,242 ERROR PackageManagerError: Go execution failed: `go env GOWORK` failed with rc=1 Error: PackageManagerError: Go execution failed: `go env GOWORK` failed with rc=1 The cause of the failure could be: - something is broken in hermeto - something is wrong with your repository - communication with an external service failed (please try again) The output of the failing command should provide more details, please check the logs. Unregistering from: subscription.rhsm.redhat.com:443/subscription System has been unregistered. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8d963d78ef..92c82b4d6f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -142,6 +142,7 @@ ("Failed to compile with clang", r"ERROR: [^ ]+: Compiling [^ ]+.c failed: .Exit 1.: clang-[0-9]+ failed: error executing CppCompile command"), ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), + ("Failed to prefetch dependencies due to go env error", r"ERROR PackageManagerError: Go execution failed: .go env GOWORK. failed with rc=1"), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 684bc87d332be7d74afaf319012ef19dcfe189a1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:06:59 +0200 Subject: [PATCH 265/321] feat: New error: Failed to prefetch dependencies due to invalid input when fetching tags When working on KONFLUX-10175 
service-mesh-tenant/1/pod-ossm-3-2-7672bb71f5dcda4b3bec1413c3d0d13f78ab8161f3c7dfc519-pod-step-prefetch-dependencies.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Registering with Red Hat subscription manager. Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 The system has been registered with ID: 23df700b-a3a5-449c-acbb-14b8871dfd41 The registered system name is: ossm-3-2-7672bb71f5dcda4b3bec1413c3d0d13f78ab8161f3c7dfc519-pod Executing: git fetch --tags 2025-09-17 12:50:53,180 ERROR InvalidInput: 1 validation error for user input\npackages\n Value error, package path does not exist (or is not a directory): rpm-lockfiles/proxy-debug/golang Error: InvalidInput: 1 validation error for user input packages Value error, package path does not exist (or is not a directory): rpm-lockfiles/proxy-debug/golang Unregistering from: subscription.rhsm.redhat.com:443/subscription System has been unregistered. --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 92c82b4d6f..b9dab80f0f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -143,6 +143,7 @@ ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to prefetch dependencies due to go env error", r"ERROR PackageManagerError: Go execution failed: .go env GOWORK. 
failed with rc=1"), + ("Failed to prefetch dependencies due to invalid input when fetching tags", r"Executing: git fetch --tags.*ERROR InvalidInput: 1 validation error for user input.*packages.*Value error, package path does not exist .or is not a directory.:"), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 011696e60731ee5abc92147ebb5ee4f52271c3d0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:11:17 +0200 Subject: [PATCH 266/321] feat: New error: Script merge_catalogs.sh due to permission error When working on KONFLUX-10175 service-mesh-tenant/1/pod-ossm-fbc-v4-16-on-pull-request-57rrv-build-images-pod-step-build.log [...] 
[2025-09-17T11:16:15,504304049+00:00] Run buildah build [2025-09-17T11:16:15,505331298+00:00] ip link set lo up && buildah build --pull=never --security-opt=unmask=/proc/interrupts --label architecture=x86_64 --label vcs-type=git --label vcs-ref=210d308e5a93a2453ec134ff50ef30c5e16d42a4 --label quay.expires-after=5d --label build-date=2025-09-17T11:16:15Z --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/fbc-v4-16.Containerfile.oJBvWz -t quay.io/redhat-user-workloads/service-mesh-tenant/ossm-fbc-v4-16:on-pr-210d308e5a93a2453ec134ff50ef30c5e16d42a4-linux-x86-64 . [1/3] STEP 1/1: FROM quay.io/konflux-ci/yq@sha256:875f69f9e2172d627bd01aaf7a0d49f67ffebc07fc148ae0d50865e48bd401b9 AS yq --> c3ef06d5a25c [2/3] STEP 1/5: FROM registry.redhat.io/ubi9/ubi-minimal@sha256:2f06ae0e6d3d9c4f610d32c480338eef474867f435d8d28625f2985e8acde6e8 AS builder [2/3] STEP 2/5: COPY --from=yq /usr/bin/yq /usr/bin/yq [2/3] STEP 3/5: COPY catalog /catalog [2/3] STEP 4/5: COPY scripts / [2/3] STEP 5/5: RUN ./merge_catalogs.sh /bin/sh: line 1: ./merge_catalogs.sh: Permission denied subprocess exited with status 126 subprocess exited with status 126 Error: building at STEP "RUN ./merge_catalogs.sh": exit status 126 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index b9dab80f0f..4680b2ff06 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -170,6 +170,7 @@ ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), ("Script gather-rpms.py failed because of too many values to unpack", r"Handling archdir [^ ]+ Traceback.*File \"/usr/bin/gather-rpms.py\".*nvr, btime, size, sigmd5, _ = .*ValueError: too many values to unpack"), + ("Script merge_catalogs.sh due to permission error", r"RUN ./merge_catalogs.sh /bin/sh: line 1: ./merge_catalogs.sh: Permission denied subprocess exited with status 126 subprocess exited with status 126"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' 
returned non-zero exit status 125"), From f1d627d350b6ca4790c38d4adb035a0e4a10d596 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:18:09 +0200 Subject: [PATCH 267/321] feat: New error: TaskRun resolution failed because validation.webhook.pipeline.tekton.dev returned EOF When working on KONFLUX-10175 {"lastTransitionTime": "2025-09-08T14:35:08Z", "message": "error requesting remote resource: error updating resource request \"service-mesh-tenant/bundles-c6b942583a2face2e21b3cb79d442951\" with data: Internal error occurred: failed calling webhook \"validation.webhook.pipeline.tekton.dev\": failed to call webhook: Post \"https://tekton-pipelines-webhook.openshift-pipelines.svc:443/resource-validation?timeout=10s\": EOF", "reason": "TaskRunResolutionFailed", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4680b2ff06..926fa3f17e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -195,6 +195,7 @@ ("Pod creation failed because serviceaccounts not found", r".message.: .failed to create task run pod [^ ]+: translating TaskSpec to Pod: serviceaccounts [^ ]+ not found. 
Maybe missing or invalid Task [^ ]+., .reason.: .PodCreationFailed."), ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), + ("TaskRun resolution failed because validation.webhook.pipeline.tekton.dev returned EOF", r".message.: .error requesting remote resource: error updating resource request [^ ]+ with data: Internal error occurred: failed calling webhook .*validation.webhook.pipeline.tekton.dev.*: failed to call webhook: Post .*https://tekton-pipelines-webhook.openshift-pipelines.svc:443/resource-validation.timeout=10s.*: EOF., .reason.: .TaskRunResolutionFailed."), ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), ("TaskRun was cancelled as its PipelineRun timeouted", r".message.: .TaskRun [^ ]+ was cancelled. 
TaskRun cancelled as the PipelineRun it belongs to has timed out.., .reason.: .TaskRunCancelled."), } From 46062b8b9eb7a3b3410762bd4f930d7c3f1be94e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:21:21 +0200 Subject: [PATCH 268/321] feat: New error: TaskRun was cancelled because it timeouted When working on KONFLUX-10175 {"lastTransitionTime": "2025-09-06T14:39:24Z", "message": "TaskRun \"ossm-3-1-must-gather-on-push-vm84w-clone-repository\" failed to finish within \"2h0m0s\"", "reason": "TaskRunTimeout", "status": "False", "type": "Succeeded"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 926fa3f17e..05201b3290 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -198,6 +198,7 @@ ("TaskRun resolution failed because validation.webhook.pipeline.tekton.dev returned EOF", r".message.: .error requesting remote resource: error updating resource request [^ ]+ with data: Internal error occurred: failed calling webhook .*validation.webhook.pipeline.tekton.dev.*: failed to call webhook: Post .*https://tekton-pipelines-webhook.openshift-pipelines.svc:443/resource-validation.timeout=10s.*: EOF., .reason.: .TaskRunResolutionFailed."), ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), ("TaskRun was cancelled as its PipelineRun timeouted", r".message.: .TaskRun [^ ]+ was cancelled. 
TaskRun cancelled as the PipelineRun it belongs to has timed out.., .reason.: .TaskRunCancelled."), + ("TaskRun was cancelled because it timeouted", r".message.: .TaskRun [^ ]+ failed to finish within [^ ]+., .reason.: .TaskRunTimeout."), } From 86c54dc76e2ea293053900516b9beda8509feead Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:25:38 +0200 Subject: [PATCH 269/321] feat: New error: Script oras failed to fetch blob from Quay after 10 retries When working on KONFLUX-10175 service-mesh-tenant/1/pod-ossm-3-1-ztunnel-on-pull-request-st8vb-sast-snyk-check-pod-step-use-trusted-artifact.log Using token for quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-FtpKt7.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:8e186e9b3eb643d167b26b24d4df8534ab7a5f4ff0cb15c839bd795710d0ca97 --output - Restored artifact quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:8e186e9b3eb643d167b26b24d4df8534ab7a5f4ff0cb15c839bd795710d0ca97 to /var/workdir/source Using token for quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - Error: unexpected EOF warning: Command failed and will retry, 1 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - gzip: stdin: invalid compressed data--format violated tar: Unexpected EOF in archive tar: Unexpected EOF in archive tar: Error is not recoverable: exiting now warning: Command failed and will retry, 2 try Executing: oras blob fetch --registry-config 
/tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 3 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 4 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 5 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 6 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 7 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 8 try Executing: oras blob fetch --registry-config /tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - warning: Command failed and will retry, 9 try Executing: oras blob fetch --registry-config 
/tmp/use-oci.sh.yVSwi1/auth-EWCzRh.json quay.io/redhat-user-workloads/service-mesh-tenant/ossm-3-1-ztunnel@sha256:f9aefad78be290c139917fb40b941e3c377202e4ffd9f56608fe0d564520863c --output - error: Command failed after 10 tries with status 141 Command exited with non-zero status 2 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 05201b3290..df1add91b0 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -175,6 +175,7 @@ ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ --migrate-level=bundle-object-to-csv-metadata . 
catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), + ("Script oras failed to fetch blob from Quay after 10 retries", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output - error: Command failed after 10 tries with status 141"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), } From fab2359c99520c35eb4227a5b7d36c6d1f0490f0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:28:04 +0200 Subject: [PATCH 270/321] style: Fix order --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index df1add91b0..4045165164 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -197,8 +197,8 @@ ("Pod creation failed with reason error", r"\"message\": \".* exited with code 2: Error\""), ("Pod stuck in incorrect status", r".message.: .pod status ..PodReadyToStartContainers..:..False..; message: ....., .reason.: .Pending., .status.: .Unknown."), ("TaskRun resolution failed because validation.webhook.pipeline.tekton.dev returned EOF", r".message.: .error requesting remote resource: error updating resource request [^ ]+ with data: Internal error occurred: failed calling webhook .*validation.webhook.pipeline.tekton.dev.*: failed to call webhook: Post .*https://tekton-pipelines-webhook.openshift-pipelines.svc:443/resource-validation.timeout=10s.*: EOF., .reason.: .TaskRunResolutionFailed."), - ("TaskRun was cancelled as its PipelineRun was cancelled", 
r"TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), ("TaskRun was cancelled as its PipelineRun timeouted", r".message.: .TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has timed out.., .reason.: .TaskRunCancelled."), + ("TaskRun was cancelled as its PipelineRun was cancelled", r"TaskRun [^ ]+ was cancelled. TaskRun cancelled as the PipelineRun it belongs to has been cancelled."), ("TaskRun was cancelled because it timeouted", r".message.: .TaskRun [^ ]+ failed to finish within [^ ]+., .reason.: .TaskRunTimeout."), } From 55e72a2d850a97ace82b8213dea8c8e1017ff14b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:31:39 +0200 Subject: [PATCH 271/321] feat: New error: Failed to compile with clang When working on KONFLUX-10175 service-mesh-tenant/1/pod-ossm-3-2-proxy-debug-on-pull-request-dtcz6-build-images-3-pod-step-build.log [...] ERROR: /root/.cache/bazel/_bazel_root/530ed184f85848d5a11a3a8203c74c1b/external/com_google_absl/absl/flags/BUILD.bazel:118:11: Compiling absl/flags/internal/commandlineflag.cc [for tool] failed: (Exit 1): clang-19 failed: error executing CppCompile command (from target @@com_google_absl//absl/flags:commandlineflag_internal) exec env - \ CC='' \ CXX='' \ PATH=/bin:/usr/bin:/usr/local/bin \ PWD=/proc/self/cwd \ /usr/bin/clang-19 -U_FORTIFY_SOURCE -fstack-protector -Wall -Wthread-safety -Wself-assign -Wunused-but-set-parameter -Wno-free-nonheap-object -fcolor-diagnostics -fno-omit-frame-pointer -g0 -O2 '-D_FORTIFY_SOURCE=1' -DNDEBUG -ffunction-sections -fdata-sections '-stdlib=libc++' -MD -MF bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/flags/_objs/commandlineflag_internal/commandlineflag.d '-frandom-seed=bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/flags/_objs/commandlineflag_internal/commandlineflag.o' -iquote external/com_google_absl -iquote 
bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl -g0 -g0 '-std=c++20' -fsized-deallocation -Wall -Wextra -Wc++98-compat-extra-semi -Wcast-qual -Wconversion -Wdeprecated-pragma -Wfloat-overflow-conversion -Wfloat-zero-conversion -Wfor-loop-analysis -Wformat-security -Wgnu-redeclared-enum -Winfinite-recursion -Winvalid-constexpr -Wliteral-conversion -Wmissing-declarations -Wnullability-completeness -Woverlength-strings -Wpointer-arith -Wself-assign -Wshadow-all -Wshorten-64-to-32 -Wsign-conversion -Wstring-conversion -Wtautological-overlap-compare -Wtautological-unsigned-zero-compare -Wthread-safety -Wundef -Wuninitialized -Wunreachable-code -Wunused-comparison -Wunused-local-typedefs -Wunused-result -Wvla -Wwrite-strings -Wno-float-conversion -Wno-implicit-float-conversion -Wno-implicit-int-float-conversion -Wno-unknown-warning-option -DNOMINMAX -no-canonical-prefixes -Wno-builtin-macro-redefined '-D__DATE__="redacted"' '-D__TIMESTAMP__="redacted"' '-D__TIME__="redacted"' '-fmodule-name=com_google_absl//absl/flags:commandlineflag_internal' '-fmodule-map-file=bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/flags/commandlineflag_internal.cppmap' -Xclang -fno-cxx-modules -fmodules-strict-decluse -Wprivate-header '-fmodule-map-file=external/local_config_cc/module.modulemap' '-fmodule-map-file=bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/base/config.cppmap' '-fmodule-map-file=bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/base/fast_type_id.cppmap' -c external/com_google_absl/absl/flags/internal/commandlineflag.cc -o bazel-out/s390x-opt-exec-ST-a828a81199fe/bin/external/com_google_absl/absl/flags/_objs/commandlineflag_internal/commandlineflag.o) Use --sandbox_debug to see verbose messages from the sandbox and retain the sandbox build root for debugging In file included from external/com_google_absl/absl/flags/internal/commandlineflag.cc:16: In file included from 
external/com_google_absl/absl/flags/internal/commandlineflag.h:19: external/com_google_absl/absl/base/config.h:56:10: fatal error: 'cstddef' file not found 56 | #include <cstddef> | ^~~~~~~~~ 1 error generated. [...] --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4045165164..e93f40342e 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -139,6 +139,7 @@ ("Failed downloading rpms for hermetic builds due to 504 errors", r"mock-hermetic-repo.*urllib3.exceptions.MaxRetryError: HTTPSConnectionPool.*: Max retries exceeded with url: .*.rpm .Caused by ResponseError..too many 504 error responses..."), ("Failed downloading rpms for hermetic builds", r"mock-hermetic-repo.*ERROR:__main__:RPM deps downloading failed"), ("Failed reading signatures from access.redhat.com due to 500", r"internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/webassets/docker/content/sigstore/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), + ("Failed to compile with clang", r"ERROR: [^ ]+: Compiling [^ ]+.cc .for tool.
failed: .Exit 1.: clang-[0-9]+ failed: error executing CppCompile command"), ("Failed to compile with clang", r"ERROR: [^ ]+: Compiling [^ ]+.c failed: .Exit 1.: clang-[0-9]+ failed: error executing CppCompile command"), ("Failed to connect to MPC VM", r"ssh: connect to host [0-9]+.[0-9]+.[0-9]+.[0-9]+ port 22: Connection timed out"), ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), From e599ff781a33a4c71083405a5d06957a71808f0a Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:36:18 +0200 Subject: [PATCH 272/321] feat: New error: Script opm failed because failed to pull image from registry.redhat.io When working on KONFLUX-10175 service-mesh-tenant/1/pod-maistra-fbc-v4-16-on-pull-rc04bcd2b7b4fd4f8c36543d8216796c5-pod-step-run-opm-with-user-args.log Running OPM command in working directory: /var/workdir/source OPM Argument received: 'alpha' OPM Argument received: 'render-template' OPM Argument received: 'basic' OPM Argument received: '-o' OPM Argument received: 'yaml' OPM Argument received: 'catalog/ossm-2-6/catalog-prod.yaml' Ensuring directory 'catalog' exists. 
Running opm alpha render-template basic -o yaml catalog/ossm-2-6/catalog-prod.yaml > catalog/catalog-prod.yaml 2025/09/17 07:44:11 render reference "registry.redhat.io/openshift-service-mesh/istio-rhel8-operator-metadata@sha256:6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0": failed to pull image "registry.redhat.io/openshift-service-mesh/istio-rhel8-operator-metadata@sha256:6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0": Source image rejected: reading signature from https://registry.redhat.io/containers/sigstore/openshift-service-mesh/istio-rhel8-operator-metadata@sha256=6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0/signature-5: received unexpected HTTP status: 500 Internal Server Error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e93f40342e..5a78e7f3c9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -176,6 +176,7 @@ ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' 
returned non-zero exit status 125"), ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ --migrate-level=bundle-object-to-csv-metadata . catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), + ("Script opm failed because failed to pull image from registry.redhat.io", r"Running opm alpha render-template basic -o yaml [^ ]+ . [^ ]+.*render reference .registry.redhat.io/[^ ]+.: failed to pull image .registry.redhat.io/[^ ]+.: Source image rejected: reading signature from https://registry.redhat.io/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), ("Script oras failed to fetch blob from Quay after 10 retries", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output - error: Command failed after 10 tries with status 141"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), From 722592ffbd039c6b7cd6cc41b520fd2f0b6c6a65 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:39:44 +0200 Subject: [PATCH 273/321] feat: New error: Script opm failed because catalog is missing When working on KONFLUX-10175 service-mesh-tenant/1/pod-maistra-fbc-v4-16-on-pull-rc04bcd2b7b4fd4f8c36543d8216796c5-pod-step-run-opm-with-user-args.log Running OPM command in working directory: /var/workdir/source OPM Argument received: 'alpha' OPM Argument received: 'render-template' OPM Argument received: 'basic' OPM Argument received: '-o' OPM Argument received: 'yaml' OPM Argument received: 'catalog/ossm-2-6/catalog-prod.yaml' 
Ensuring directory 'catalog' exists. Running opm alpha render-template basic -o yaml catalog/ossm-2-6/catalog-prod.yaml > catalog/catalog-prod.yaml 2025/09/17 07:44:11 render reference "registry.redhat.io/openshift-service-mesh/istio-rhel8-operator-metadata@sha256:6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0": failed to pull image "registry.redhat.io/openshift-service-mesh/istio-rhel8-operator-metadata@sha256:6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0": Source image rejected: reading signature from https://registry.redhat.io/containers/sigstore/openshift-service-mesh/istio-rhel8-operator-metadata@sha256=6444029a262a9cf526d2195b2bbec7b3c28e52c1473fe8d43d8167ea3c82fda0/signature-5: received unexpected HTTP status: 500 Internal Server Error --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 5a78e7f3c9..718a718215 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -175,6 +175,7 @@ ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest [^ ]+.*/usr/bin/mock-hermetic-repo.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com failed", r"mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: determining manifest MIME type for docker://registry.access.redhat.com/[^ ]+: Manifest does not match provided manifest digest.*subprocess.CalledProcessError.*Command ...podman....pull.* returned non-zero exit status 125"), ("Script mock-hermetic-repo failed because pull from registry.access.redhat.com 
failed", r"/usr/bin/mock-hermetic-repo.*Error: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: initializing source docker://registry.access.redhat.com/[^ ]+: unable to retrieve auth token: invalid username/password: unauthorized.*subprocess.CalledProcessError.*Command '.'podman', 'pull', '--arch', '[^ ]+', 'registry.access.redhat.com/[^ ]+'.' returned non-zero exit status 125"), + ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ . catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ --migrate-level=bundle-object-to-csv-metadata . catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), ("Script opm failed because failed to pull image from registry.redhat.io", r"Running opm alpha render-template basic -o yaml [^ ]+ . 
[^ ]+.*render reference .registry.redhat.io/[^ ]+.: failed to pull image .registry.redhat.io/[^ ]+.: Source image rejected: reading signature from https://registry.redhat.io/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), ("Script oras failed to fetch blob from Quay after 10 retries", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output - error: Command failed after 10 tries with status 141"), From b0329f0ad90a0d30a15c7f393b55f96c99c82012 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 19 Sep 2025 09:44:00 +0200 Subject: [PATCH 274/321] feat: New error: Failed to prefetch dependencies due to subscription-manager failed to register because system is already registered When working on KONFLUX-10175 service-mesh-tenant/1/pod-ossm-3-2-b70c96a06b7267ca2a5a295b2cc0dc00c4f03453b1e40a2325-pod-step-prefetch-dependencies.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Registering with Red Hat subscription manager. Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 The system has been registered with ID: 40a77fdb-9823-497a-833e-b06048209df5 The registered system name is: ossm-3-2-b70c96a06b7267ca2a5a295b2cc0dc00c4f03453b1e40a2325-pod Remote server error. Please check the connection details, or see /var/log/rhsm/rhsm.log for more information. warning: Command failed and will retry, 1 try Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 This system is already registered. Use --force to override warning: Command failed and will retry, 2 try Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 This system is already registered. 
Use --force to override warning: Command failed and will retry, 3 try Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 This system is already registered. Use --force to override warning: Command failed and will retry, 4 try Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 This system is already registered. Use --force to override warning: Command failed and will retry, 5 try Executing: subscription-manager register --org 11009103 --activationkey rh-ossm-istio-proxy-production-build-els_9_4 This system is already registered. Use --force to override error: Command failed after 6 tries with status 64 Subscription-manager register failed --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 718a718215..605db8c58d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -145,6 +145,7 @@ ("Failed to prefetch dependencies due to download timeout", r"ERROR Unsuccessful download: .* ERROR FetchError: exception_name: TimeoutError.*If the issue seems to be on the cachi2 side, please contact the maintainers."), ("Failed to prefetch dependencies due to go env error", r"ERROR PackageManagerError: Go execution failed: .go env GOWORK. failed with rc=1"), ("Failed to prefetch dependencies due to invalid input when fetching tags", r"Executing: git fetch --tags.*ERROR InvalidInput: 1 validation error for user input.*packages.*Value error, package path does not exist .or is not a directory.:"), + ("Failed to prefetch dependencies due to subscription-manager failed to register because system is already registered", r"Executing: subscription-manager register --org [^ ]+ --activationkey [^ ]+ This system is already registered. 
Use --force to override error: Command failed after [0-9]+ tries with status 64 Subscription-manager register failed"), ("Failed to provision MPC VM due to resource quota evaluation timed out", r"cat /ssh/error Error allocating host: Internal error occurred: resource quota evaluation timed out"), # KONFLUX-9798 ("Failed to pull container from access.redhat.com because of DNS error", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: reading signatures: Get \"https://access.redhat.com/.*\": dial tcp: lookup access.redhat.com: Temporary failure in name resolution"), ("Failed to pull container from quay.io because of DNS error", r"Error: copying system image from manifest list: reading blob .*: Get \"https://cdn[0-9]+.quay.io/.*\": dial tcp: lookup cdn[0-9]+.quay.io: Temporary failure in name resolution"), From 2f142797067ac5370b62a631e04a95275e3b666b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 07:37:06 +0200 Subject: [PATCH 275/321] feat: New error: Script opm failed to load or rebuild cache because some package has duplicate bundle --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 605db8c58d..312b5e027c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -179,6 +179,7 @@ ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ . catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), ("Script opm failed because catalog is missing", r"Running opm alpha render-template basic -o yaml [^ ]+ --migrate-level=bundle-object-to-csv-metadata . 
catalog/catalog-prod.yaml.*unable to open .catalog/[^ ]+.: open catalog/[^ ]+: no such file or directory"), ("Script opm failed because failed to pull image from registry.redhat.io", r"Running opm alpha render-template basic -o yaml [^ ]+ . [^ ]+.*render reference .registry.redhat.io/[^ ]+.: failed to pull image .registry.redhat.io/[^ ]+.: Source image rejected: reading signature from https://registry.redhat.io/[^ ]+: received unexpected HTTP status: 500 Internal Server Error"), + ("Script opm failed to load or rebuild cache because some package has duplicate bundle", r"level=fatal msg=.failed to load or rebuild cache: failed to rebuild cache: build package index: process package [^ ]+: package [^ ]+ has duplicate bundle [^ ]+..*Error: building at STEP .RUN /bin/opm serve .*.: exit status 1"), ("Script oras failed to fetch blob from Quay after 10 retries", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output - error: Command failed after 10 tries with status 141"), ("Script rpm_verifier failed to access image layer from quay.io because 502 Bad Gateway", r"rpm_verifier --image-url quay.io/.* Image: quay.io/.* error: unable to access the source layer sha256:[0-9a-z]+: received unexpected HTTP status: 502 Bad Gateway"), ("Script rpm_verifier failed to pull image from quay.io because 502 Bad Gateway", r"rpm_verifier.*error: unable to read image quay.io/[^ ]+: Get .https://quay.io/[^ ]+.: received unexpected HTTP status: 502 Bad Gateway"), From 30088f816372df1e9a175f297fe06bcd3bf7cbb7 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 07:37:58 +0200 Subject: [PATCH 276/321] feat: Make files we are evaluating smaller, because for big ones it takes too much time --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 312b5e027c..2e16b1827c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -219,6 +219,7 @@ def 
message_to_reason(reasons_and_errors: set, msg: str) -> str: The name of the error if a pattern matches, otherwise string "UNKNOWN". """ msg = msg.replace("\n", " ") # Remove newlines + msg = msg[-250000:] # Just look at last 250k bytes for error_name, pattern in reasons_and_errors: if re.search(pattern, msg): return error_name From 181b86e96871be018aa2db1d710679f41076f5ee Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 07:40:14 +0200 Subject: [PATCH 277/321] feat: New error: Git failed to clone submodule because GitLab CEE giving 429 While working on KONFLUX-10176 {"level":"error","ts":1758273519.5033627,"caller":"git/git.go:53","msg":"Error running git [submodule update --recursive --init --depth=1]: exit status 128\nSubmodule 'source2image/barbican-operator/barbican-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/barbican-operator.git) registered for path 'source2image/barbican-operator/barbican-operator'\nSubmodule 'source2image/cinder-operator/cinder-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/cinder-operator.git) registered for path 'source2image/cinder-operator/cinder-operator'\nSubmodule 'source2image/designate-operator/designate-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/designate-operator.git) registered for path 'source2image/designate-operator/designate-operator'\nSubmodule 'source2image/glance-operator/glance-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/glance-operator.git) registered for path 'source2image/glance-operator/glance-operator'\nSubmodule 'source2image/heat-operator/heat-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/heat-operator.git) registered for path 'source2image/heat-operator/heat-operator'\nSubmodule 'source2image/horizon-operator/horizon-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/horizon-operator.git) registered for path 
'source2image/horizon-operator/horizon-operator'\nSubmodule 'source2image/infra-operator/infra-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/infra-operator.git) registered for path 'source2image/infra-operator/infra-operator'\nSubmodule 'source2image/ironic-operator/ironic-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ironic-operator.git) registered for path 'source2image/ironic-operator/ironic-operator'\nSubmodule 'source2image/keystone-operator/keystone-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/keystone-operator.git) registered for path 'source2image/keystone-operator/keystone-operator'\nSubmodule 'source2image/manila-operator/manila-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/manila-operator.git) registered for path 'source2image/manila-operator/manila-operator'\nSubmodule 'source2image/mariadb-operator/mariadb-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mariadb-operator.git) registered for path 'source2image/mariadb-operator/mariadb-operator'\nSubmodule 'source2image/mysqld-exporter/mysqld-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mysqld-exporter.git) registered for path 'source2image/mysqld-exporter/mysqld-exporter'\nSubmodule 'source2image/neutron-operator/neutron-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/neutron-operator.git) registered for path 'source2image/neutron-operator/neutron-operator'\nSubmodule 'source2image/nova-operator/nova-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/nova-operator.git) registered for path 'source2image/nova-operator/nova-operator'\nSubmodule 'source2image/octavia-operator/octavia-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/octavia-operator.git) registered for path 'source2image/octavia-operator/octavia-operator'\nSubmodule 
'source2image/openstack-ansible-ee/openstack-ansible-ee' (https://gitlab.cee.redhat.com/openstack-midstream/edpm/source/edpm-ansible.git) registered for path 'source2image/openstack-ansible-ee/openstack-ansible-ee'\nSubmodule 'source2image/openstack-baremetal-agent/openstack-baremetal-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-baremetal-operator.git) registered for path 'source2image/openstack-baremetal-agent/openstack-baremetal-operator'\nSubmodule 'source2image/openstack-baremetal-operator/openstack-baremetal-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-baremetal-operator.git) registered for path 'source2image/openstack-baremetal-operator/openstack-baremetal-operator'\nSubmodule 'source2image/openstack-must-gather/openstack-must-gather' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-must-gather.git) registered for path 'source2image/openstack-must-gather/openstack-must-gather'\nSubmodule 'source2image/openstack-network-exporter/openstack-network-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-network-exporter.git) registered for path 'source2image/openstack-network-exporter/openstack-network-exporter'\nSubmodule 'source2image/openstack-operator/openstack-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-operator.git) registered for path 'source2image/openstack-operator/openstack-operator'\nSubmodule 'source2image/ovn-operator/ovn-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ovn-operator.git) registered for path 'source2image/ovn-operator/ovn-operator'\nSubmodule 'source2image/placement-operator/placement-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/placement-operator.git) registered for path 'source2image/placement-operator/placement-operator'\nSubmodule 
'source2image/prometheus-podman-exporter/prometheus-podman-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/prometheus-podman-exporter.git) registered for path 'source2image/prometheus-podman-exporter/prometheus-podman-exporter'\nSubmodule 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git) registered for path 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'\nSubmodule 'source2image/sg-core/sg-core' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/sg-core.git) registered for path 'source2image/sg-core/sg-core'\nSubmodule 'source2image/swift-operator/swift-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/swift-operator.git) registered for path 'source2image/swift-operator/swift-operator'\nSubmodule 'source2image/telemetry-operator/telemetry-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/telemetry-operator.git) registered for path 'source2image/telemetry-operator/telemetry-operator'\nSubmodule 'source2image/test-operator/test-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/test-operator.git) registered for path 'source2image/test-operator/test-operator'\nSubmodule 'source2image/watcher-operator/watcher-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/watcher-operator.git) registered for path 'source2image/watcher-operator/watcher-operator'\nCloning into '/var/workdir/source/source2image/barbican-operator/barbican-operator'...\nCloning into '/var/workdir/source/source2image/cinder-operator/cinder-operator'...\nCloning into '/var/workdir/source/source2image/designate-operator/designate-operator'...\nCloning into '/var/workdir/source/source2image/glance-operator/glance-operator'...\nCloning into '/var/workdir/source/source2image/heat-operator/heat-operator'...\nCloning into 
'/var/workdir/source/source2image/horizon-operator/horizon-operator'...\nCloning into '/var/workdir/source/source2image/infra-operator/infra-operator'...\nCloning into '/var/workdir/source/source2image/ironic-operator/ironic-operator'...\nCloning into '/var/workdir/source/source2image/keystone-operator/keystone-operator'...\nCloning into '/var/workdir/source/source2image/manila-operator/manila-operator'...\nCloning into '/var/workdir/source/source2image/mariadb-operator/mariadb-operator'...\nCloning into '/var/workdir/source/source2image/mysqld-exporter/mysqld-exporter'...\nCloning into '/var/workdir/source/source2image/neutron-operator/neutron-operator'...\nCloning into '/var/workdir/source/source2image/nova-operator/nova-operator'...\nCloning into '/var/workdir/source/source2image/octavia-operator/octavia-operator'...\nCloning into '/var/workdir/source/source2image/openstack-ansible-ee/openstack-ansible-ee'...\nCloning into '/var/workdir/source/source2image/openstack-baremetal-agent/openstack-baremetal-operator'...\nCloning into '/var/workdir/source/source2image/openstack-baremetal-operator/openstack-baremetal-operator'...\nCloning into '/var/workdir/source/source2image/openstack-must-gather/openstack-must-gather'...\nCloning into '/var/workdir/source/source2image/openstack-network-exporter/openstack-network-exporter'...\nCloning into '/var/workdir/source/source2image/openstack-operator/openstack-operator'...\nCloning into '/var/workdir/source/source2image/ovn-operator/ovn-operator'...\nerror: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429\nfatal: expected flush after ref listing\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ovn-operator.git' into submodule path '/var/workdir/source/source2image/ovn-operator/ovn-operator' failed\nFailed to clone 'source2image/ovn-operator/ovn-operator'. 
Retry scheduled\nCloning into '/var/workdir/source/source2image/placement-operator/placement-operator'...\nCloning into '/var/workdir/source/source2image/prometheus-podman-exporter/prometheus-podman-exporter'...\nCloning into '/var/workdir/source/source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'...\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git' into submodule path '/var/workdir/source/source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator' failed\nFailed to clone 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'. Retry scheduled\nCloning into '/var/workdir/source/source2image/sg-core/sg-core'...\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/sg-core.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/sg-core.git' into submodule path '/var/workdir/source/source2image/sg-core/sg-core' failed\nFailed to clone 'source2image/sg-core/sg-core'. Retry scheduled\nCloning into '/var/workdir/source/source2image/swift-operator/swift-operator'...\nCloning into '/var/workdir/source/source2image/telemetry-operator/telemetry-operator'...\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/telemetry-operator.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/telemetry-operator.git' into submodule path '/var/workdir/source/source2image/telemetry-operator/telemetry-operator' failed\nFailed to clone 'source2image/telemetry-operator/telemetry-operator'. 
Retry scheduled\nCloning into '/var/workdir/source/source2image/test-operator/test-operator'...\nCloning into '/var/workdir/source/source2image/watcher-operator/watcher-operator'...\nCloning into '/var/workdir/source/source2image/ovn-operator/ovn-operator'...\nCloning into '/var/workdir/source/source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'...\nCloning into '/var/workdir/source/source2image/sg-core/sg-core'...\nCloning into '/var/workdir/source/source2image/telemetry-operator/telemetry-operator'...\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/barbican-operator\n * branch 6ec5aeadb762f2aa6525d910c92090f47676c208 -> FETCH_HEAD\nSubmodule path 'source2image/barbican-operator/barbican-operator': checked out '6ec5aeadb762f2aa6525d910c92090f47676c208'\nSubmodule path 'source2image/cinder-operator/cinder-operator': checked out 'cbfa0b6b5a3fc9cd503e63fcfed875cbd6bc23ec'\nSubmodule path 'source2image/designate-operator/designate-operator': checked out '30b7110b0524d9be7d131b8d7a92324f50cff58b'\nSubmodule path 'source2image/glance-operator/glance-operator': checked out '2be6be08d902d6dc4ef1265b832f9ef0bc673d8b'\nSubmodule path 'source2image/heat-operator/heat-operator': checked out '45e4d9dd39a4da35b2ad8b93463a27c0d04b3a45'\nSubmodule path 'source2image/horizon-operator/horizon-operator': checked out '9d97a0b7a0c7f5a6aa39f049a3f17d77ffe6c8a4'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/infra-operator\n * branch f2de869e495341e9fbeb433b492b60d204fd38af -> FETCH_HEAD\nSubmodule path 'source2image/infra-operator/infra-operator': checked out 'f2de869e495341e9fbeb433b492b60d204fd38af'\nSubmodule path 'source2image/ironic-operator/ironic-operator': checked out '1e6f74ca0521d57829ee061c74738db4e90e4b79'\nSubmodule path 'source2image/keystone-operator/keystone-operator': checked out 'b2e35a35d42306a26c048ce7cdae9c52e44f7bee'\nSubmodule path 'source2image/manila-operator/manila-operator': checked out 
'c7cc59b9eb472e94ded36f637fca8d5548a3afaa'\nSubmodule path 'source2image/mariadb-operator/mariadb-operator': checked out '213306df744131350ca7d4375f3d5104b88bd5f3'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mysqld-exporter\n * branch 3abedeb1d84f3c9c76a891c0b4d9ca14c69f0d7e -> FETCH_HEAD\nSubmodule path 'source2image/mysqld-exporter/mysqld-exporter': checked out '3abedeb1d84f3c9c76a891c0b4d9ca14c69f0d7e'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/neutron-operator\n * branch 7da84a35fa6cc9a5a7e81d39f43123e498247e4a -> FETCH_HEAD\nSubmodule path 'source2image/neutron-operator/neutron-operator': checked out '7da84a35fa6cc9a5a7e81d39f43123e498247e4a'\nSubmodule path 'source2image/nova-operator/nova-operator': checked out '64db9a82b4e72c01a4fc87c8e46872036d2c2e5a'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/octavia-operator\n * branch 05d69fc35a55515993f9285410506a43cbaacaeb -> FETCH_HEAD\nSubmodule path 'source2image/octavia-operator/octavia-operator': checked out '05d69fc35a55515993f9285410506a43cbaacaeb'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/edpm/source/edpm-ansible\n * branch 0cead0fb801df3f27090f470abb4009910a5fcd9 -> FETCH_HEAD\nSubmodule path 'source2image/openstack-ansible-ee/openstack-ansible-ee': checked out '0cead0fb801df3f27090f470abb4009910a5fcd9'\nSubmodule path 'source2image/openstack-baremetal-agent/openstack-baremetal-operator': checked out '13588826f6d6664f03717deff67655db45a134da'\nSubmodule path 'source2image/openstack-baremetal-operator/openstack-baremetal-operator': checked out '13588826f6d6664f03717deff67655db45a134da'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-must-gather\n * branch 87704990690ab7f209125bebfb2bceb4ee6af11b -> FETCH_HEAD\nSubmodule path 'source2image/openstack-must-gather/openstack-must-gather': checked out '87704990690ab7f209125bebfb2bceb4ee6af11b'\nSubmodule path 
'source2image/openstack-network-exporter/openstack-network-exporter': checked out 'cebab24bee9cd3c647b7d06ee38a099e4a05a985'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-operator\n * branch 8be5f30b2baec1caec160d5e14de0bb41449e27f -> FETCH_HEAD\nSubmodule path 'source2image/openstack-operator/openstack-operator': checked out '8be5f30b2baec1caec160d5e14de0bb41449e27f'\nSubmodule path 'source2image/ovn-operator/ovn-operator': checked out 'a1902b8c8c94b1f1de3ed8f64786d83190185383'\nSubmodule path 'source2image/placement-operator/placement-operator': checked out '0561698fe8399fffe6e2b619d7ab9615ac914f79'\nSubmodule path 'source2image/prometheus-podman-exporter/prometheus-podman-exporter': checked out '60b38adbf20164c11024f61621e6577026129568'\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git/': The requested URL returned error: 429\nUnable to fetch in submodule path 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'; trying to directly fetch a3524acedd49ee0a49b5e6ae9e093c5cfb4ca590:\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git/': The requested URL returned error: 429\nfatal: Fetched in submodule path 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator', but it did not contain a3524acedd49ee0a49b5e6ae9e093c5cfb4ca590. 
Direct fetching of that commit failed.\n","stacktrace":"github.com/tektoncd-catalog/git-clone/git-init/git.run\n\t/opt/app-root/src/git-init/git/git.go:53\ngithub.com/tektoncd-catalog/git-clone/git-init/git.submoduleFetch\n\t/opt/app-root/src/git-init/git/git.go:236\ngithub.com/tektoncd-catalog/git-clone/git-init/git.Fetch\n\t/opt/app-root/src/git-init/git/git.go:202\nmain.main\n\t/opt/app-root/src/git-init/main.go:52\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 2e16b1827c..f0c7168640 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -165,6 +165,7 @@ ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), + ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 128.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: 
quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 From 84179e11e9e93b88804bf77e57b79d3bc7f9bb9e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 07:49:54 +0200 Subject: [PATCH 278/321] feat: New error: Git failed to fetch because GitLab CEE giving 429 While working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-mysqld-exporte3c3a611ca6dbe6638afbdae24c46ba23a6693b4b0989b-pod-step-clone.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt {"level":"error","ts":1758273518.8573155,"caller":"git/git.go:53","msg":"Error running git [fetch --recurse-submodules=yes --depth=1 origin --update-head-ok --force 3baf362ce5d2b526874f6c98b3d36ec8b9cd9371]: exit status 128\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-konflux/openstack-operator-18.0/': The requested URL returned error: 429\n","stacktrace":"github.com/tektoncd-catalog/git-clone/git-init/git.run\n\t/opt/app-root/src/git-init/git/git.go:53\ngithub.com/tektoncd-catalog/git-clone/git-init/git.Fetch\n\t/opt/app-root/src/git-init/git/git.go:180\nmain.main\n\t/opt/app-root/src/git-init/main.go:52\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} {"level":"fatal","ts":1758273518.8574386,"caller":"git-init/main.go:53","msg":"Error fetching git repository: failed to fetch [3baf362ce5d2b526874f6c98b3d36ec8b9cd9371]: exit status 128","stacktrace":"main.main\n\t/opt/app-root/src/git-init/main.go:53\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index f0c7168640..a131a36ecb 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -166,6 +166,7 @@ ("Gateway Time-out when pulling container image", 
r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 128.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), + ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 128"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 From 4cab593db00573e3e6b2673e4c811006f0c257a8 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 08:18:24 +0200 Subject: [PATCH 279/321] feat: New error: Git failed to fetch because GitLab CEE giving 429 When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-watcher-operat83a83140517dea91dc6eb4a3c6aa4336ceeda64cec731-pod-step-clone.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt 
{"level":"info","ts":1758280181.0700035,"caller":"git/git.go:200","msg":"Successfully cloned https://gitlab.cee.redhat.com/openstack-konflux/openstack-operator-18.0 @ 3baf362ce5d2b526874f6c98b3d36ec8b9cd9371 (grafted, HEAD) in path /var/workdir/source"} {"level":"error","ts":1758280209.5156348,"caller":"git/git.go:53","msg":"Error running git [submodule update --recursive --init --depth=1]: exit status 128\nSubmodule 'source2image/barbican-operator/barbican-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/barbican-operator.git) registered for path 'source2image/barbican-operator/barbican-operator'\nSubmodule 'source2image/cinder-operator/cinder-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/cinder-operator.git) registered for path 'source2image/cinder-operator/cinder-operator'\nSubmodule 'source2image/designate-operator/designate-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/designate-operator.git) registered for path 'source2image/designate-operator/designate-operator'\nSubmodule 'source2image/glance-operator/glance-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/glance-operator.git) registered for path 'source2image/glance-operator/glance-operator'\nSubmodule 'source2image/heat-operator/heat-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/heat-operator.git) registered for path 'source2image/heat-operator/heat-operator'\nSubmodule 'source2image/horizon-operator/horizon-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/horizon-operator.git) registered for path 'source2image/horizon-operator/horizon-operator'\nSubmodule 'source2image/infra-operator/infra-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/infra-operator.git) registered for path 'source2image/infra-operator/infra-operator'\nSubmodule 'source2image/ironic-operator/ironic-operator' 
(https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ironic-operator.git) registered for path 'source2image/ironic-operator/ironic-operator'\nSubmodule 'source2image/keystone-operator/keystone-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/keystone-operator.git) registered for path 'source2image/keystone-operator/keystone-operator'\nSubmodule 'source2image/manila-operator/manila-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/manila-operator.git) registered for path 'source2image/manila-operator/manila-operator'\nSubmodule 'source2image/mariadb-operator/mariadb-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mariadb-operator.git) registered for path 'source2image/mariadb-operator/mariadb-operator'\nSubmodule 'source2image/mysqld-exporter/mysqld-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mysqld-exporter.git) registered for path 'source2image/mysqld-exporter/mysqld-exporter'\nSubmodule 'source2image/neutron-operator/neutron-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/neutron-operator.git) registered for path 'source2image/neutron-operator/neutron-operator'\nSubmodule 'source2image/nova-operator/nova-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/nova-operator.git) registered for path 'source2image/nova-operator/nova-operator'\nSubmodule 'source2image/octavia-operator/octavia-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/octavia-operator.git) registered for path 'source2image/octavia-operator/octavia-operator'\nSubmodule 'source2image/openstack-ansible-ee/openstack-ansible-ee' (https://gitlab.cee.redhat.com/openstack-midstream/edpm/source/edpm-ansible.git) registered for path 'source2image/openstack-ansible-ee/openstack-ansible-ee'\nSubmodule 'source2image/openstack-baremetal-agent/openstack-baremetal-operator' 
(https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-baremetal-operator.git) registered for path 'source2image/openstack-baremetal-agent/openstack-baremetal-operator'\nSubmodule 'source2image/openstack-baremetal-operator/openstack-baremetal-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-baremetal-operator.git) registered for path 'source2image/openstack-baremetal-operator/openstack-baremetal-operator'\nSubmodule 'source2image/openstack-must-gather/openstack-must-gather' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-must-gather.git) registered for path 'source2image/openstack-must-gather/openstack-must-gather'\nSubmodule 'source2image/openstack-network-exporter/openstack-network-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-network-exporter.git) registered for path 'source2image/openstack-network-exporter/openstack-network-exporter'\nSubmodule 'source2image/openstack-operator/openstack-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-operator.git) registered for path 'source2image/openstack-operator/openstack-operator'\nSubmodule 'source2image/ovn-operator/ovn-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ovn-operator.git) registered for path 'source2image/ovn-operator/ovn-operator'\nSubmodule 'source2image/placement-operator/placement-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/placement-operator.git) registered for path 'source2image/placement-operator/placement-operator'\nSubmodule 'source2image/prometheus-podman-exporter/prometheus-podman-exporter' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/prometheus-podman-exporter.git) registered for path 'source2image/prometheus-podman-exporter/prometheus-podman-exporter'\nSubmodule 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator' 
(https://gitlab.cee.redhat.com/openstack-midstream/podified/source/rabbitmq-cluster-operator.git) registered for path 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'\nSubmodule 'source2image/sg-core/sg-core' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/sg-core.git) registered for path 'source2image/sg-core/sg-core'\nSubmodule 'source2image/swift-operator/swift-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/swift-operator.git) registered for path 'source2image/swift-operator/swift-operator'\nSubmodule 'source2image/telemetry-operator/telemetry-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/telemetry-operator.git) registered for path 'source2image/telemetry-operator/telemetry-operator'\nSubmodule 'source2image/test-operator/test-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/test-operator.git) registered for path 'source2image/test-operator/test-operator'\nSubmodule 'source2image/watcher-operator/watcher-operator' (https://gitlab.cee.redhat.com/openstack-midstream/podified/source/watcher-operator.git) registered for path 'source2image/watcher-operator/watcher-operator'\nCloning into '/var/workdir/source/source2image/barbican-operator/barbican-operator'...\nCloning into '/var/workdir/source/source2image/cinder-operator/cinder-operator'...\nCloning into '/var/workdir/source/source2image/designate-operator/designate-operator'...\nCloning into '/var/workdir/source/source2image/glance-operator/glance-operator'...\nCloning into '/var/workdir/source/source2image/heat-operator/heat-operator'...\nCloning into '/var/workdir/source/source2image/horizon-operator/horizon-operator'...\nCloning into '/var/workdir/source/source2image/infra-operator/infra-operator'...\nCloning into '/var/workdir/source/source2image/ironic-operator/ironic-operator'...\nremote: Retry later\nfatal: unable to access 
'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ironic-operator.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/ironic-operator.git' into submodule path '/var/workdir/source/source2image/ironic-operator/ironic-operator' failed\nFailed to clone 'source2image/ironic-operator/ironic-operator'. Retry scheduled\nCloning into '/var/workdir/source/source2image/keystone-operator/keystone-operator'...\nCloning into '/var/workdir/source/source2image/manila-operator/manila-operator'...\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/manila-operator.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/manila-operator.git' into submodule path '/var/workdir/source/source2image/manila-operator/manila-operator' failed\nFailed to clone 'source2image/manila-operator/manila-operator'. Retry scheduled\nCloning into '/var/workdir/source/source2image/mariadb-operator/mariadb-operator'...\nremote: Retry later\nfatal: unable to access 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mariadb-operator.git/': The requested URL returned error: 429\nfatal: clone of 'https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mariadb-operator.git' into submodule path '/var/workdir/source/source2image/mariadb-operator/mariadb-operator' failed\nFailed to clone 'source2image/mariadb-operator/mariadb-operator'. 
Retry scheduled\nCloning into '/var/workdir/source/source2image/mysqld-exporter/mysqld-exporter'...\nCloning into '/var/workdir/source/source2image/neutron-operator/neutron-operator'...\nCloning into '/var/workdir/source/source2image/nova-operator/nova-operator'...\nCloning into '/var/workdir/source/source2image/octavia-operator/octavia-operator'...\nCloning into '/var/workdir/source/source2image/openstack-ansible-ee/openstack-ansible-ee'...\nCloning into '/var/workdir/source/source2image/openstack-baremetal-agent/openstack-baremetal-operator'...\nCloning into '/var/workdir/source/source2image/openstack-baremetal-operator/openstack-baremetal-operator'...\nCloning into '/var/workdir/source/source2image/openstack-must-gather/openstack-must-gather'...\nCloning into '/var/workdir/source/source2image/openstack-network-exporter/openstack-network-exporter'...\nCloning into '/var/workdir/source/source2image/openstack-operator/openstack-operator'...\nCloning into '/var/workdir/source/source2image/ovn-operator/ovn-operator'...\nCloning into '/var/workdir/source/source2image/placement-operator/placement-operator'...\nCloning into '/var/workdir/source/source2image/prometheus-podman-exporter/prometheus-podman-exporter'...\nCloning into '/var/workdir/source/source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator'...\nCloning into '/var/workdir/source/source2image/sg-core/sg-core'...\nCloning into '/var/workdir/source/source2image/swift-operator/swift-operator'...\nCloning into '/var/workdir/source/source2image/telemetry-operator/telemetry-operator'...\nCloning into '/var/workdir/source/source2image/test-operator/test-operator'...\nCloning into '/var/workdir/source/source2image/watcher-operator/watcher-operator'...\nCloning into '/var/workdir/source/source2image/ironic-operator/ironic-operator'...\nCloning into '/var/workdir/source/source2image/manila-operator/manila-operator'...\nCloning into '/var/workdir/source/source2image/mariadb-operator/mariadb-operator'...\nFrom 
https://gitlab.cee.redhat.com/openstack-midstream/podified/source/barbican-operator\n * branch 6ec5aeadb762f2aa6525d910c92090f47676c208 -> FETCH_HEAD\nSubmodule path 'source2image/barbican-operator/barbican-operator': checked out '6ec5aeadb762f2aa6525d910c92090f47676c208'\nSubmodule path 'source2image/cinder-operator/cinder-operator': checked out 'cbfa0b6b5a3fc9cd503e63fcfed875cbd6bc23ec'\nSubmodule path 'source2image/designate-operator/designate-operator': checked out '30b7110b0524d9be7d131b8d7a92324f50cff58b'\nSubmodule path 'source2image/glance-operator/glance-operator': checked out '2be6be08d902d6dc4ef1265b832f9ef0bc673d8b'\nSubmodule path 'source2image/heat-operator/heat-operator': checked out '45e4d9dd39a4da35b2ad8b93463a27c0d04b3a45'\nSubmodule path 'source2image/horizon-operator/horizon-operator': checked out '9d97a0b7a0c7f5a6aa39f049a3f17d77ffe6c8a4'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/infra-operator\n * branch f2de869e495341e9fbeb433b492b60d204fd38af -> FETCH_HEAD\nSubmodule path 'source2image/infra-operator/infra-operator': checked out 'f2de869e495341e9fbeb433b492b60d204fd38af'\nSubmodule path 'source2image/ironic-operator/ironic-operator': checked out '1e6f74ca0521d57829ee061c74738db4e90e4b79'\nSubmodule path 'source2image/keystone-operator/keystone-operator': checked out 'b2e35a35d42306a26c048ce7cdae9c52e44f7bee'\nSubmodule path 'source2image/manila-operator/manila-operator': checked out 'c7cc59b9eb472e94ded36f637fca8d5548a3afaa'\nSubmodule path 'source2image/mariadb-operator/mariadb-operator': checked out '213306df744131350ca7d4375f3d5104b88bd5f3'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/mysqld-exporter\n * branch 3abedeb1d84f3c9c76a891c0b4d9ca14c69f0d7e -> FETCH_HEAD\nSubmodule path 'source2image/mysqld-exporter/mysqld-exporter': checked out '3abedeb1d84f3c9c76a891c0b4d9ca14c69f0d7e'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/neutron-operator\n * branch 
7da84a35fa6cc9a5a7e81d39f43123e498247e4a -> FETCH_HEAD\nSubmodule path 'source2image/neutron-operator/neutron-operator': checked out '7da84a35fa6cc9a5a7e81d39f43123e498247e4a'\nSubmodule path 'source2image/nova-operator/nova-operator': checked out '64db9a82b4e72c01a4fc87c8e46872036d2c2e5a'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/octavia-operator\n * branch 05d69fc35a55515993f9285410506a43cbaacaeb -> FETCH_HEAD\nSubmodule path 'source2image/octavia-operator/octavia-operator': checked out '05d69fc35a55515993f9285410506a43cbaacaeb'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/edpm/source/edpm-ansible\n * branch 0cead0fb801df3f27090f470abb4009910a5fcd9 -> FETCH_HEAD\nSubmodule path 'source2image/openstack-ansible-ee/openstack-ansible-ee': checked out '0cead0fb801df3f27090f470abb4009910a5fcd9'\nSubmodule path 'source2image/openstack-baremetal-agent/openstack-baremetal-operator': checked out '13588826f6d6664f03717deff67655db45a134da'\nSubmodule path 'source2image/openstack-baremetal-operator/openstack-baremetal-operator': checked out '13588826f6d6664f03717deff67655db45a134da'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-must-gather\n * branch 87704990690ab7f209125bebfb2bceb4ee6af11b -> FETCH_HEAD\nSubmodule path 'source2image/openstack-must-gather/openstack-must-gather': checked out '87704990690ab7f209125bebfb2bceb4ee6af11b'\nSubmodule path 'source2image/openstack-network-exporter/openstack-network-exporter': checked out 'cebab24bee9cd3c647b7d06ee38a099e4a05a985'\nFrom https://gitlab.cee.redhat.com/openstack-midstream/podified/source/openstack-operator\n * branch 8be5f30b2baec1caec160d5e14de0bb41449e27f -> FETCH_HEAD\nSubmodule path 'source2image/openstack-operator/openstack-operator': checked out '8be5f30b2baec1caec160d5e14de0bb41449e27f'\nSubmodule path 'source2image/ovn-operator/ovn-operator': checked out 'a1902b8c8c94b1f1de3ed8f64786d83190185383'\nSubmodule path 
'source2image/placement-operator/placement-operator': checked out '0561698fe8399fffe6e2b619d7ab9615ac914f79'\nSubmodule path 'source2image/prometheus-podman-exporter/prometheus-podman-exporter': checked out '60b38adbf20164c11024f61621e6577026129568'\nerror: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429\nfatal: expected 'acknowledgments'\nfatal: Fetched in submodule path 'source2image/rabbitmq-cluster-operator/rabbitmq-cluster-operator', but it did not contain a3524acedd49ee0a49b5e6ae9e093c5cfb4ca590. Direct fetching of that commit failed.\n","stacktrace":"github.com/tektoncd-catalog/git-clone/git-init/git.run\n\t/opt/app-root/src/git-init/git/git.go:53\ngithub.com/tektoncd-catalog/git-clone/git-init/git.submoduleFetch\n\t/opt/app-root/src/git-init/git/git.go:236\ngithub.com/tektoncd-catalog/git-clone/git-init/git.Fetch\n\t/opt/app-root/src/git-init/git/git.go:202\nmain.main\n\t/opt/app-root/src/git-init/main.go:52\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} {"level":"fatal","ts":1758280209.5157819,"caller":"git-init/main.go:53","msg":"Error fetching git repository: exit status 128","stacktrace":"main.main\n\t/opt/app-root/src/git-init/main.go:53\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a131a36ecb..e467c67c74 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -167,6 +167,7 @@ ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 128.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), ("Git failed to 
fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 128"), + ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 128"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 From bb785e2a23f791e646de91e1b6018eab6d733e08 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 08:37:17 +0200 Subject: [PATCH 280/321] fix: Change error: Relax regexp here as exist codes are sometimes 1, sometimes 128 --- tests/load-tests/errors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e467c67c74..1c57e488d5 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -165,9 +165,9 @@ ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching 
blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), - ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 128.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), - ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 128"), - ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 128"), + ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]\+' into submodule path '[^ ]\+' failed"), + ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 1"), + ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 1"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal 
msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 From 6769767d19c731b9149e0716f808eda47abcd7f5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:13:31 +0200 Subject: [PATCH 281/321] fix: Fixed error regexps and added one more variant --- tests/load-tests/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1c57e488d5..e540495a45 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -165,7 +165,8 @@ ("Gateway Time-out when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 504 Gateway Time-out"), ("Gateway Time-out when pulling container image", r"Error: copying system image from manifest list: parsing image configuration: fetching blob: received unexpected HTTP status: 504 Gateway Time-out"), ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), - ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]\+' into submodule path '[^ ]\+' failed"), + ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1.*error: RPC failed; HTTP 
429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), + ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: Fetched in submodule path '[^ ]+', but it did not contain [^ ]+. Direct fetching of that commit failed"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 1"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 1"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), From cefc8f3c0fae7cf47914f606c8f137a616149f66 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:20:55 +0200 Subject: [PATCH 282/321] feat: New error: DNF failed to install package because GPG check failed When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-edpm-hardened-u85159135a27023f5a9263b65a70d4dc95ebc408ec7e3-pod-step-build.log [2025-09-11T08:23:21,900695702+00:00] Run buildah build [2025-09-11T08:23:21,901637952+00:00] ip link set lo up && buildah build --volume /tmp/cachi2:/cachi2 --volume /var/workdir/fetched.repos.d:/etc/yum.repos.d --pull=never --build-arg=batch=18.0_20250605.1 --build-arg=tags=18.0\ 18.0_20250605.1 --build-arg=version=18.0.9 --security-opt=unmask=/proc/interrupts 
--label architecture=x86_64 --label vcs-type=git --label vcs-ref=0d5e68c7a0d4af533d7346cf9e16d5f2fe1cc49b --label quay.expires-after=5d --label build-date=2025-09-11T08:23:21Z --tls-verify=true --no-cache --ulimit nofile=4096:4096 --http-proxy=false -f /tmp/Containerfile.ZEFSIT -t quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/edpm-hardened-uefi:on-pr-0d5e68c7a0d4af533d7346cf9e16d5f2fe1cc49b . [1/2] STEP 1/7: FROM registry.redhat.io/rhel9-4-els/rhel:9.4@sha256:00e433b9ad6b154ec6e68fc8afb76e4d4771b17bc806074ceb74ef0ba0e380de AS builder [1/2] STEP 2/7: RUN . /cachi2/cachi2.env && dnf -y install rhoso-images-edpm-x86_64 tar Updating Subscription Management repositories. Unable to read consumer identity This system is not registered with an entitlement server. You can use subscription-manager to register. Repository 'rhoso-18.0-for-rhel-9-x86_64-rpms' is missing name in configuration, using id. Repository 'rhel-9-for-x86_64-appstream-e4s-rpms__9_DOT_4' is missing name in configuration, using id. Repository 'rhel-9-for-x86_64-baseos-e4s-rpms__9_DOT_4' is missing name in configuration, using id. rhoso-18.0-for-rhel-9-x86_64-rpms 2.0 MB/s | 2.1 kB 00:00 rhel-9-for-x86_64-appstream-e4s-rpms__9_DOT_4 3.8 MB/s | 3.9 kB 00:00 rhel-9-for-x86_64-baseos-e4s-rpms__9_DOT_4 203 MB/s | 248 kB 00:00 Package tar-2:1.34-6.el9_4.1.x86_64 is already installed. Dependencies resolved. 
=================================================================================================== Package Arch Version Repository Size =================================================================================================== Installing: rhoso-images-edpm-x86_64 noarch 18.0-20250903.2.el9ost rhoso-18.0-for-rhel-9-x86_64-rpms 1.3 G Installing dependencies: rhoso-images-base noarch 18.0-20250903.2.el9ost rhoso-18.0-for-rhel-9-x86_64-rpms 7.5 k rhoso-images-metadata noarch 18.0-20250903.2.el9ost rhoso-18.0-for-rhel-9-x86_64-rpms 7.7 k rhoso-release noarch 18.0.12-1.el9ost rhoso-18.0-for-rhel-9-x86_64-rpms 6.9 k Transaction Summary =================================================================================================== Install 4 Packages Total size: 1.3 G Installed size: 1.4 G Downloading Packages: Package rhoso-images-base-18.0-20250903.2.el9ost.noarch.rpm is not signed Package rhoso-images-edpm-x86_64-18.0-20250903.2.el9ost.noarch.rpm is not signed Package rhoso-images-metadata-18.0-20250903.2.el9ost.noarch.rpm is not signed Error: GPG check FAILED subprocess exited with status 1 subprocess exited with status 1 Error: building at STEP "RUN . 
/cachi2/cachi2.env && dnf -y install rhoso-images-edpm-x86_64 tar": exit status 1 --- tests/load-tests/errors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e540495a45..678668736c 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -117,6 +117,7 @@ ("Unable to connect to server", r"Error: Unable to connect to server"), } +# Generic guideline on constructing error reasons: FAILED_PLR_ERRORS = { ("SKIP", r"Skipping step because a previous step failed"), # This is a special "wildcard" error, let's keep it on top and do not change "SKIP" reason as it is used in the code ("Bad Gateway when pulling container image from quay.io", r"Error: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: received unexpected HTTP status: 502 Bad Gateway "), @@ -129,6 +130,7 @@ ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download-[0-9]+.beak-[0-9]+.prod.iad2.dc.redhat.com"), ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .*/mock/.*Failed to connect to download.devel.redhat.com"), ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository 
\"build\": Usable URL not found"), + ("DNF failed to install package because GPG check failed", r"dnf -y install .* is not signed.*Error: GPG check FAILED.*exit status 1"), ("Enterprise contract results failed validation", r"^false $"), ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), ("Error allocating host because of insufficient free addresses in subnet", r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances."), From 49f89ab3a760e343be70762057374eb7887781b2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:25:36 +0200 Subject: [PATCH 283/321] feat: New error: Go failed installation because it was killed When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-openstack-operaac490bba4c0a81bc91b802d328d021779f6dd1a6a16f-pod-step-build.log [2025-09-12T17:52:07,079160929+00:00] Run buildah build [...] [2/3] STEP 16/31: RUN . /cachi2/cachi2.env && go env GOMODCACHE /cachi2/output/deps/gomod/pkg/mod [2/3] STEP 17/31: RUN . /cachi2/cachi2.env && mkdir -p $REMOTE_SOURCE_DIR/bin/ [2/3] STEP 18/31: RUN . 
/cachi2/cachi2.env && go install github.com/operator-framework/operator-sdk/cmd/operator-sdk@v1.35.0 subprocess exited on killed subprocess exited with status 1 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 678668736c..e25de94bb5 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -171,6 +171,7 @@ ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: Fetched in submodule path '[^ ]+', but it did not contain [^ ]+. Direct fetching of that commit failed"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 1"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 1"), + ("Go failed installation because it was killed", r"Run buildah build .* go install [^ ]+ subprocess exited on killed subprocess exited with status 1"), ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # 
KONFLUX-9944 From a97193bd051c15c085592e5cac7f43bfb5afe722 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:30:17 +0200 Subject: [PATCH 284/321] feat: New error: oras failed to fetch blob from Quay.io because it was terminated When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-openstack-oper65426086a555e3fe09d9e6a5235aa047362644e8b0d07-pod-step-use-trusted-artifact.log Using token for quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-operator-bundle Executing: oras blob fetch --registry-config /tmp/use-oci.sh.qkJXi3/auth-TOn90n.json quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-operator-bundle@sha256:36c0c947066fc65723a7f43fdf631f9a3f4a8e2ee1df8f07d11bf2351b9eba5e --output - Restored artifact quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-operator-bundle@sha256:36c0c947066fc65723a7f43fdf631f9a3f4a8e2ee1df8f07d11bf2351b9eba5e to /var/workdir/source Using token for quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-operator-bundle Executing: oras blob fetch --registry-config /tmp/use-oci.sh.qkJXi3/auth-6IfEkw.json quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-operator-bundle@sha256:03ed145d34f26f1b76f8496fefba4771d3aa6f7dee8dcf76768f26493424ecf5 --output - Terminated --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index e25de94bb5..1fe762a387 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -175,6 +175,7 @@ ("Introspection failed because of incomplete .docker/config.json", r".* level=fatal msg=\"Error parsing image name .*: getting username and password: reading JSON file .*/tekton/home/.docker/config.json.*: unmarshaling JSON at .*: unexpected end of JSON input\""), ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: 
quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 + ("oras failed to fetch blob from Quay.io because it was terminated", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output [^ ]+ Terminated"), ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. 
cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), From cda594d532aecf9832f262a8b569c24631362d8d Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:34:39 +0200 Subject: [PATCH 285/321] fix: Make this space optional --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 1fe762a387..dda316653f 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -131,7 +131,7 @@ ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .*/mock/.*Failed to connect to download.devel.redhat.com"), ("DNF failed to download repodata from Koji", r"ERROR Command returned error: Failed to download metadata (baseurl: \"https://kojipkgs.fedoraproject.org/repos/[^ ]*\") for repository \"build\": Usable URL not found"), ("DNF failed to install package because GPG check failed", r"dnf -y install .* is not signed.*Error: GPG check FAILED.*exit status 1"), - ("Enterprise contract results failed validation", r"^false $"), + ("Enterprise contract results failed validation", r"^false *$"), ("Error allocating host as provision TR already exists", r"Error allocating host: taskruns.tekton.dev \".*provision\" already exists"), ("Error allocating host because of insufficient free addresses in subnet", r"Error allocating host: failed to launch EC2 instance for .* operation error EC2: RunInstances, https response error StatusCode: 400, RequestID: .*, api error InsufficientFreeAddressesInSubnet: There are not enough free addresses in subnet .* to satisfy the requested number of instances."), ("Error 
allocating host because of provisioning error", r"Error allocating host: failed to provision host"), From 655757dad05039d71dad190e1a23e82186866ec0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:58:27 +0200 Subject: [PATCH 286/321] feat: Also skipp containers that were just cancelled because TaskRun was cancelled --- tests/load-tests/errors.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index dda316653f..269bdd06e3 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -357,8 +357,11 @@ def find_failed_containers(data_dir, ns, tr_name): try: pod_name = data["status"]["podName"] for sr in data["status"]["steps"]: - if sr["terminated"]["exitCode"] != 0: - yield (pod_name, sr["container"]) + if sr["terminated"]["exitCode"] == 0: + continue + if sr["terminated"]["reason"] == "TaskRunCancelled": + continue + yield (pod_name, sr["container"]) except KeyError: return From 0bed36c39fd12a12bc737a56b057069595a0dfc1 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 09:58:45 +0200 Subject: [PATCH 287/321] feat: New error: buildah failed to pull image from Quay.io because unauthorized When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-openstack-barbi4443d67b267b3ebddd8c14de1a25afb1ccabe06e54f4-pod-step-build.log [2025-09-04T15:56:27,839581228+00:00] Validate context path [2025-09-04T15:56:27,842711471+00:00] Update CA trust [2025-09-04T15:56:27,843727868+00:00] Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' [2025-09-04T15:56:28,776479129+00:00] Prepare Dockerfile Checking if /var/workdir/cachi2/output/bom.json exists. 
Preparing construction of content-sets.json to be placed at /usr/share/buildinfo/content-sets.json in the image Constructed the following: { "metadata": { "icm_version": 1, "icm_spec": "https://raw.githubusercontent.com/containerbuildsystem/atomic-reactor/master/atomic_reactor/schemas/content_manifest.json", "image_layer_index": 0 }, "from_dnf_hint": true, "content_sets": [ "ansible-automation-platform-2_DOT_5-for-rhel-9-x86_64-rpms", "fast-datapath-for-rhel-9-x86_64-rpms", "rhceph-6-tools-for-rhel-9-x86_64-rpms", "rhel-9-for-x86_64-appstream-e4s-rpms__9_DOT_4", "rhel-9-for-x86_64-baseos-e4s-rpms__9_DOT_4", "rhocp-4_DOT_15-for-rhel-9-x86_64-rpms", "rhoso-18.0-for-rhel-9-x86_64-rpms" ] } Appending a COPY command to the Containerfile [2025-09-04T15:56:28,843442844+00:00] Prepare system (architecture: x86_64) Executing: unshare -Ufp --keep-caps -r --map-users 1,1,65536 --map-groups 1,1,65536 --mount -- buildah pull --retry 3 quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base@sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45 Trying to pull quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base@sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45... 
Error: internal error: unable to copy from source docker://quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base@sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45: initializing source docker://quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base@sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45: reading manifest sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45 in quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base: unauthorized: access to the requested resource is not authorized warning: Command failed and will retry, 1 try error: Unauthorized error, wrong registry credentials provided, won't retry Failed to pull base image quay.io/redhat-user-workloads/openstack-tenant/openstack-18-0/openstack-barbican-base@sha256:96c47ff1a8ac17b928d3bf930b0c93cbba03253bead42eabbdd00004417c3a45 --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 269bdd06e3..525d9071e9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -124,6 +124,7 @@ ("buildah build failed to pull container from registry.access.redhat.com because digest mismatch", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+: copying system image from manifest list: parsing image configuration: Download config.json digest [^ ]+ does not match expected [^ ]+"), ("buildah build failed to pull container from registry.access.redhat.com because of 403", r"Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/.*: copying system image from manifest list: determining manifest MIME type for docker://registry.access.redhat.com/.*: 
reading manifest .* in registry.access.redhat.com/.*: StatusCode: 403"), ("buildah build failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+ received unexpected HTTP status: 500 Internal Server Error"), + ("buildah failed to pull image from Quay.io because unauthorized", r"Executing: unshare .* buildah pull .* Trying to pull quay.io/[^ ]+ Error: internal error: unable to copy from source docker://quay.io/[^ ]+: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: unauthorized: access to the requested resource is not authorized warning: Command failed and will retry, 1 try error: Unauthorized error, wrong registry credentials provided, won't retry Failed to pull base image quay.io/[^ ]+"), ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), ("DNF failed to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), From 4acd1962840146639d9cdfcb34df05a999a30347 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 10:04:21 +0200 Subject: [PATCH 288/321] feat: New error: Git failed to fetch because GitLab CEE giving 429 When working on KONFLUX-10176 
run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-watcher-operatc6546d74d84304f0e1cc7959bf41fb166db9e1bea6746-pod-step-clone.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt {"level":"error","ts":1758273507.6520457,"caller":"git/git.go:53","msg":"Error running git [fetch --recurse-submodules=yes --depth=1 origin --update-head-ok --force 3baf362ce5d2b526874f6c98b3d36ec8b9cd9371]: exit status 128\nwarning: redirecting to https://gitlab.cee.redhat.com/openstack-konflux/openstack-operator-18.0.git/\nerror: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429\nfatal: expected flush after ref listing\n","stacktrace":"github.com/tektoncd-catalog/git-clone/git-init/git.run\n\t/opt/app-root/src/git-init/git/git.go:53\ngithub.com/tektoncd-catalog/git-clone/git-init/git.Fetch\n\t/opt/app-root/src/git-init/git/git.go:180\nmain.main\n\t/opt/app-root/src/git-init/main.go:52\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} {"level":"fatal","ts":1758273507.6521184,"caller":"git-init/main.go:53","msg":"Error fetching git repository: failed to fetch [3baf362ce5d2b526874f6c98b3d36ec8b9cd9371]: exit status 128","stacktrace":"main.main\n\t/opt/app-root/src/git-init/main.go:53\nruntime.main\n\t/usr/lib/golang/src/runtime/proc.go:283"} --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 525d9071e9..8bf43ceb0d 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -170,6 +170,7 @@ ("Getting repo tags from quay.io failed because of 502 Bad Gateway", r"Error determining repository tags: pinging container registry quay.io: received unexpected HTTP status: 502 Bad Gateway"), ("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: clone of 'https://gitlab.cee.redhat.com/[^ ]+' into submodule path '[^ ]+' failed"), 
("Git failed to clone submodule because GitLab CEE giving 429", r"Error running git .*: exit status 1.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*fatal: Fetched in submodule path '[^ ]+', but it did not contain [^ ]+. Direct fetching of that commit failed"), + ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 128.*error: RPC failed; HTTP 429 curl 22 The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 128"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: failed to fetch [^ ]+: exit status 1"), ("Git failed to fetch because GitLab CEE giving 429", r"Error running git .*: exit status 1.*remote: Retry later.*fatal: unable to access 'https://gitlab.cee.redhat.com/.*': The requested URL returned error: 429.*Error fetching git repository: exit status 1"), ("Go failed installation because it was killed", r"Run buildah build .* go install [^ ]+ subprocess exited on killed subprocess exited with status 1"), From 03968f757edbef52e5b5c567ff31a39e4bfdf8ca Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 10:14:28 +0200 Subject: [PATCH 289/321] feat: New error: Prefetch dependencies failed to download from rhsm-pulp.corp.redhat.com because not whole content was fetched When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-openstack236a389aa168d52202dda4b3e129304f6a0d4adb4e11ca9dec-pod-step-prefetch-dependencies.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Executing: git fetch --tags 2025-09-16 18:05:10,933 INFO Reading RPM lockfile: /tmpmkwowdrd.hermeto-source-copy/rpms.lock.yaml 2025-09-16 18:05:11,935 INFO 
Downloading files for 'x86_64' architecture. 2025-09-16 18:06:45,958 ERROR Unsuccessful download: https://rhsm-pulp.corp.redhat.com/content/e4s/rhel9/9.4/x86_64/baseos/os/Packages/k/kernel-modules-core-5.14.0-427.88.1.el9_4.x86_64.rpm 2025-09-16 18:06:45,977 ERROR FetchError: exception_name: ClientPayloadError, details: Response payload is not completed: Error: FetchError: exception_name: ClientPayloadError, details: Response payload is not completed: The error might be intermittent, please try again. If the issue seems to be on the hermeto side, please contact the maintainers. --- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 8bf43ceb0d..4c915dca14 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -178,6 +178,7 @@ ("Invalid reference when processing SBOM", r"SBOM .* error during command execution: could not parse reference: quay.io/[^ ]+"), ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 ("oras failed to fetch blob from Quay.io because it was terminated", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output [^ ]+ Terminated"), + ("Prefetch dependencies failed to download from rhsm-pulp.corp.redhat.com because not whole content was fetched", r"Reading RPM lockfile: [^ ]+ .* Unsuccessful download: https://rhsm-pulp.corp.redhat.com/content/[^ ]+ .* ERROR FetchError: exception_name: ClientPayloadError, details: Response payload is not completed: .ContentLengthError: 400, message='Not enough data to satisfy content length header.'."), ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed 
because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), From 868b84ca4a9b9fc7c6efba031030710b7f94006c Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 10:19:56 +0200 Subject: [PATCH 290/321] feat: New error: Prefetch dependencies failed to download from download.devel.redhat.com because of timeout When working on KONFLUX-10176 run-2025-09-22T11_18_30_168225/openstack-tenant/1/pod-octavia-a2e808e6473f47234d96b4ad16d283cdbbce05d0699326c35c0-pod-step-prefetch-dependencies.log INFO: Using mounted CA bundle: /mnt/trusted-ca/ca-bundle.crt '/mnt/trusted-ca/ca-bundle.crt' -> '/etc/pki/ca-trust/source/anchors/ca-bundle.crt' Executing: git fetch --tags 2025-09-11 00:24:33,953 INFO Reading RPM lockfile: /tmp6xka_4rj.hermeto-source-copy/copyout/octavia-amphora-image/rpms.lock.yaml 2025-09-11 00:24:34,078 INFO Downloading files for 'x86_64' architecture. 2025-09-11 00:39:34,364 ERROR Unsuccessful download: https://download.devel.redhat.com/rhel-9/nightly/RHOSO/RHOSO-18.0/latest-RHOSO-18-RHEL-9/compose/OpenStack/x86_64/os/Packages/octavia-amphora-image-vert-fips-x86_64-18.0-20250903.2.el9ost.noarch.rpm 2025-09-11 00:39:34,384 ERROR FetchError: exception_name: TimeoutError, details: Error: FetchError: exception_name: TimeoutError, details: The error might be intermittent, please try again. If the issue seems to be on the hermeto side, please contact the maintainers. 
--- tests/load-tests/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 4c915dca14..5ca12f38f8 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -179,6 +179,7 @@ ("No podman installed on a MPC VM", r"remote_cmd podman unshare setfacl .* \+ ssh -o StrictHostKeyChecking=no [^ ]+ podman unshare setfacl .* bash: line 1: podman: command not found"), # KONFLUX-9944 ("oras failed to fetch blob from Quay.io because it was terminated", r"Executing: oras blob fetch --registry-config [^ ]+ quay.io/[^ ]+ --output [^ ]+ Terminated"), ("Prefetch dependencies failed to download from rhsm-pulp.corp.redhat.com because not whole content was fetched", r"Reading RPM lockfile: [^ ]+ .* Unsuccessful download: https://rhsm-pulp.corp.redhat.com/content/[^ ]+ .* ERROR FetchError: exception_name: ClientPayloadError, details: Response payload is not completed: .ContentLengthError: 400, message='Not enough data to satisfy content length header.'."), + ("Prefetch dependencies failed to download from download.devel.redhat.com because of timeout", r"Reading RPM lockfile: [^ ]+ .* Unsuccessful download: https://download.devel.redhat.com/[^ ]+ .* ERROR FetchError: exception_name: TimeoutError, details: Error: FetchError: exception_name: TimeoutError,"), ("Release failed because unauthorized when pulling policy", r"Error: pulling policy: GET .https://quay.io/v2/konflux-ci/konflux-vanguard/data-acceptable-bundles/blobs/sha256:[0-9a-z]+.: response status code 401: Unauthorized"), ("Release failed because unauthorized when pushing artifact", r"Prepared artifact from /var/workdir/release .* Token not found for quay.io/konflux-ci/release-service-trusted-artifacts Uploading [0-9a-z]+ sourceDataArtifact Error response from registry: unauthorized: access to the requested resource is not authorized: map.. 
Command exited with non-zero status 1"), ("RPM build failed: bool cannot be defined via typedef", r"error: .bool. cannot be defined via .typedef..*error: Bad exit status from /var/tmp/rpm-tmp..* ..build."), From d73d5804d750b1e9f2e6b14d15a139a8b6134b8e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Tue, 23 Sep 2025 10:22:08 +0200 Subject: [PATCH 291/321] fix: Dockerfile can have different names --- tests/load-tests/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 5ca12f38f8..a1801eebb2 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -126,7 +126,7 @@ ("buildah build failed to pull container from registry.access.redhat.com because of 500 Internal Server Error", r"buildah build.*FROM registry.access.redhat.com/[^ ]+ Trying to pull registry.access.redhat.com/[^ ]+ Getting image source signatures Error: creating build container: internal error: unable to copy from source docker://registry.access.redhat.com/[^ ]+ copying system image from manifest list: reading signatures: reading signature from https://access.redhat.com/[^ ]+ received unexpected HTTP status: 500 Internal Server Error"), ("buildah failed to pull image from Quay.io because unauthorized", r"Executing: unshare .* buildah pull .* Trying to pull quay.io/[^ ]+ Error: internal error: unable to copy from source docker://quay.io/[^ ]+: initializing source docker://quay.io/[^ ]+: reading manifest [^ ]+ in quay.io/[^ ]+: unauthorized: access to the requested resource is not authorized warning: Command failed and will retry, 1 try error: Unauthorized error, wrong registry credentials provided, won't retry Failed to pull base image quay.io/[^ ]+"), ("Can not find chroot_scan.tar.gz file", r"tar: .*/chroot_scan.tar.gz: Cannot open: No such file or directory"), - ("Can not find Dockerfile", r"Cannot find Dockerfile Dockerfile"), + ("Can not find Dockerfile", r"Cannot find Dockerfile [^ ]+"), ("DNF failed 
to download repodata from Download Devel because could not resolve host", r"Errors during downloading metadata for repository '[^ ]+': - Curl error .6.: Couldn't resolve host name for http://download.devel.redhat.com/brewroot/repos/[^ ]+ .Could not resolve host: download\.devel\.redhat\.com."), ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried [^ ]+/mock/.*Failed to connect to download-[0-9]+.beak-[0-9]+.prod.iad2.dc.redhat.com"), ("DNF failed to download repodata from Download Devel because timeout", r"dnf.exceptions.RepoError: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .* CRITICAL Error: Failed to download metadata for repo 'build': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried .*/mock/.*Failed to connect to download.devel.redhat.com"), From 239a1a6a871bfdefd0fd4604cf4ce7c027af4bbd Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 30 Jul 2025 12:12:34 +0200 Subject: [PATCH 292/321] fix: Do not fail if there are no release related CRs as this is a best effort function Generated-by: Gemini --- tests/load-tests/pkg/journey/handle_collections.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 4e9edfbed1..375ae83515 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -15,6 +15,7 @@ import ( k8s_api_errors "k8s.io/apimachinery/pkg/api/errors" ) + func getDirName(baseDir, namespace, iteration string) string { return 
filepath.Join(baseDir, "collected-data", namespace, iteration) + "/" } From 68c3664f73c332f539f0db1f53a1dc5e93f7c2ba Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 25 Sep 2025 11:35:45 +0200 Subject: [PATCH 293/321] feat: Add more info into distribution of KPI durations --- tests/load-tests/evaluate.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index 4779816427..c339dc86e3 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -124,6 +124,10 @@ def count_stats(data): if len(data) == 0: return { "samples": 0, + "min": -1, + "max": -1, + "mean": -1, + "stdev": -1, } else: return { @@ -131,6 +135,7 @@ def count_stats(data): "min": min(data), "mean": statistics.mean(data), "max": max(data), + "stdev": statistics.stdev(data) if len(data) >= 2 else -1, } def count_stats_when(data): @@ -254,7 +259,7 @@ def main(): kpi_errors += 1 stats["KPI"] = {} - stats["KPI"]["mean"] = sum(kpi_mean_data) / kpi_successes if kpi_successes > 0 else -1 + stats["KPI"] = count_stats(kpi_mean_data) stats["KPI"]["successes"] = kpi_successes stats["KPI"]["errors"] = kpi_errors From 3674432925a234df5a29d6004e58982c5ac01d42 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 26 Sep 2025 10:14:13 +0200 Subject: [PATCH 294/321] fix: Fix name 'datafile' is not defined error caused by refactor --- tests/load-tests/errors.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index a1801eebb2..95b3a6a7f9 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -304,9 +304,7 @@ def find_first_failed_build_plr(data_dir, plr_type): a "build" PLR and it is failed one. 
""" - for data in find_all_failed_plrs(data_dir): - data = load(datafile) - + for plr in find_all_failed_plrs(data_dir): if plr_type == "build": plr_type_label = "build" elif plr_type == "release": @@ -316,12 +314,12 @@ def find_first_failed_build_plr(data_dir, plr_type): # Skip PLRs that do not have expected type try: - if data["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != plr_type_label: + if plr["metadata"]["labels"]["pipelines.appstudio.openshift.io/type"] != plr_type_label: continue except KeyError: continue - return data + return plr def find_trs(plr): From 66d6b66b8a5288efbbafa3a989c2b29d527cd5de Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 26 Sep 2025 11:59:48 +0200 Subject: [PATCH 295/321] fix(KONFLUX-10309): Use right organization when getting templates Normally we are using this template repo: https://github.com/rhtap-perf-test/konflux-probe-test-templates That lives in same organization as all the component repos we work with. When Zhiming tried to use his fork with one small change: https://github.com/zxiong/konflux-probe-test-templates getting file from there did not seen the change. Turns out our code was actually getting the file from `rhtap-perf-test`. This fixes the issue and as far as the GitHub token is sufficient to get thta file from that different repo in different organization, it will try to do so. 
--- pkg/clients/github/repositories.go | 6 +++++- tests/load-tests/pkg/journey/handle_repo_templating.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/clients/github/repositories.go b/pkg/clients/github/repositories.go index 4794d66477..d7a0c93ddc 100644 --- a/pkg/clients/github/repositories.go +++ b/pkg/clients/github/repositories.go @@ -66,11 +66,15 @@ func (g *Github) CreateFile(repository, pathToFile, fileContent, branchName stri } func (g *Github) GetFile(repository, pathToFile, branchName string) (*github.RepositoryContent, error) { + return g.GetFileWithOrg(g.organization, repository, pathToFile, branchName) +} + +func (g *Github) GetFileWithOrg(org, repository, pathToFile, branchName string) (*github.RepositoryContent, error) { opts := &github.RepositoryContentGetOptions{} if branchName != "" { opts.Ref = fmt.Sprintf(HEADS, branchName) } - file, _, _, err := g.client.Repositories.GetContents(context.Background(), g.organization, repository, pathToFile, opts) + file, _, _, err := g.client.Repositories.GetContents(context.Background(), org, repository, pathToFile, opts) if err != nil { return nil, fmt.Errorf("error when listing file contents: %v", err) } diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 08a7091b36..d2bfb1c4e9 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -86,7 +86,7 @@ func getRepoFileContent(f *framework.Framework, repoUrl, repoRevision, fileName return "", fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoOrgName + "/" + repoName, repoRevision, err) } } else { - fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) + fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFileWithOrg(repoOrgName, repoName, fileName, repoRevision) if err != nil { return "", 
fmt.Errorf("Failed to get file %s from repo %s revision %s: %v", fileName, repoName, repoRevision, err) } From 39a42efe709e7a2d012d9b3242447c23a92f922f Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 08:07:58 +0200 Subject: [PATCH 296/321] feat(KONFLUX-10328): Add option to make component onboarding sequential Incident caused by the load test was discussed here: https://redhat-internal.slack.com/archives/C04F4NE15U1/p1759163666162299 We need to serialize component creation ane donboarding to make the test relevant even for higher scale because build controller reconciles new components in sequence and needs ~1 minute per component. If we would not serialize that, we would quickly start hitting timeouts. --- tests/load-tests/loadtest.go | 1 + .../pkg/journey/handle_component.go | 10 +++ tests/load-tests/pkg/options/options.go | 83 ++++++++++--------- 3 files changed, 54 insertions(+), 40 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index d7c698522a..6245c6e3ff 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -42,6 +42,7 @@ func init() { rootCmd.Flags().StringVar(&opts.ForkTarget, "fork-target", "", "the target namespace (GitLab) or organization (GitHub) to fork component repository to (if empty, will use MY_GITHUB_ORG env variable)") rootCmd.Flags().StringVar(&opts.QuayRepo, "quay-repo", "redhat-user-workloads-stage", "the target quay repo for PaC templated image pushes") rootCmd.Flags().StringVar(&opts.RunPrefix, "runprefix", "testuser", "identifier used for prefix of usersignup names and as suffix when forking repo") + rootCmd.Flags().BoolVar(&opts.SerializeComponentOnboarding, "serialize-component-onboarding", false, "should we serialize creation and onboarding of a component (wait will not affect measurement)") rootCmd.Flags().BoolVarP(&opts.Stage, "stage", "s", false, "is you want to run the test on stage") rootCmd.Flags().DurationVar(&opts.StartupDelay, "startup-delay", 0, 
"when starting per user/per application/per client treads, wait for this duration") rootCmd.Flags().DurationVar(&opts.StartupJitter, "startup-jitter", 3*time.Second, "when applying startup delay, add or remove half of jitter with this maximum value") diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index b565d72b3c..3342ccb12d 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -320,6 +320,11 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap func HandleComponent(ctx *types.PerComponentContext) error { var err error + if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { + logging.Logger.Debug("Waiting to create component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboardingLock.Lock() + } + logging.Logger.Debug("Creating component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) // Create component @@ -353,6 +358,11 @@ func HandleComponent(ctx *types.PerComponentContext) error { return logging.Logger.Fail(65, "Component failed onboarding: %v", err) } + if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { + ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboardingLock.Unlock() + logging.Logger.Debug("Freed lock to create another component after %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + } + // Configure imagePullSecrets needed for component build task images if len(ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets) > 0 { _, err = logging.Measure( diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index bf3154d501..a27292ac4b 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -5,49 +5,52 
@@ import "fmt" import "os" import "time" import "strings" +import "sync" // Struct to hold command line options type Opts struct { - ApplicationsCount int - BuildPipelineSelectorBundle string - ComponentContainerContext string - ComponentContainerFile string - ComponentRepoRevision string - ComponentRepoUrl string - ComponentsCount int - Concurrency int - FailFast bool - ForkTarget string - JourneyDuration string - JourneyRepeats int - JourneyUntil time.Time - LogDebug bool - LogInfo bool - LogTrace bool - OutputDir string - PipelineImagePullSecrets []string - PipelineMintmakerDisabled bool - PipelineRepoTemplating bool - PipelineRepoTemplatingSourceDir string - PipelineRepoTemplatingSource string - Purge bool - PurgeOnly bool - QuayRepo string - ReleasePipelinePath string - ReleasePipelineRevision string - ReleasePipelineServiceAccount string - ReleasePipelineUrl string - ReleasePolicy string - RunPrefix string - Stage bool - StartupDelay time.Duration - StartupJitter time.Duration - TestScenarioGitURL string - TestScenarioPathInRepo string - TestScenarioRevision string - WaitIntegrationTestsPipelines bool - WaitPipelines bool - WaitRelease bool + ApplicationsCount int + BuildPipelineSelectorBundle string + ComponentContainerContext string + ComponentContainerFile string + ComponentRepoRevision string + ComponentRepoUrl string + ComponentsCount int + Concurrency int + FailFast bool + ForkTarget string + JourneyDuration string + JourneyRepeats int + JourneyUntil time.Time + LogDebug bool + LogInfo bool + LogTrace bool + OutputDir string + PipelineImagePullSecrets []string + PipelineMintmakerDisabled bool + PipelineRepoTemplating bool + PipelineRepoTemplatingSourceDir string + PipelineRepoTemplatingSource string + Purge bool + PurgeOnly bool + QuayRepo string + ReleasePipelinePath string + ReleasePipelineRevision string + ReleasePipelineServiceAccount string + ReleasePipelineUrl string + ReleasePolicy string + RunPrefix string + SerializeComponentOnboarding bool + 
SerializeComponentOnboardingLock sync.Mutex + Stage bool + StartupDelay time.Duration + StartupJitter time.Duration + TestScenarioGitURL string + TestScenarioPathInRepo string + TestScenarioRevision string + WaitIntegrationTestsPipelines bool + WaitPipelines bool + WaitRelease bool } // Pre-process load-test options before running the test From 4c07de5a54275ae7bed88ef1e7d944c878a0fa68 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 08:25:39 +0200 Subject: [PATCH 297/321] refactor: Rename these to express this is about per user threads --- tests/load-tests/loadtest.go | 6 +++--- .../pkg/journey/handle_persistent_volume_claim.go | 2 +- tests/load-tests/pkg/journey/handle_repo_templating.go | 2 +- tests/load-tests/pkg/journey/handle_users.go | 2 +- tests/load-tests/pkg/journey/journey.go | 10 +++++----- tests/load-tests/pkg/logging/time_and_log.go | 2 +- tests/load-tests/pkg/types/types.go | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 6245c6e3ff..ad5fa782d8 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -119,10 +119,10 @@ func main() { // Tier up measurements logger logging.MeasurementsStart(opts.OutputDir) - // Start given number of `perUserThread()` threads using `journey.Setup()` and wait for them to finish + // Start given number of `perUserThread()` threads using `journey.PerUserSetup()` and wait for them to finish _, err = logging.Measure( nil, - journey.Setup, + journey.PerUserSetup, perUserThread, &opts, ) @@ -144,7 +144,7 @@ func main() { } // Single user journey -func perUserThread(threadCtx *types.MainContext) { +func perUserThread(threadCtx *types.PerUserContext) { defer threadCtx.ThreadsWG.Done() time.Sleep(threadCtx.StartupPause) diff --git a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go index 4d2ec71c19..d289cea5c2 100644 --- 
a/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go +++ b/tests/load-tests/pkg/journey/handle_persistent_volume_claim.go @@ -26,7 +26,7 @@ func collectPersistentVolumeClaims(f *framework.Framework, namespace string) err return nil } -func HandlePersistentVolumeClaim(ctx *types.MainContext) error { +func HandlePersistentVolumeClaim(ctx *types.PerUserContext) error { if !ctx.Opts.WaitPipelines { return nil // if build pipeline runs are not done yet, it does not make sense to collect PV timings } diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index d2bfb1c4e9..92e3d5c01d 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -231,7 +231,7 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so return shaMap, nil } -func HandleRepoForking(ctx *types.MainContext) error { +func HandleRepoForking(ctx *types.PerUserContext) error { var suffix string if ctx.Opts.Stage { suffix = ctx.Opts.RunPrefix + "-" + ctx.Namespace diff --git a/tests/load-tests/pkg/journey/handle_users.go b/tests/load-tests/pkg/journey/handle_users.go index d3c5f46128..cef95ef72b 100644 --- a/tests/load-tests/pkg/journey/handle_users.go +++ b/tests/load-tests/pkg/journey/handle_users.go @@ -36,7 +36,7 @@ func provisionFramework(stageUsers []loadtestutils.User, threadIndex int, userna return f, f.UserNamespace, nil } -func HandleUser(ctx *types.MainContext) error { +func HandleUser(ctx *types.PerUserContext) error { var err error if ctx.Opts.Stage { diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 51154b2aa0..d2b454e9e1 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -13,10 +13,10 @@ import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import util "github.com/devfile/library/v2/pkg/util" 
// Pointers to all user journey thread contexts -var MainContexts []*types.MainContext +var MainContexts []*types.PerUserContext // Just to create user -func initUserThread(threadCtx *types.MainContext) { +func initUserThread(threadCtx *types.PerUserContext) { defer threadCtx.ThreadsWG.Done() var err error @@ -48,7 +48,7 @@ func computeStartupPause(index int, delay, jitter time.Duration) time.Duration { // Start all the user journey threads // TODO split this to two functions and get PurgeOnly code out -func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { +func PerUserSetup(fn func(*types.PerUserContext), opts *options.Opts) (string, error) { threadsWG := &sync.WaitGroup{} threadsWG.Add(opts.Concurrency) @@ -67,7 +67,7 @@ func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { logging.Logger.Info("Initiating per user thread %d with pause %v", threadIndex, startupPause) - threadCtx := &types.MainContext{ + threadCtx := &types.PerUserContext{ ThreadsWG: threadsWG, ThreadIndex: threadIndex, StartupPause: startupPause, @@ -118,7 +118,7 @@ func Setup(fn func(*types.MainContext), opts *options.Opts) (string, error) { } // Start all the threads to process all applications per user -func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *types.MainContext) (string, error) { +func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *types.PerUserContext) (string, error) { perApplicationWG := &sync.WaitGroup{} perApplicationWG.Add(parentContext.Opts.ApplicationsCount) diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index f93ce61dc3..baa30dba7c 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -178,7 +178,7 @@ func Measure(ctx interface{}, fn interface{}, params ...interface{}) (interface{ repeatsCounter := -1 // Extract additional metadata about this function call from 
provided context. - if casted, ok := ctx.(*types.MainContext); ok { + if casted, ok := ctx.(*types.PerUserContext); ok { perUserId = casted.ThreadIndex repeatsCounter = casted.JourneyRepeatsCounter } diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go index 219d523178..759cf0f62d 100644 --- a/tests/load-tests/pkg/types/types.go +++ b/tests/load-tests/pkg/types/types.go @@ -8,7 +8,7 @@ import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadt import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" // Struct to hold user journey thread data -type MainContext struct { +type PerUserContext struct { ThreadsWG *sync.WaitGroup ThreadIndex int StartupPause time.Duration @@ -28,7 +28,7 @@ type PerApplicationContext struct { ApplicationIndex int StartupPause time.Duration Framework *framework.Framework - ParentContext *MainContext + ParentContext *PerUserContext ApplicationName string IntegrationTestScenarioName string PerComponentContexts []*PerComponentContext From d989b699db881c3960e179508057d5e8fc3004bc Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 08:51:19 +0200 Subject: [PATCH 298/321] refactor: Move determining ITS and printing relevant log to create... 
function --- .../handle_integration_test_scenarios.go | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index 92e8da37e4..c77a8dd1d1 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -12,10 +12,13 @@ import ( utils "github.com/konflux-ci/e2e-tests/pkg/utils" ) -func createIntegrationTestScenario(f *framework.Framework, namespace, name, appName, scenarioGitURL, scenarioRevision, scenarioPathInRepo string) error { +func createIntegrationTestScenario(f *framework.Framework, namespace, appName, scenarioGitURL, scenarioRevision, scenarioPathInRepo string) (string, error) { interval := time.Second * 10 timeout := time.Minute * 1 + name := fmt.Sprintf("%s-its", appName) + logging.Logger.Debug("Creating integration test scenario %s for application %s in namespace %s", name, appName, namespace) + err := utils.WaitUntilWithInterval(func() (done bool, err error) { _, err = f.AsKubeDeveloper.IntegrationController.CreateIntegrationTestScenario(name, appName, namespace, scenarioGitURL, scenarioRevision, scenarioPathInRepo, "", []string{}) if err != nil { @@ -26,10 +29,10 @@ func createIntegrationTestScenario(f *framework.Framework, namespace, name, appN return true, nil }, interval, timeout) if err != nil { - return fmt.Errorf("Unable to create the Integration Test Scenario %s in namespace %s: %v", name, namespace, err) + return "", fmt.Errorf("Unable to create the Integration Test Scenario %s in namespace %s: %v", name, namespace, err) } - return nil + return name, nil } func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { @@ -38,17 +41,15 @@ func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { return nil } + var iface interface{} var err error + var ok bool - 
name := fmt.Sprintf("%s-its", ctx.ApplicationName) - logging.Logger.Debug("Creating integration test scenario %s for application %s in namespace %s", name, ctx.ApplicationName, ctx.ParentContext.Namespace) - - _, err = logging.Measure( + iface, err = logging.Measure( ctx, createIntegrationTestScenario, ctx.Framework, ctx.ParentContext.Namespace, - name, ctx.ApplicationName, ctx.ParentContext.Opts.TestScenarioGitURL, ctx.ParentContext.Opts.TestScenarioRevision, @@ -58,7 +59,10 @@ func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { return logging.Logger.Fail(40, "Integration test scenario failed creation: %v", err) } - ctx.IntegrationTestScenarioName = name + ctx.IntegrationTestScenarioName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(41, "Type assertion failed on integration test scenario name: %+v", iface) + } return nil } From fc93f764c644ecc658b3b4cfe69ac8d76c349db0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 09:22:31 +0200 Subject: [PATCH 299/321] refactor: Rename these to express this is about per user threads, part two --- tests/load-tests/loadtest.go | 22 ++++----- tests/load-tests/pkg/journey/handle_purge.go | 4 +- tests/load-tests/pkg/journey/handle_users.go | 10 ++-- tests/load-tests/pkg/journey/journey.go | 52 ++++++++++---------- tests/load-tests/pkg/logging/time_and_log.go | 6 +-- tests/load-tests/pkg/types/types.go | 4 +- 6 files changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index ad5fa782d8..3f870d9408 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -144,10 +144,10 @@ func main() { } // Single user journey -func perUserThread(threadCtx *types.PerUserContext) { - defer threadCtx.ThreadsWG.Done() +func perUserThread(perUserCtx *types.PerUserContext) { + defer perUserCtx.PerUserWG.Done() - time.Sleep(threadCtx.StartupPause) + time.Sleep(perUserCtx.StartupPause) var err error @@ -165,9 +165,9 @@ 
func perUserThread(threadCtx *types.PerUserContext) { //} //// Create watcher //fmt.Print("Creating watcher...\n") - //watcher, err2 := threadCtx.Framework.AsKubeDeveloper.CommonController.DynamicClient(). + //watcher, err2 := perUserCtx.Framework.AsKubeDeveloper.CommonController.DynamicClient(). // Resource(gvr). - // Namespace(threadCtx.Namespace). + // Namespace(perUserCtx.Namespace). // Watch(watchCtx, listOptions) //if err2 != nil { // fmt.Printf("Can not get watcher: %v", err2) @@ -228,21 +228,21 @@ func perUserThread(threadCtx *types.PerUserContext) { //watcher.Stop() //os.Exit(10) - for threadCtx.JourneyRepeatsCounter = 1; threadCtx.JourneyRepeatsCounter <= threadCtx.Opts.JourneyRepeats; threadCtx.JourneyRepeatsCounter++ { + for perUserCtx.JourneyRepeatsCounter = 1; perUserCtx.JourneyRepeatsCounter <= perUserCtx.Opts.JourneyRepeats; perUserCtx.JourneyRepeatsCounter++ { // Start given number of `perApplicationThread()` threads using `journey.PerApplicationSetup()` and wait for them to finish _, err = logging.Measure( - threadCtx, + perUserCtx, journey.PerApplicationSetup, perApplicationThread, - threadCtx, + perUserCtx, ) if err != nil { logging.Logger.Fatal("Per application threads setup failed: %v", err) } // Check if we are supposed to quit based on --journey-duration - if time.Now().UTC().After(threadCtx.Opts.JourneyUntil) { + if time.Now().UTC().After(perUserCtx.Opts.JourneyUntil) { logging.Logger.Debug("Done with user journey because of timeout") break } @@ -251,9 +251,9 @@ func perUserThread(threadCtx *types.PerUserContext) { // Collect info about PVCs _, err = logging.Measure( - threadCtx, + perUserCtx, journey.HandlePersistentVolumeClaim, - threadCtx, + perUserCtx, ) if err != nil { logging.Logger.Error("Thread failed: %v", err) diff --git a/tests/load-tests/pkg/journey/handle_purge.go b/tests/load-tests/pkg/journey/handle_purge.go index f97d8ec680..7817338f58 100644 --- a/tests/load-tests/pkg/journey/handle_purge.go +++ 
b/tests/load-tests/pkg/journey/handle_purge.go @@ -52,13 +52,13 @@ func purgeCi(f *framework.Framework, username string) error { } func Purge() error { - if !MainContexts[0].Opts.Purge { + if !PerUserContexts[0].Opts.Purge { return nil } errCounter := 0 - for _, ctx := range MainContexts { + for _, ctx := range PerUserContexts { if ctx.Opts.Stage { err := purgeStage(ctx.Framework, ctx.Namespace) if err != nil { diff --git a/tests/load-tests/pkg/journey/handle_users.go b/tests/load-tests/pkg/journey/handle_users.go index cef95ef72b..6f564cfeef 100644 --- a/tests/load-tests/pkg/journey/handle_users.go +++ b/tests/load-tests/pkg/journey/handle_users.go @@ -40,14 +40,14 @@ func HandleUser(ctx *types.PerUserContext) error { var err error if ctx.Opts.Stage { - ctx.Username = strings.TrimSuffix((*ctx.StageUsers)[ctx.ThreadIndex].Namespace, "-tenant") + ctx.Username = strings.TrimSuffix((*ctx.StageUsers)[ctx.UserIndex].Namespace, "-tenant") } else { - ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.RunPrefix, ctx.ThreadIndex) + ctx.Username = fmt.Sprintf("%s-%04d", ctx.Opts.RunPrefix, ctx.UserIndex) } ctx.Framework, ctx.Namespace, err = provisionFramework( *ctx.StageUsers, - ctx.ThreadIndex, + ctx.UserIndex, ctx.Username, ctx.Opts.Stage, ) @@ -63,7 +63,7 @@ func HandleNewFrameworkForApp(ctx *types.PerApplicationContext) error { ctx.Framework, _, err = provisionFramework( *ctx.ParentContext.StageUsers, - ctx.ParentContext.ThreadIndex, + ctx.ParentContext.UserIndex, ctx.ParentContext.Username, ctx.ParentContext.Opts.Stage, ) @@ -79,7 +79,7 @@ func HandleNewFrameworkForComp(ctx *types.PerComponentContext) error { ctx.Framework, _, err = provisionFramework( *ctx.ParentContext.ParentContext.StageUsers, - ctx.ParentContext.ParentContext.ThreadIndex, + ctx.ParentContext.ParentContext.UserIndex, ctx.ParentContext.ParentContext.Username, ctx.ParentContext.ParentContext.Opts.Stage, ) diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 
d2b454e9e1..44fdef537a 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -13,19 +13,19 @@ import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import util "github.com/devfile/library/v2/pkg/util" // Pointers to all user journey thread contexts -var MainContexts []*types.PerUserContext +var PerUserContexts []*types.PerUserContext // Just to create user -func initUserThread(threadCtx *types.PerUserContext) { - defer threadCtx.ThreadsWG.Done() +func initUserThread(perUserCtx *types.PerUserContext) { + defer perUserCtx.PerUserWG.Done() var err error // Create user if needed _, err = logging.Measure( - threadCtx, + perUserCtx, HandleUser, - threadCtx, + perUserCtx, ) if err != nil { logging.Logger.Error("Thread failed: %v", err) @@ -49,8 +49,8 @@ func computeStartupPause(index int, delay, jitter time.Duration) time.Duration { // Start all the user journey threads // TODO split this to two functions and get PurgeOnly code out func PerUserSetup(fn func(*types.PerUserContext), opts *options.Opts) (string, error) { - threadsWG := &sync.WaitGroup{} - threadsWG.Add(opts.Concurrency) + perUserWG := &sync.WaitGroup{} + perUserWG.Add(opts.Concurrency) var stageUsers []loadtestutils.User var err error @@ -62,14 +62,14 @@ func PerUserSetup(fn func(*types.PerUserContext), opts *options.Opts) (string, e } // Initialize all user thread contexts - for threadIndex := 0; threadIndex < opts.Concurrency; threadIndex++ { - startupPause := computeStartupPause(threadIndex, opts.StartupDelay, opts.StartupJitter) + for userIndex := 0; userIndex < opts.Concurrency; userIndex++ { + startupPause := computeStartupPause(userIndex, opts.StartupDelay, opts.StartupJitter) - logging.Logger.Info("Initiating per user thread %d with pause %v", threadIndex, startupPause) + logging.Logger.Info("Initiating per user thread %d with pause %v", userIndex, startupPause) - threadCtx := &types.PerUserContext{ - ThreadsWG: threadsWG, - 
ThreadIndex: threadIndex, + perUserCtx := &types.PerUserContext{ + PerUserWG: perUserWG, + UserIndex: userIndex, StartupPause: startupPause, Opts: opts, StageUsers: &stageUsers, @@ -77,15 +77,15 @@ func PerUserSetup(fn func(*types.PerUserContext), opts *options.Opts) (string, e Namespace: "", } - MainContexts = append(MainContexts, threadCtx) + PerUserContexts = append(PerUserContexts, perUserCtx) } // Create all users (if necessary) and initialize their frameworks - for _, threadCtx := range MainContexts { - go initUserThread(threadCtx) + for _, perUserCtx := range PerUserContexts { + go initUserThread(perUserCtx) } - threadsWG.Wait() + perUserWG.Wait() // If we are supposed to only purge resources, now when frameworks are initialized, we are done if opts.PurgeOnly { @@ -94,25 +94,25 @@ func PerUserSetup(fn func(*types.PerUserContext), opts *options.Opts) (string, e } // Fork repositories sequentially as GitHub do not allow more than 3 running forks in parallel anyway - for _, threadCtx := range MainContexts { + for _, perUserCtx := range PerUserContexts { _, err = logging.Measure( - threadCtx, + perUserCtx, HandleRepoForking, - threadCtx, + perUserCtx, ) if err != nil { return "", err } } - threadsWG.Add(opts.Concurrency) + perUserWG.Add(opts.Concurrency) // Run actual user thread function - for _, threadCtx := range MainContexts { - go fn(threadCtx) + for _, perUserCtx := range PerUserContexts { + go fn(perUserCtx) } - threadsWG.Wait() + perUserWG.Wait() return "", nil } @@ -125,7 +125,7 @@ func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *t for applicationIndex := 0; applicationIndex < parentContext.Opts.ApplicationsCount; applicationIndex++ { startupPause := computeStartupPause(applicationIndex, parentContext.Opts.StartupDelay, parentContext.Opts.StartupJitter) - logging.Logger.Info("Initiating per application thread %d-%d with pause %v", parentContext.ThreadIndex, applicationIndex, startupPause) + logging.Logger.Info("Initiating 
per application thread %d-%d with pause %v", parentContext.UserIndex, applicationIndex, startupPause) perApplicationCtx := &types.PerApplicationContext{ PerApplicationWG: perApplicationWG, @@ -153,7 +153,7 @@ func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types for componentIndex := 0; componentIndex < parentContext.ParentContext.Opts.ComponentsCount; componentIndex++ { startupPause := computeStartupPause(componentIndex, parentContext.ParentContext.Opts.StartupDelay, parentContext.ParentContext.Opts.StartupJitter) - logging.Logger.Info("Initiating per component thread %d-%d-%d with pause %s", parentContext.ParentContext.ThreadIndex, parentContext.ApplicationIndex, componentIndex, startupPause) + logging.Logger.Info("Initiating per component thread %d-%d-%d with pause %s", parentContext.ParentContext.UserIndex, parentContext.ApplicationIndex, componentIndex, startupPause) perComponentCtx := &types.PerComponentContext{ PerComponentWG: perComponentWG, diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index baa30dba7c..61a2b22356 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -179,16 +179,16 @@ func Measure(ctx interface{}, fn interface{}, params ...interface{}) (interface{ // Extract additional metadata about this function call from provided context. 
if casted, ok := ctx.(*types.PerUserContext); ok { - perUserId = casted.ThreadIndex + perUserId = casted.UserIndex repeatsCounter = casted.JourneyRepeatsCounter } if casted, ok := ctx.(*types.PerApplicationContext); ok { - perUserId = casted.ParentContext.ThreadIndex + perUserId = casted.ParentContext.UserIndex perAppId = casted.ApplicationIndex repeatsCounter = casted.ParentContext.JourneyRepeatsCounter } if casted, ok := ctx.(*types.PerComponentContext); ok { - perUserId = casted.ParentContext.ParentContext.ThreadIndex + perUserId = casted.ParentContext.ParentContext.UserIndex perAppId = casted.ParentContext.ApplicationIndex perCompId = casted.ComponentIndex repeatsCounter = casted.ParentContext.ParentContext.JourneyRepeatsCounter diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go index 759cf0f62d..08d98537e6 100644 --- a/tests/load-tests/pkg/types/types.go +++ b/tests/load-tests/pkg/types/types.go @@ -9,8 +9,8 @@ import options "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/options" // Struct to hold user journey thread data type PerUserContext struct { - ThreadsWG *sync.WaitGroup - ThreadIndex int + PerUserWG *sync.WaitGroup + UserIndex int StartupPause time.Duration JourneyRepeatsCounter int Opts *options.Opts From b47b80324ec0487dabae5a3913fdf51a27b18b2b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 09:55:53 +0200 Subject: [PATCH 300/321] refactor: Determine application name in handle function as for other entities --- .../pkg/journey/handle_applications.go | 25 +++++++++++++------ tests/load-tests/pkg/journey/journey.go | 3 --- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 7455dbe5f4..4040820842 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -9,12 +9,15 @@ import types 
"github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import framework "github.com/konflux-ci/e2e-tests/pkg/framework" import utils "github.com/konflux-ci/e2e-tests/pkg/utils" -func createApplication(f *framework.Framework, namespace string, timeout time.Duration, name string) error { - _, err := f.AsKubeDeveloper.HasController.CreateApplicationWithTimeout(name, namespace, timeout) +import util "github.com/devfile/library/v2/pkg/util" + +func createApplication(f *framework.Framework, namespace string, runPrefix string) (string, error) { + name := fmt.Sprintf("%s-app-%s", runPrefix, util.GenerateRandomString(5)) + _, err := f.AsKubeDeveloper.HasController.CreateApplicationWithTimeout(name, namespace, time.Minute*60) if err != nil { - return fmt.Errorf("Unable to create the Application %s: %v", name, err) + return "", fmt.Errorf("Unable to create the Application %s: %v", name, err) } - return nil + return name, nil } func validateApplication(f *framework.Framework, name, namespace string) error { @@ -36,22 +39,28 @@ func validateApplication(f *framework.Framework, name, namespace string) error { } func HandleApplication(ctx *types.PerApplicationContext) error { + var iface interface{} var err error + var ok bool logging.Logger.Debug("Creating application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) - _, err = logging.Measure( + iface, err = logging.Measure( ctx, createApplication, ctx.Framework, ctx.ParentContext.Namespace, - time.Minute*60, - ctx.ApplicationName, + ctx.ParentContext.Opts.RunPrefix, ) if err != nil { return logging.Logger.Fail(30, "Application failed creation: %v", err) } + ctx.ApplicationName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(31, "Type assertion failed on application name: %+v", iface) + } + _, err = logging.Measure( ctx, validateApplication, @@ -60,7 +69,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { ctx.ParentContext.Namespace, ) if err != nil { - return 
logging.Logger.Fail(31, "Application failed validation: %v", err) + return logging.Logger.Fail(32, "Application failed validation: %v", err) } return nil diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 44fdef537a..787e34c828 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -10,8 +10,6 @@ import logging "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/logging" import loadtestutils "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/loadtestutils" import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" -import util "github.com/devfile/library/v2/pkg/util" - // Pointers to all user journey thread contexts var PerUserContexts []*types.PerUserContext @@ -132,7 +130,6 @@ func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *t ApplicationIndex: applicationIndex, StartupPause: startupPause, ParentContext: parentContext, - ApplicationName: fmt.Sprintf("%s-app-%s", parentContext.Opts.RunPrefix, util.GenerateRandomString(5)), } parentContext.PerApplicationContexts = append(parentContext.PerApplicationContexts, perApplicationCtx) From 46669fbde26b795b55c03cfb775a88d158163b4e Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 10:06:04 +0200 Subject: [PATCH 301/321] refactor: Generate RP and RPA names in functions and store them in context --- tests/load-tests/pkg/journey/handle_collections.go | 6 ++---- .../load-tests/pkg/journey/handle_releases_setup.go | 12 +++++------- tests/load-tests/pkg/types/types.go | 2 ++ 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 375ae83515..79d1f287d3 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -194,9 +194,8 @@ func collectComponentJSONs(f *framework.Framework, dirPath, 
namespace, component return nil } -func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appName, compName, snapName, relName string) error { +func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appName, compName, snapName, releasePlanName, releasePlanAdmissionName, relName string) error { // Collect ReleasePlan JSON - releasePlanName := appName + "-rp" releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(releasePlanName, namespace) if err != nil { if !k8s_api_errors.IsNotFound(err) { @@ -217,7 +216,6 @@ func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appN } // Collect ReleasePlanAdmission JSON - releasePlanAdmissionName := appName + "-rpa" releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(releasePlanAdmissionName, namespace) if err != nil { if !k8s_api_errors.IsNotFound(err) { @@ -342,7 +340,7 @@ func HandlePerComponentCollection(ctx *types.PerComponentContext) error { return logging.Logger.Fail(103, "Failed to collect component JSONs: %v", err) } - err = collectReleaseRelatedJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName, ctx.SnapshotName, ctx.ReleaseName) + err = collectReleaseRelatedJSONs(ctx.Framework, dirPath, ctx.ParentContext.ParentContext.Namespace, ctx.ParentContext.ApplicationName, ctx.ComponentName, ctx.SnapshotName, ctx.ParentContext.ReleasePlanName, ctx.ParentContext.ReleasePlanAdmissionName, ctx.ReleaseName) if err != nil { return logging.Logger.Fail(104, "Failed to collect release related JSONs: %v", err) } diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index f312ed3014..dd48d9d82f 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -135,8 +135,6 @@ func HandleReleaseSetup(ctx 
*types.PerApplicationContext) error { return nil } - var releasePlanName string - var releasePlanAdmissionName string var iface interface{} var ok bool var err error @@ -152,7 +150,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { return logging.Logger.Fail(91, "Release Plan failed creation: %v", err) } - releasePlanName, ok = iface.(string) + ctx.ReleasePlanName, ok = iface.(string) if !ok { return logging.Logger.Fail(92, "Type assertion failed on release plan name: %+v", iface) } @@ -173,7 +171,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { return logging.Logger.Fail(93, "Release Plan Admission failed creation: %v", err) } - releasePlanAdmissionName, ok = iface.(string) + ctx.ReleasePlanAdmissionName, ok = iface.(string) if !ok { return logging.Logger.Fail(94, "Type assertion failed on release plan admission name: %+v", iface) } @@ -183,7 +181,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { validateReleasePlan, ctx.Framework, ctx.ParentContext.Namespace, - releasePlanName, + ctx.ReleasePlanName, ) if err != nil { return logging.Logger.Fail(95, "Release Plan failed validation: %v", err) @@ -194,14 +192,14 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { validateReleasePlanAdmission, ctx.Framework, ctx.ParentContext.Namespace, - releasePlanAdmissionName, + ctx.ReleasePlanAdmissionName, ) if err != nil { return logging.Logger.Fail(96, "Release Plan Admission failed validation: %v", err) } - logging.Logger.Info("Configured release %s & %s for application %s in namespace %s", releasePlanName, releasePlanAdmissionName, ctx.ApplicationName, ctx.ParentContext.Namespace) + logging.Logger.Info("Configured release %s & %s for application %s in namespace %s", ctx.ReleasePlanName, ctx.ReleasePlanAdmissionName, ctx.ApplicationName, ctx.ParentContext.Namespace) return nil } diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go index 08d98537e6..f8cee9e1e1 
100644 --- a/tests/load-tests/pkg/types/types.go +++ b/tests/load-tests/pkg/types/types.go @@ -31,6 +31,8 @@ type PerApplicationContext struct { ParentContext *PerUserContext ApplicationName string IntegrationTestScenarioName string + ReleasePlanName string + ReleasePlanAdmissionName string PerComponentContexts []*PerComponentContext } From 2036379c32006565adbb25372a5898b8550db3dc Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 11:10:54 +0200 Subject: [PATCH 302/321] feat: Add DoHarmlessCommit function Adds a new function DoHarmlessCommit to pkg/journey/handle_repo_templating.go. This function creates or updates a file named 'just-trigger-build' with the current date and time and commits it. It is designed to work with both GitHub and GitLab repositories. Generated-by: Gemini --- .../pkg/journey/handle_repo_templating.go | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 92e3d5c01d..5189e38bc4 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -231,6 +231,75 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so return shaMap, nil } +// DoHarmlessCommit creates or updates file "just-trigger-build" with current timestamp and commits it +func DoHarmlessCommit(f *framework.Framework, repoUrl, repoRevision string) (string, error) { + fileName := "just-trigger-build" + var fileContent string + var sha *string + var commitSha string + + repoName, err := getRepoNameFromRepoUrl(repoUrl) + if err != nil { + return "", err + } + repoOrgName, err := getRepoOrgFromRepoUrl(repoUrl) + if err != nil { + return "", err + } + + if strings.Contains(repoUrl, "gitlab.") { + // For gitlab, we can get file content. If it fails, we assume it doesn't exist. + // The UpdateFile API for gitlab creates the file if it doesn't exist. 
+ existingContent, err := f.AsKubeAdmin.CommonController.Gitlab.GetFile(repoOrgName+"/"+repoName, fileName, repoRevision) + if err != nil { + logging.Logger.Debug("Failed to get file %s from repo %s, assuming it does not exist: %v", fileName, repoUrl, err) + fileContent = "" + } else { + fileContent = existingContent + } + fileContent += fmt.Sprintf("\n# %s", time.Now().String()) + + commitSha, err = f.AsKubeAdmin.CommonController.Gitlab.UpdateFile(repoOrgName+"/"+repoName, fileName, fileContent, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to update file %s in repo %s revision %s: %v", fileName, repoOrgName+"/"+repoName, repoRevision, err) + } + } else { + // For github, we need to get SHA if file exists. + fileResponse, err := f.AsKubeAdmin.CommonController.Github.GetFile(repoName, fileName, repoRevision) + if err != nil { + // Assuming error means not found. + logging.Logger.Debug("File %s not found in repo %s, will create it.", fileName, repoUrl) + fileContent = "" + sha = nil + } else { + existingContent, err := fileResponse.GetContent() + if err != nil { + return "", err + } + fileContent = existingContent + sha = fileResponse.SHA + } + + fileContent += fmt.Sprintf("\n# %s", time.Now().String()) + + if sha == nil { + // We have to assume a CreateFile function exists in the framework's github controller + repoContentResponse, err := f.AsKubeAdmin.CommonController.Github.CreateFile(repoName, fileName, fileContent, repoRevision) + if err != nil { + return "", fmt.Errorf("Failed to create file %s in repo %s: %v", fileName, repoUrl, err) + } + commitSha = *repoContentResponse.Commit.SHA + } else { + repoContentResponse, err := f.AsKubeAdmin.CommonController.Github.UpdateFile(repoName, fileName, fileContent, repoRevision, *sha) + if err != nil { + return "", fmt.Errorf("Failed to update file %s in repo %s: %v", fileName, repoUrl, err) + } + commitSha = *repoContentResponse.Commit.SHA + } + } + return commitSha, nil +} + func HandleRepoForking(ctx 
*types.PerUserContext) error { var suffix string if ctx.Opts.Stage { From 7211f847715236d8a3de0aeb4b2a9b3817795554 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 09:49:22 +0200 Subject: [PATCH 303/321] feat(KONFLUX-10328): Add options to reuse apps and components This is needed because component onboarding is very slow and if we want to avoid it and put significant load on the cluster, we need to create (and onboard) the components in advance. Still need to update collection code to make it work. --- tests/load-tests/loadtest.go | 2 ++ .../pkg/journey/handle_applications.go | 5 ++++ .../pkg/journey/handle_component.go | 28 +++++++++++++------ .../handle_integration_test_scenarios.go | 7 ++++- .../pkg/journey/handle_releases_setup.go | 8 ++++++ .../pkg/journey/handle_repo_templating.go | 4 +-- tests/load-tests/pkg/journey/journey.go | 25 ++++++++++++++--- tests/load-tests/pkg/options/options.go | 12 ++++++++ tests/load-tests/pkg/types/types.go | 17 ++++++----- 9 files changed, 83 insertions(+), 25 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 3f870d9408..3ea0566c80 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -63,6 +63,8 @@ func init() { rootCmd.Flags().IntVarP(&opts.Concurrency, "concurrency", "c", 1, "number of concurrent threads to execute") rootCmd.Flags().IntVar(&opts.JourneyRepeats, "journey-repeats", 1, "number of times to repeat user journey (either this or --journey-duration)") rootCmd.Flags().StringVar(&opts.JourneyDuration, "journey-duration", "1h", "repeat user journey until this timeout (either this or --journey-repeats)") + rootCmd.Flags().BoolVar(&opts.JourneyReuseApplications, "journey-reuse-applications", false, "when repeating journey, do not create new application (and integration test scenario and release plan and repease plan admission) on every journey repeat") + rootCmd.Flags().BoolVar(&opts.JourneyReuseComponents, "journey-reuse-componets", false, 
"when repeating journey, do not create new component on every journey repeat; this implies --journey-reuse-applications") rootCmd.Flags().BoolVar(&opts.PipelineMintmakerDisabled, "pipeline-mintmaker-disabled", true, "if you want to stop Mintmaker to be creating update PRs for your component (default in loadtest different from Konflux default)") rootCmd.Flags().BoolVar(&opts.PipelineRepoTemplating, "pipeline-repo-templating", false, "if we should use in repo template pipelines (merge PaC PR, template repo pipelines and ignore custom pipeline run, e.g. required for multi arch test)") rootCmd.Flags().StringVar(&opts.PipelineRepoTemplatingSource, "pipeline-repo-templating-source", "", "when templating, take template source files from this repository (\"\" means we will get source files from current repo)") diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 4040820842..ef5bbed4ee 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -39,6 +39,11 @@ func validateApplication(f *framework.Framework, name, namespace string) error { } func HandleApplication(ctx *types.PerApplicationContext) error { + if ctx.ApplicationName != "" { + logging.Logger.Debug("Skipping application creation because reusing application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) + return nil + } + var iface interface{} var err error var ok bool diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 3342ccb12d..52158fd04f 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -318,13 +318,23 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap } func HandleComponent(ctx *types.PerComponentContext) error { - var err error + if ctx.ComponentName != "" { + 
logging.Logger.Debug("Skipping setting up component because reusing component %s in namespace %s, triggering build with push to the repo", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + _, err := doHarmlessCommit(ctx.Framework, ctx.ParentContext.ParentContext.Opts.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision) + if err != nil { + return logging.Logger.Fail(60, "Commiting to repo for reused component %s in namespace %s failed: %v", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace, err) + } + return nil + } if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { logging.Logger.Debug("Waiting to create component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboardingLock.Lock() } + var err error + var mergeRequestNumber int + logging.Logger.Debug("Creating component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) // Create component @@ -343,7 +353,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ParentContext.ParentContext.Opts.PipelineMintmakerDisabled, ) if err != nil { - return logging.Logger.Fail(60, "Component failed creation: %v", err) + return logging.Logger.Fail(61, "Component failed creation: %v", err) } // Validate component build service account created @@ -355,7 +365,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(65, "Component failed onboarding: %v", err) + return logging.Logger.Fail(62, "Component failed onboarding: %v", err) } if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { @@ -374,7 +384,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets, ) if err != nil { - return logging.Logger.Fail(61, "Failed to configure pipeline imagePullSecrets: %v", err) + 
return logging.Logger.Fail(63, "Failed to configure pipeline imagePullSecrets: %v", err) } } @@ -387,14 +397,14 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(62, "Component failed validation: %v", err) + return logging.Logger.Fail(64, "Component failed validation: %v", err) } // Get merge request number var ok bool - ctx.MergeRequestNumber, ok = pullIface.(int) + mergeRequestNumber, ok = pullIface.(int) if !ok { - return logging.Logger.Fail(63, "Type assertion failed on pull: %+v", pullIface) + return logging.Logger.Fail(65, "Type assertion failed on pull: %+v", pullIface) } // If this is supposed to be a multi-arch build, we do not care about @@ -423,11 +433,11 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision, ctx.ParentContext.ParentContext.Opts.PipelineRepoTemplatingSource, ctx.ParentContext.ParentContext.Opts.PipelineRepoTemplatingSourceDir, - ctx.MergeRequestNumber, + mergeRequestNumber, placeholders, ) if err != nil { - return logging.Logger.Fail(64, "Repo-templating workflow component cleanup failed: %v", err) + return logging.Logger.Fail(66, "Repo-templating workflow component cleanup failed: %v", err) } } diff --git a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go index c77a8dd1d1..28783c6bde 100644 --- a/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go +++ b/tests/load-tests/pkg/journey/handle_integration_test_scenarios.go @@ -36,8 +36,13 @@ func createIntegrationTestScenario(f *framework.Framework, namespace, appName, s } func HandleIntegrationTestScenario(ctx *types.PerApplicationContext) error { + if ctx.IntegrationTestScenarioName != "" { + logging.Logger.Debug("Skipping integration test scenario creation because reusing integration test scenario %s in namespace %s", 
ctx.IntegrationTestScenarioName, ctx.ParentContext.Namespace) + return nil + } + if ctx.ParentContext.Opts.TestScenarioGitURL == "" { - logging.Logger.Debug("Integration Test Scenario GIT not provided, not creating it") + logging.Logger.Debug("Skipping integration test scenario creation because GIT was not provided") return nil } diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index dd48d9d82f..c81ee0d177 100644 --- a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -130,6 +130,14 @@ func validateReleasePlanAdmission(f *framework.Framework, namespace, name string func HandleReleaseSetup(ctx *types.PerApplicationContext) error { + if ctx.ReleasePlanName != "" { + if ctx.ReleasePlanAdmissionName == "" { + return logging.Logger.Fail(90, "We are supposed to reuse RPA, but it was not configured") + } + logging.Logger.Debug("Skipping setting up releases because reusing release plan %s and release plan admission %s in namespace %s", ctx.ReleasePlanName, ctx.ReleasePlanAdmissionName, ctx.ParentContext.Namespace) + return nil + } + if ctx.ParentContext.Opts.ReleasePolicy == "" { logging.Logger.Info("Skipping setting up releases because policy was not provided") return nil diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 5189e38bc4..64c7075170 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -231,8 +231,8 @@ func templateFiles(f *framework.Framework, repoUrl, repoRevision, sourceRepo, so return shaMap, nil } -// DoHarmlessCommit creates or updates file "just-trigger-build" with current timestamp and commits it -func DoHarmlessCommit(f *framework.Framework, repoUrl, repoRevision string) (string, error) { +// doHarmlessCommit creates or updates file "just-trigger-build" with 
current timestamp and commits it +func doHarmlessCommit(f *framework.Framework, repoUrl, repoRevision string) (string, error) { fileName := "just-trigger-build" var fileContent string var sha *string diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 787e34c828..ff9b010459 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -126,10 +126,22 @@ func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *t logging.Logger.Info("Initiating per application thread %d-%d with pause %v", parentContext.UserIndex, applicationIndex, startupPause) perApplicationCtx := &types.PerApplicationContext{ - PerApplicationWG: perApplicationWG, - ApplicationIndex: applicationIndex, - StartupPause: startupPause, - ParentContext: parentContext, + PerApplicationWG: perApplicationWG, + ApplicationIndex: applicationIndex, + StartupPause: startupPause, + ParentContext: parentContext, + ApplicationName: "", + IntegrationTestScenarioName: "", + ReleasePlanName: "", + ReleasePlanAdmissionName: "", + } + + if parentContext.Opts.JourneyReuseApplications && applicationIndex != 0 { + perApplicationCtx.ApplicationName = parentContext.PerApplicationContexts[0].ApplicationName + perApplicationCtx.IntegrationTestScenarioName = parentContext.PerApplicationContexts[0].IntegrationTestScenarioName + perApplicationCtx.ReleasePlanName = parentContext.PerApplicationContexts[0].ReleasePlanName + perApplicationCtx.ReleasePlanAdmissionName = parentContext.PerApplicationContexts[0].ReleasePlanAdmissionName + logging.Logger.Debug("Reusing application %s and others in thread %d-%d", perApplicationCtx.ApplicationName, parentContext.UserIndex, applicationIndex) } parentContext.PerApplicationContexts = append(parentContext.PerApplicationContexts, perApplicationCtx) @@ -160,6 +172,11 @@ func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types ComponentName: 
fmt.Sprintf("%s-comp-%d", parentContext.ApplicationName, componentIndex), } + if parentContext.ParentContext.Opts.JourneyReuseComponents && componentIndex != 0 { + perComponentCtx.ComponentName = parentContext.PerComponentContexts[0].ComponentName + logging.Logger.Debug("Reusing component %s in thread %d-%d-%d", perComponentCtx.ComponentName, parentContext.ParentContext.UserIndex, parentContext.ApplicationIndex, componentIndex) + } + parentContext.PerComponentContexts = append(parentContext.PerComponentContexts, perComponentCtx) go fn(perComponentCtx) diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index a27292ac4b..52c0a94499 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -22,6 +22,8 @@ type Opts struct { JourneyDuration string JourneyRepeats int JourneyUntil time.Time + JourneyReuseApplications bool + JourneyReuseComponents bool LogDebug bool LogInfo bool LogTrace bool @@ -96,6 +98,16 @@ func (o *Opts) ProcessOptions() error { } } + // If we are supposed to reuse components on additional journeys, we have to reuse applications + if o.JourneyRepeats > 1 { + if o.JourneyReuseComponents { + if ! 
o.JourneyReuseApplications { + fmt.Print("Warning: We are supposed to reuse components so will reuse applications as well\n") + o.JourneyReuseApplications = true + } + } + } + // Convert options struct to pretty JSON jsonOptions, err2 := json.MarshalIndent(o, "", " ") if err2 != nil { diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go index f8cee9e1e1..dbc936c2f1 100644 --- a/tests/load-tests/pkg/types/types.go +++ b/tests/load-tests/pkg/types/types.go @@ -38,13 +38,12 @@ type PerApplicationContext struct { // Struct to hold data for thread to process each component type PerComponentContext struct { - PerComponentWG *sync.WaitGroup - ComponentIndex int - StartupPause time.Duration - Framework *framework.Framework - ParentContext *PerApplicationContext - ComponentName string - SnapshotName string - MergeRequestNumber int - ReleaseName string + PerComponentWG *sync.WaitGroup + ComponentIndex int + StartupPause time.Duration + Framework *framework.Framework + ParentContext *PerApplicationContext + ComponentName string + SnapshotName string + ReleaseName string } From dfb554eced0518f602a031c3a021835ac21ea131 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 11:37:41 +0200 Subject: [PATCH 304/321] refactor: Move component name creation to handle function --- .../pkg/journey/handle_component.go | 35 +++++++++++-------- tests/load-tests/pkg/journey/journey.go | 3 +- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 52158fd04f..2cf07d4814 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -84,7 +84,11 @@ func getPaCPull(annotations map[string]string) (string, error) { } } -func createComponent(f *framework.Framework, namespace, name, repoUrl, repoRevision, containerContext, containerFile, buildPipelineSelector, appName string, 
mintmakerDisabled bool) error { +func createComponent(f *framework.Framework, namespace, repoUrl, repoRevision, containerContext, containerFile, buildPipelineSelector, appName string, componentIndex int, mintmakerDisabled bool) error { + name := fmt.Sprintf("%s-comp-%d", appName, componentIndex) + + logging.Logger.Debug("Creating component %s in namespace %s", name, namespace) + // Prepare annotations to add to component annotationsMap := constants.DefaultDockerBuildPipelineBundleAnnotation if buildPipelineSelector != "" { @@ -332,30 +336,35 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboardingLock.Lock() } + var iface interface{} + var ok bool var err error var mergeRequestNumber int - logging.Logger.Debug("Creating component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) - // Create component - _, err = logging.Measure( + iface, err = logging.Measure( ctx, createComponent, ctx.Framework, ctx.ParentContext.ParentContext.Namespace, - ctx.ComponentName, ctx.ParentContext.ParentContext.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision, ctx.ParentContext.ParentContext.Opts.ComponentContainerContext, ctx.ParentContext.ParentContext.Opts.ComponentContainerFile, ctx.ParentContext.ParentContext.Opts.BuildPipelineSelectorBundle, ctx.ParentContext.ApplicationName, + ctx.ComponentIndex, ctx.ParentContext.ParentContext.Opts.PipelineMintmakerDisabled, ) if err != nil { return logging.Logger.Fail(61, "Component failed creation: %v", err) } + ctx.ComponentName, ok = iface.(string) + if !ok { + return logging.Logger.Fail(62, "Type assertion failed on component name: %+v", iface) + } + // Validate component build service account created _, err = logging.Measure( ctx, @@ -365,7 +374,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(62, "Component failed onboarding: 
%v", err) + return logging.Logger.Fail(63, "Component failed onboarding: %v", err) } if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { @@ -384,12 +393,11 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ParentContext.ParentContext.Opts.PipelineImagePullSecrets, ) if err != nil { - return logging.Logger.Fail(63, "Failed to configure pipeline imagePullSecrets: %v", err) + return logging.Logger.Fail(64, "Failed to configure pipeline imagePullSecrets: %v", err) } } - var pullIface interface{} - pullIface, err = logging.Measure( + iface, err = logging.Measure( ctx, getPaCPullNumber, ctx.Framework, @@ -397,14 +405,13 @@ func HandleComponent(ctx *types.PerComponentContext) error { ctx.ComponentName, ) if err != nil { - return logging.Logger.Fail(64, "Component failed validation: %v", err) + return logging.Logger.Fail(65, "Component failed validation: %v", err) } // Get merge request number - var ok bool - mergeRequestNumber, ok = pullIface.(int) + mergeRequestNumber, ok = iface.(int) if !ok { - return logging.Logger.Fail(65, "Type assertion failed on pull: %+v", pullIface) + return logging.Logger.Fail(66, "Type assertion failed on pull: %+v", iface) } // If this is supposed to be a multi-arch build, we do not care about @@ -437,7 +444,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { placeholders, ) if err != nil { - return logging.Logger.Fail(66, "Repo-templating workflow component cleanup failed: %v", err) + return logging.Logger.Fail(67, "Repo-templating workflow component cleanup failed: %v", err) } } diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index ff9b010459..94e624d2b1 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -1,6 +1,5 @@ package journey -import "fmt" import "sync" import "time" import "math/rand" @@ -169,7 +168,7 @@ func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types 
ComponentIndex: componentIndex, StartupPause: startupPause, ParentContext: parentContext, - ComponentName: fmt.Sprintf("%s-comp-%d", parentContext.ApplicationName, componentIndex), + ComponentName: "", } if parentContext.ParentContext.Opts.JourneyReuseComponents && componentIndex != 0 { From 27ab1258a17ae43f7a1d83ea3c6a9f3e47740c56 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 11:46:35 +0200 Subject: [PATCH 305/321] fix: At this point we might not have a component name yet --- tests/load-tests/pkg/journey/handle_component.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 2cf07d4814..fd8d2ddbb3 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -332,7 +332,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { } if ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboarding { - logging.Logger.Debug("Waiting to create component %s in namespace %s", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) + logging.Logger.Debug("Waiting to create component in namespace %s", ctx.ParentContext.ParentContext.Namespace) ctx.ParentContext.ParentContext.Opts.SerializeComponentOnboardingLock.Lock() } From 34b4e96fd9087e75b0c3be16bededea996de5d55 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 12:09:06 +0200 Subject: [PATCH 306/321] fix: Make this function to actually return component name --- tests/load-tests/pkg/journey/handle_component.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index fd8d2ddbb3..aefc070a6a 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -84,7 +84,7 @@ func getPaCPull(annotations map[string]string) (string, error) 
{ } } -func createComponent(f *framework.Framework, namespace, repoUrl, repoRevision, containerContext, containerFile, buildPipelineSelector, appName string, componentIndex int, mintmakerDisabled bool) error { +func createComponent(f *framework.Framework, namespace, repoUrl, repoRevision, containerContext, containerFile, buildPipelineSelector, appName string, componentIndex int, mintmakerDisabled bool) (string, error) { name := fmt.Sprintf("%s-comp-%d", appName, componentIndex) logging.Logger.Debug("Creating component %s in namespace %s", name, namespace) @@ -123,9 +123,9 @@ func createComponent(f *framework.Framework, namespace, repoUrl, repoRevision, c _, err := f.AsKubeDeveloper.HasController.CreateComponent(componentObj, namespace, "", "", appName, false, annotationsMap) if err != nil { - return fmt.Errorf("Unable to create the Component %s: %v", name, err) + return "", fmt.Errorf("Unable to create the Component %s: %v", name, err) } - return nil + return name, nil } func validateComponent(f *framework.Framework, namespace, name string) error { From fb15e5a56db1f62ef1ce7d11ffc6b5395961bfa4 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 12:10:02 +0200 Subject: [PATCH 307/321] fix: Do not attempt to collect RPs and RPAs when they were not created --- .../pkg/journey/handle_collections.go | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 79d1f287d3..2ca3e4b95e 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -196,42 +196,46 @@ func collectComponentJSONs(f *framework.Framework, dirPath, namespace, component func collectReleaseRelatedJSONs(f *framework.Framework, dirPath, namespace, appName, compName, snapName, releasePlanName, releasePlanAdmissionName, relName string) error { // Collect ReleasePlan JSON - releasePlan, err := 
f.AsKubeDeveloper.ReleaseController.GetReleasePlan(releasePlanName, namespace) - if err != nil { - if !k8s_api_errors.IsNotFound(err) { - return fmt.Errorf("Failed to get Release Plan %s: %v", releasePlanName, err) - } - } - - if err == nil { - releasePlanJSON, err := json.Marshal(releasePlan) + if releasePlanName != "" { + releasePlan, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlan(releasePlanName, namespace) if err != nil { - return fmt.Errorf("Failed to dump Release Plan JSON: %v", err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release Plan %s: %v", releasePlanName, err) + } } - err = writeToFile(dirPath, "collected-releaseplan-" + releasePlanName + ".json", releasePlanJSON) - if err != nil { - return fmt.Errorf("Failed to write Release Plan: %v", err) - } - } + if err == nil { + releasePlanJSON, err := json.Marshal(releasePlan) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan JSON: %v", err) + } - // Collect ReleasePlanAdmission JSON - releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(releasePlanAdmissionName, namespace) - if err != nil { - if !k8s_api_errors.IsNotFound(err) { - return fmt.Errorf("Failed to get Release Plan Admission %s: %v", releasePlanAdmissionName, err) + err = writeToFile(dirPath, "collected-releaseplan-" + releasePlanName + ".json", releasePlanJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan: %v", err) + } } } - if err == nil { - releasePlanAdmissionJSON, err := json.Marshal(releasePlanAdmission) + // Collect ReleasePlanAdmission JSON + if releasePlanAdmissionName != "" { + releasePlanAdmission, err := f.AsKubeDeveloper.ReleaseController.GetReleasePlanAdmission(releasePlanAdmissionName, namespace) if err != nil { - return fmt.Errorf("Failed to dump Release Plan Admission JSON: %v", err) + if !k8s_api_errors.IsNotFound(err) { + return fmt.Errorf("Failed to get Release Plan Admission %s: %v", releasePlanAdmissionName, 
err) + } } - err = writeToFile(dirPath, "collected-releaseplanadmission-" + releasePlanAdmissionName + ".json", releasePlanAdmissionJSON) - if err != nil { - return fmt.Errorf("Failed to write Release Plan Admission: %v", err) + if err == nil { + releasePlanAdmissionJSON, err := json.Marshal(releasePlanAdmission) + if err != nil { + return fmt.Errorf("Failed to dump Release Plan Admission JSON: %v", err) + } + + err = writeToFile(dirPath, "collected-releaseplanadmission-" + releasePlanAdmissionName + ".json", releasePlanAdmissionJSON) + if err != nil { + return fmt.Errorf("Failed to write Release Plan Admission: %v", err) + } } } From 425170016e4ea86e51198fa5fdd399f9cc9d4f08 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 Oct 2025 13:58:38 +0200 Subject: [PATCH 308/321] fix: Resolve race condition in component and application setup When running user journeys with `JourneyReuseComponents` or `JourneyReuseApplications` enabled, a race condition would occur. The setup functions (`PerComponentSetup` and `PerApplicationSetup`) would attempt to copy the name from the first component or application to subsequent ones before the first one had been created, resulting in an empty name. This commit resolves the issue by moving the name-copying logic into the respective `HandleComponent` and `HandleApplication` functions. This ensures that subsequent components or applications wait for the first one to be fully initialized before attempting to reuse its name, using `utils.WaitUntilWithInterval` to poll for the name to become available. 
Generated-by: Gemini --- .../pkg/journey/handle_applications.go | 28 +++++++++++++++++++ .../pkg/journey/handle_component.go | 25 +++++++++++++++++ tests/load-tests/pkg/journey/journey.go | 13 --------- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index ef5bbed4ee..237124e976 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -39,6 +39,34 @@ func validateApplication(f *framework.Framework, name, namespace string) error { } func HandleApplication(ctx *types.PerApplicationContext) error { + if ctx.ParentContext.Opts.JourneyReuseApplications && ctx.ApplicationIndex != 0 { + // This is a reused application. We need to get the name from the first application. + // We must wait until the first application's context has the name. + firstApplicationCtx := ctx.ParentContext.PerApplicationContexts[0] + + interval := time.Second * 2 + timeout := time.Minute * 20 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + if firstApplicationCtx.ApplicationName != "" { + logging.Logger.Debug("Reused application name is now available: %s", firstApplicationCtx.ApplicationName) + return true, nil + } + logging.Logger.Debug("Waiting for application name from first application thread...") + return false, nil + }, interval, timeout) + + if err != nil { + return logging.Logger.Fail(30, "timed out waiting for application name from first application thread: %v", err) + } + + ctx.ApplicationName = firstApplicationCtx.ApplicationName + ctx.IntegrationTestScenarioName = firstApplicationCtx.IntegrationTestScenarioName + ctx.ReleasePlanName = firstApplicationCtx.ReleasePlanName + ctx.ReleasePlanAdmissionName = firstApplicationCtx.ReleasePlanAdmissionName + logging.Logger.Debug("Reusing application %s and others in thread %d-%d", ctx.ApplicationName, ctx.ParentContext.UserIndex, 
ctx.ApplicationIndex) + } + if ctx.ApplicationName != "" { logging.Logger.Debug("Skipping application creation because reusing application %s in namespace %s", ctx.ApplicationName, ctx.ParentContext.Namespace) return nil diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index aefc070a6a..a2dd20d0e4 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -322,6 +322,31 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap } func HandleComponent(ctx *types.PerComponentContext) error { + if ctx.ParentContext.ParentContext.Opts.JourneyReuseComponents && ctx.ComponentIndex != 0 { + // This is a reused component. We need to get the name from the first component. + // We must wait until the first component's context has the name. + firstComponentCtx := ctx.ParentContext.PerComponentContexts[0] + + interval := time.Second * 2 + timeout := time.Minute * 20 + + err := utils.WaitUntilWithInterval(func() (done bool, err error) { + if firstComponentCtx.ComponentName != "" { + logging.Logger.Debug("Reused component name is now available: %s", firstComponentCtx.ComponentName) + return true, nil + } + logging.Logger.Debug("Waiting for component name from first component thread...") + return false, nil + }, interval, timeout) + + if err != nil { + return logging.Logger.Fail(60, "timed out waiting for component name from first component thread: %v", err) + } + + ctx.ComponentName = firstComponentCtx.ComponentName + logging.Logger.Debug("Reusing component %s in thread %d-%d-%d", ctx.ComponentName, ctx.ParentContext.ParentContext.UserIndex, ctx.ParentContext.ApplicationIndex, ctx.ComponentIndex) + } + if ctx.ComponentName != "" { logging.Logger.Debug("Skipping setting up component because reusing component %s in namespace %s, triggering build with push to the repo", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) _, 
err := doHarmlessCommit(ctx.Framework, ctx.ParentContext.ParentContext.Opts.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision) diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 94e624d2b1..34afe7c292 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -135,14 +135,6 @@ func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *t ReleasePlanAdmissionName: "", } - if parentContext.Opts.JourneyReuseApplications && applicationIndex != 0 { - perApplicationCtx.ApplicationName = parentContext.PerApplicationContexts[0].ApplicationName - perApplicationCtx.IntegrationTestScenarioName = parentContext.PerApplicationContexts[0].IntegrationTestScenarioName - perApplicationCtx.ReleasePlanName = parentContext.PerApplicationContexts[0].ReleasePlanName - perApplicationCtx.ReleasePlanAdmissionName = parentContext.PerApplicationContexts[0].ReleasePlanAdmissionName - logging.Logger.Debug("Reusing application %s and others in thread %d-%d", perApplicationCtx.ApplicationName, parentContext.UserIndex, applicationIndex) - } - parentContext.PerApplicationContexts = append(parentContext.PerApplicationContexts, perApplicationCtx) go fn(perApplicationCtx) @@ -171,11 +163,6 @@ func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types ComponentName: "", } - if parentContext.ParentContext.Opts.JourneyReuseComponents && componentIndex != 0 { - perComponentCtx.ComponentName = parentContext.PerComponentContexts[0].ComponentName - logging.Logger.Debug("Reusing component %s in thread %d-%d-%d", perComponentCtx.ComponentName, parentContext.ParentContext.UserIndex, parentContext.ApplicationIndex, componentIndex) - } - parentContext.PerComponentContexts = append(parentContext.PerComponentContexts, perComponentCtx) go fn(perComponentCtx) From 66745febff5875ef66a963dbd53f6409f70fb5ad Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Wed, 1 
Oct 2025 14:53:49 +0200 Subject: [PATCH 309/321] feat: Ensure all needed data are available --- tests/load-tests/pkg/journey/handle_applications.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 237124e976..134282fb69 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -48,7 +48,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { timeout := time.Minute * 20 err := utils.WaitUntilWithInterval(func() (done bool, err error) { - if firstApplicationCtx.ApplicationName != "" { + if firstApplicationCtx.ApplicationName != "" && firstApplicationCtx.IntegrationTestScenarioName != "" && (ctx.ParentContext.Opts.ReleasePolicy == "" || (firstApplicationCtx.ReleasePlanName != "" && firstApplicationCtx.ReleasePlanAdmissionName != "")) { logging.Logger.Debug("Reused application name is now available: %s", firstApplicationCtx.ApplicationName) return true, nil } From 91bdd91244d5bc4dca9831d5062f953d18d51ad5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 2 Oct 2025 08:33:50 +0200 Subject: [PATCH 310/321] feat: Streamline tracking journey, have is staticcaly in app context and improve some log messages --- tests/load-tests/loadtest.go | 2 +- tests/load-tests/pkg/journey/handle_applications.go | 4 ++-- tests/load-tests/pkg/journey/handle_collections.go | 4 ++-- tests/load-tests/pkg/journey/handle_component.go | 4 ++-- tests/load-tests/pkg/journey/journey.go | 5 +++-- tests/load-tests/pkg/logging/time_and_log.go | 4 ++-- tests/load-tests/pkg/types/types.go | 1 + 7 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 3ea0566c80..3b297219ae 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -230,7 +230,7 @@ func perUserThread(perUserCtx *types.PerUserContext) { 
//watcher.Stop() //os.Exit(10) - for perUserCtx.JourneyRepeatsCounter = 1; perUserCtx.JourneyRepeatsCounter <= perUserCtx.Opts.JourneyRepeats; perUserCtx.JourneyRepeatsCounter++ { + for perUserCtx.JourneyRepeatsCounter = 0; perUserCtx.JourneyRepeatsCounter < perUserCtx.Opts.JourneyRepeats; perUserCtx.JourneyRepeatsCounter++ { // Start given number of `perApplicationThread()` threads using `journey.PerApplicationSetup()` and wait for them to finish _, err = logging.Measure( diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index 134282fb69..f606e4d705 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -39,7 +39,7 @@ func validateApplication(f *framework.Framework, name, namespace string) error { } func HandleApplication(ctx *types.PerApplicationContext) error { - if ctx.ParentContext.Opts.JourneyReuseApplications && ctx.ApplicationIndex != 0 { + if ctx.ParentContext.Opts.JourneyReuseApplications && ctx.JourneyRepeatIndex > 0 { // This is a reused application. We need to get the name from the first application. // We must wait until the first application's context has the name. 
firstApplicationCtx := ctx.ParentContext.PerApplicationContexts[0] @@ -52,7 +52,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { logging.Logger.Debug("Reused application name is now available: %s", firstApplicationCtx.ApplicationName) return true, nil } - logging.Logger.Debug("Waiting for application name from first application thread...") + logging.Logger.Trace("Waiting for application name from first application thread") return false, nil }, interval, timeout) diff --git a/tests/load-tests/pkg/journey/handle_collections.go b/tests/load-tests/pkg/journey/handle_collections.go index 2ca3e4b95e..874b56c470 100644 --- a/tests/load-tests/pkg/journey/handle_collections.go +++ b/tests/load-tests/pkg/journey/handle_collections.go @@ -294,7 +294,7 @@ func HandlePerApplicationCollection(ctx *types.PerApplicationContext) error { var err error - journeyCounterStr := fmt.Sprintf("%d", ctx.ParentContext.JourneyRepeatsCounter) + journeyCounterStr := fmt.Sprintf("%d", ctx.JourneyRepeatIndex) dirPath := getDirName(ctx.ParentContext.Opts.OutputDir, ctx.ParentContext.Namespace, journeyCounterStr) err = createDir(dirPath) if err != nil { @@ -322,7 +322,7 @@ func HandlePerComponentCollection(ctx *types.PerComponentContext) error { var err error - journeyCounterStr := fmt.Sprintf("%d", ctx.ParentContext.ParentContext.JourneyRepeatsCounter) + journeyCounterStr := fmt.Sprintf("%d", ctx.ParentContext.JourneyRepeatIndex) dirPath := getDirName(ctx.ParentContext.ParentContext.Opts.OutputDir, ctx.ParentContext.ParentContext.Namespace, journeyCounterStr) err = createDir(dirPath) if err != nil { diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index a2dd20d0e4..50237b8e09 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -322,7 +322,7 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap } func 
HandleComponent(ctx *types.PerComponentContext) error { - if ctx.ParentContext.ParentContext.Opts.JourneyReuseComponents && ctx.ComponentIndex != 0 { + if ctx.ParentContext.ParentContext.Opts.JourneyReuseComponents && ctx.ParentContext.JourneyRepeatIndex > 0 { // This is a reused component. We need to get the name from the first component. // We must wait until the first component's context has the name. firstComponentCtx := ctx.ParentContext.PerComponentContexts[0] @@ -335,7 +335,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { logging.Logger.Debug("Reused component name is now available: %s", firstComponentCtx.ComponentName) return true, nil } - logging.Logger.Debug("Waiting for component name from first component thread...") + logging.Logger.Trace("Waiting for component name from first component thread") return false, nil }, interval, timeout) diff --git a/tests/load-tests/pkg/journey/journey.go b/tests/load-tests/pkg/journey/journey.go index 34afe7c292..ebf6260680 100644 --- a/tests/load-tests/pkg/journey/journey.go +++ b/tests/load-tests/pkg/journey/journey.go @@ -122,11 +122,12 @@ func PerApplicationSetup(fn func(*types.PerApplicationContext), parentContext *t for applicationIndex := 0; applicationIndex < parentContext.Opts.ApplicationsCount; applicationIndex++ { startupPause := computeStartupPause(applicationIndex, parentContext.Opts.StartupDelay, parentContext.Opts.StartupJitter) - logging.Logger.Info("Initiating per application thread %d-%d with pause %v", parentContext.UserIndex, applicationIndex, startupPause) + logging.Logger.Info("Initiating per application thread %d-%d(%d) with pause %v", parentContext.UserIndex, applicationIndex, parentContext.JourneyRepeatsCounter, startupPause) perApplicationCtx := &types.PerApplicationContext{ PerApplicationWG: perApplicationWG, ApplicationIndex: applicationIndex, + JourneyRepeatIndex: parentContext.JourneyRepeatsCounter, StartupPause: startupPause, ParentContext: parentContext, ApplicationName: 
"", @@ -153,7 +154,7 @@ func PerComponentSetup(fn func(*types.PerComponentContext), parentContext *types for componentIndex := 0; componentIndex < parentContext.ParentContext.Opts.ComponentsCount; componentIndex++ { startupPause := computeStartupPause(componentIndex, parentContext.ParentContext.Opts.StartupDelay, parentContext.ParentContext.Opts.StartupJitter) - logging.Logger.Info("Initiating per component thread %d-%d-%d with pause %s", parentContext.ParentContext.UserIndex, parentContext.ApplicationIndex, componentIndex, startupPause) + logging.Logger.Info("Initiating per component thread %d-%d(%d)-%d with pause %s", parentContext.ParentContext.UserIndex, parentContext.ApplicationIndex, parentContext.JourneyRepeatIndex, componentIndex, startupPause) perComponentCtx := &types.PerComponentContext{ PerComponentWG: perComponentWG, diff --git a/tests/load-tests/pkg/logging/time_and_log.go b/tests/load-tests/pkg/logging/time_and_log.go index 61a2b22356..7e1af7c41c 100644 --- a/tests/load-tests/pkg/logging/time_and_log.go +++ b/tests/load-tests/pkg/logging/time_and_log.go @@ -185,13 +185,13 @@ func Measure(ctx interface{}, fn interface{}, params ...interface{}) (interface{ if casted, ok := ctx.(*types.PerApplicationContext); ok { perUserId = casted.ParentContext.UserIndex perAppId = casted.ApplicationIndex - repeatsCounter = casted.ParentContext.JourneyRepeatsCounter + repeatsCounter = casted.JourneyRepeatIndex } if casted, ok := ctx.(*types.PerComponentContext); ok { perUserId = casted.ParentContext.ParentContext.UserIndex perAppId = casted.ParentContext.ApplicationIndex perCompId = casted.ComponentIndex - repeatsCounter = casted.ParentContext.ParentContext.JourneyRepeatsCounter + repeatsCounter = casted.ParentContext.JourneyRepeatIndex } // Construct arguments for the function call diff --git a/tests/load-tests/pkg/types/types.go b/tests/load-tests/pkg/types/types.go index dbc936c2f1..f4a8f1232b 100644 --- a/tests/load-tests/pkg/types/types.go +++ 
b/tests/load-tests/pkg/types/types.go @@ -26,6 +26,7 @@ type PerUserContext struct { type PerApplicationContext struct { PerApplicationWG *sync.WaitGroup ApplicationIndex int + JourneyRepeatIndex int StartupPause time.Duration Framework *framework.Framework ParentContext *PerUserContext From bc0412840cfd33151bcd664b5ce0b145f69ac6fb Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 2 Oct 2025 08:56:05 +0200 Subject: [PATCH 311/321] fix: Correctly reference reused components in subsequent journeys When a journey is repeated with component reuse enabled, the HandleComponent function should check for existing components from the *first* journey iteration. The previous logic incorrectly looked for the component's context within the current iteration, leading to a timeout because the component name was always empty. This change adjusts the lookup to reference the PerApplicationContext from the initial journey, ensuring the correct component is found and the wait loop does not time out. Generated-by: Gemini --- tests/load-tests/pkg/journey/handle_component.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 50237b8e09..e24c7ff542 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -323,9 +323,10 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap func HandleComponent(ctx *types.PerComponentContext) error { if ctx.ParentContext.ParentContext.Opts.JourneyReuseComponents && ctx.ParentContext.JourneyRepeatIndex > 0 { - // This is a reused component. We need to get the name from the first component. - // We must wait until the first component's context has the name. - firstComponentCtx := ctx.ParentContext.PerComponentContexts[0] + // This is a reused component. We need to get the name from the component from the first journey. 
+ // We must wait until the component's context from the first journey has the name. + firstApplicationCtx := ctx.ParentContext.ParentContext.PerApplicationContexts[ctx.ParentContext.ApplicationIndex] + firstComponentCtx := firstApplicationCtx.PerComponentContexts[ctx.ComponentIndex] interval := time.Second * 2 timeout := time.Minute * 20 From 8e6e8508f66b1b3e3fde796f2555d119815cc4f5 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 2 Oct 2025 09:11:49 +0200 Subject: [PATCH 312/321] fix: Use forked ComponentRepoUrl --- tests/load-tests/pkg/journey/handle_component.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index e24c7ff542..0d5589066d 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -350,7 +350,7 @@ func HandleComponent(ctx *types.PerComponentContext) error { if ctx.ComponentName != "" { logging.Logger.Debug("Skipping setting up component because reusing component %s in namespace %s, triggering build with push to the repo", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace) - _, err := doHarmlessCommit(ctx.Framework, ctx.ParentContext.ParentContext.Opts.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision) + _, err := doHarmlessCommit(ctx.Framework, ctx.ParentContext.ParentContext.ComponentRepoUrl, ctx.ParentContext.ParentContext.Opts.ComponentRepoRevision) if err != nil { return logging.Logger.Fail(60, "Commiting to repo for reused component %s in namespace %s failed: %v", ctx.ComponentName, ctx.ParentContext.ParentContext.Namespace, err) } From e4cfdd6bd45fcdf83b63b18a5a03c92d01a75a02 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 2 Oct 2025 09:12:45 +0200 Subject: [PATCH 313/321] fix: Get right application for every application thread --- tests/load-tests/pkg/journey/handle_applications.go | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/tests/load-tests/pkg/journey/handle_applications.go b/tests/load-tests/pkg/journey/handle_applications.go index f606e4d705..ca685a0bcc 100644 --- a/tests/load-tests/pkg/journey/handle_applications.go +++ b/tests/load-tests/pkg/journey/handle_applications.go @@ -42,7 +42,7 @@ func HandleApplication(ctx *types.PerApplicationContext) error { if ctx.ParentContext.Opts.JourneyReuseApplications && ctx.JourneyRepeatIndex > 0 { // This is a reused application. We need to get the name from the first application. // We must wait until the first application's context has the name. - firstApplicationCtx := ctx.ParentContext.PerApplicationContexts[0] + firstApplicationCtx := ctx.ParentContext.PerApplicationContexts[ctx.ApplicationIndex] interval := time.Second * 2 timeout := time.Minute * 20 From 387d04ce2681179ab5e3812c6eba53250b03a5f0 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Thu, 2 Oct 2025 09:46:56 +0200 Subject: [PATCH 314/321] feat: When reusing components and applications, reuse also metrics for these to ensure these passes are considered complete as well --- tests/load-tests/evaluate.py | 43 +++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/tests/load-tests/evaluate.py b/tests/load-tests/evaluate.py index c339dc86e3..a643de696a 100755 --- a/tests/load-tests/evaluate.py +++ b/tests/load-tests/evaluate.py @@ -69,6 +69,24 @@ "validateReleaseCondition", ] +# These metrics will be reused when we are reusing applications +METRICS_REUSE_APPLICATIONS = [ + "createApplication", + "validateApplication", + "createIntegrationTestScenario", + "createReleasePlan", + "createReleasePlanAdmission", + "validateReleasePlan", + "validateReleasePlanAdmission", +] + +# These metrics will be reused when we are reusing components +METRICS_REUSE_COMPONENTS = [ + "createComponent", + "getPaCPullNumber", + "validateComponent", +] + class SinglePass: """Structure to record data about one specific pass through loadtest
workload, identified by an identier (touple with loadtest's per user, per application and per component thread index and repeats counter.""" @@ -76,6 +94,12 @@ class SinglePass: def __init__(self): self._metrics = {} + def __contains__(self, item): + return item in self._metrics + + def __getitem__(self, key): + return self._metrics[key] + def add(self, metric, duration): """Adds given metric to data about this pass.""" assert metric not in self._metrics @@ -166,8 +190,9 @@ def main(): with open(options_file, "r") as fp: options = json.load(fp) - # Determine what metrics we need to skip based on options + # Determine what metrics we need to skip or reuse based on options to_skip = [] + to_reuse = [] if options["Stage"]: print("NOTE: Ignoring CI cluster related metrics because running against non-CI cluster") to_skip += METRICS_CI @@ -177,9 +202,16 @@ def main(): if options["ReleasePolicy"] == "": print("NOTE: Ignoring Release related metrics because they were disabled at test run") to_skip += METRICS_RELEASE + if options["JourneyReuseApplications"]: + print("NOTE: Will reuse application metrics as we were reusing applications") + to_reuse += METRICS_REUSE_APPLICATIONS + if options["JourneyReuseComponents"]: + print("NOTE: Will reuse component metrics as we were reusing components") + to_reuse += METRICS_REUSE_COMPONENTS # When processing, only consider these metrics expected_metrics = set(METRICS) - set(to_skip) + reuse_metrics = set(to_reuse) - set(to_skip) stats_raw = {} stats_passes = {} @@ -233,6 +265,15 @@ def main(): found = [v for k, v in stats_passes.items() if SinglePass.i_matches(identifier, k)] for i in found: i.add(metric, duration) + #print(f"Metric {metric} added from {identifier}") + + # Now add reused metrics if needed + for pass_id, pass_data in stats_passes.items(): + for reuse_metric in reuse_metrics: + if reuse_metric not in pass_data: + reuse_from_id = pass_id[:3] + (0,) + pass_data.add(reuse_metric, stats_passes[reuse_from_id][reuse_metric]) + 
#print(f"Metric {reuse_metric} reused from {reuse_from_id} to {pass_id}") #print("Raw stats:") #print(json.dumps(stats_raw, indent=4, default=lambda o: '<' + str(o) + '>')) From 4e3750801a15786db3e7292077fda40f30bb32d7 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 13 Oct 2025 11:20:32 +0200 Subject: [PATCH 315/321] feat: Log traceback if this exception happens --- tests/load-tests/errors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 95b3a6a7f9..358c9b24de 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -4,6 +4,7 @@ import collections import csv import json +import logging import os import re import sys @@ -405,6 +406,7 @@ def investigate_failed_plr(dump_dir, plr_type="build"): if reason != "SKIP": reasons.append(reason) except Exception as e: + logging.exception("Investigating PLR failed") return ["SORRY " + str(e)] reasons = list(set(reasons)) # get unique reasons only From 6e0b0f58f8a4b4ac47456495cb95c4aa910b44a2 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 17 Oct 2025 08:38:33 +0200 Subject: [PATCH 316/321] feat: Show monitoring venv content --- tests/load-tests/ci-scripts/stage/collect-results.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index c3482df059..cba3522b39 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -36,6 +36,8 @@ python3 -m pip install -U pip python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" python3 -m pip install tabulate python3 -m pip install matplotlib +echo "Content of the venv:" +python3 -m pip freeze } &>"${ARTIFACT_DIR}/monitoring-setup.log" echo "[$(date --utc -Ins)] Create summary JSON with timings" From 0aab00f11e2ac647f90ed77976718c236f228423 Mon Sep 17 
00:00:00 2001 From: Jan Hutar Date: Fri, 31 Oct 2025 10:47:32 +0100 Subject: [PATCH 317/321] fix(KONFLUX-10645): correct Options struct initialization in releaseLib.go Corrected the field names used to initialize the utils.Options struct in the NewFramework function to match the actual struct definition. This resolves a compile-time error. Generated-by: Gemini --- tests/release/releaseLib.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/release/releaseLib.go b/tests/release/releaseLib.go index 0a3fe1219d..3b4c763845 100644 --- a/tests/release/releaseLib.go +++ b/tests/release/releaseLib.go @@ -30,9 +30,8 @@ func NewFramework(workspace string) *framework.Framework { var fw *framework.Framework var err error stageOptions := utils.Options{ - ToolchainApiUrl: os.Getenv(constants.TOOLCHAIN_API_URL_ENV), - KeycloakUrl: os.Getenv(constants.KEYLOAK_URL_ENV), - OfflineToken: os.Getenv(constants.OFFLINE_TOKEN_ENV), + ApiUrl: os.Getenv(constants.TOOLCHAIN_API_URL_ENV), + Token: os.Getenv(constants.OFFLINE_TOKEN_ENV), } fw, err = framework.NewFrameworkWithTimeout( From a2d80e408f9ed5365f6e345c63e679eef45f1d27 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 31 Oct 2025 11:55:49 +0100 Subject: [PATCH 318/321] feat(KONFLUX-10645): Address issues found by golangci-lint Generated-by: Gemini --- tests/load-tests/loadtest.go | 6 +++--- tests/load-tests/pkg/journey/handle_component.go | 9 ++++++--- tests/load-tests/pkg/journey/handle_releases_setup.go | 4 ++-- tests/load-tests/pkg/journey/handle_repo_templating.go | 6 ++---- tests/load-tests/pkg/options/options.go | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/load-tests/loadtest.go b/tests/load-tests/loadtest.go index 3b297219ae..218f87df98 100644 --- a/tests/load-tests/loadtest.go +++ b/tests/load-tests/loadtest.go @@ -10,7 +10,7 @@ import types "github.com/konflux-ci/e2e-tests/tests/load-tests/pkg/types" import cobra "github.com/spf13/cobra" import klog 
"k8s.io/klog/v2" -import klogr "k8s.io/klog/v2/klogr" +import textlogger "k8s.io/klog/v2/textlogger" import ctrl "sigs.k8s.io/controller-runtime" //import "os" @@ -87,7 +87,7 @@ func main() { // This makes controller-runtime logs go through klog. // Hopefuly will help us to avoid these errors: // [controller-runtime] log.SetLogger(...) was never called; logs will not be displayed. - ctrl.SetLogger(klogr.New()) + ctrl.SetLogger(textlogger.NewLogger(textlogger.NewConfig())) // Setup argument parser err = rootCmd.Execute() @@ -116,7 +116,7 @@ func main() { } // Show test options - logging.Logger.Debug("Options: %+v", opts) + logging.Logger.Debug("Options: %+v", &opts) // Tier up measurements logger logging.MeasurementsStart(opts.OutputDir) diff --git a/tests/load-tests/pkg/journey/handle_component.go b/tests/load-tests/pkg/journey/handle_component.go index 0d5589066d..81e3e46cfa 100644 --- a/tests/load-tests/pkg/journey/handle_component.go +++ b/tests/load-tests/pkg/journey/handle_component.go @@ -281,15 +281,18 @@ func utilityRepoTemplatingComponentCleanup(f *framework.Framework, namespace, ap return fmt.Errorf("Failed parsing repo org/name: %v", err) } _, err = f.AsKubeAdmin.CommonController.Gitlab.AcceptMergeRequest(repoId, mergeReqNum) + if err != nil { + return fmt.Errorf("Merging %d failed: %v", mergeReqNum, err) + } } else { repoName, err := getRepoNameFromRepoUrl(repoUrl) if err != nil { return fmt.Errorf("Failed parsing repo name: %v", err) } _, err = f.AsKubeAdmin.CommonController.Github.MergePullRequest(repoName, mergeReqNum) - } - if err != nil { - return fmt.Errorf("Merging %d failed: %v", mergeReqNum, err) + if err != nil { + return fmt.Errorf("Merging %d failed: %v", mergeReqNum, err) + } } logging.Logger.Debug("Repo-templating workflow: Merged PR %d in %s", mergeReqNum, repoUrl) diff --git a/tests/load-tests/pkg/journey/handle_releases_setup.go b/tests/load-tests/pkg/journey/handle_releases_setup.go index c81ee0d177..215f760abf 100644 --- 
a/tests/load-tests/pkg/journey/handle_releases_setup.go +++ b/tests/load-tests/pkg/journey/handle_releases_setup.go @@ -184,7 +184,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { return logging.Logger.Fail(94, "Type assertion failed on release plan admission name: %+v", iface) } - iface, err = logging.Measure( + _, err = logging.Measure( ctx, validateReleasePlan, ctx.Framework, @@ -195,7 +195,7 @@ func HandleReleaseSetup(ctx *types.PerApplicationContext) error { return logging.Logger.Fail(95, "Release Plan failed validation: %v", err) } - iface, err = logging.Measure( + _, err = logging.Measure( ctx, validateReleasePlanAdmission, ctx.Framework, diff --git a/tests/load-tests/pkg/journey/handle_repo_templating.go b/tests/load-tests/pkg/journey/handle_repo_templating.go index 64c7075170..608ac39a7b 100644 --- a/tests/load-tests/pkg/journey/handle_repo_templating.go +++ b/tests/load-tests/pkg/journey/handle_repo_templating.go @@ -24,8 +24,7 @@ func getRepoNameFromRepoUrl(repoUrl string) (string, error) { // repoUrl: https://github.com/abc/nodejs-devfile-sample, match[1]: nodejs-devfile-sample // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: nodejs-devfile-sample // repoUrl: https://gitlab.example.com/abc/def/nodejs-devfile-sample, match[1]: nodejs-devfile-sample - var regex *regexp.Regexp - regex = regexp.MustCompile(`/([^/]+?)(.git)?/?$`) + regex := regexp.MustCompile(`/([^/]+?)(.git)?/?$`) match := regex.FindStringSubmatch(repoUrl) if match != nil { return match[1], nil @@ -44,8 +43,7 @@ func getRepoOrgFromRepoUrl(repoUrl string) (string, error) { // repoUrl: https://github.com/abc/nodejs-devfile-sample, match[1]: abc // repoUrl: https://gitlab.example.com/abc/nodejs-devfile-sample, match[1]: abc // repoUrl: https://gitlab.example.com/abc/def/nodejs-devfile-sample, match[1]: abc/def - var regex *regexp.Regexp - regex = regexp.MustCompile(`^[^/]+://[^/]+/(.*)/.+(.git)?/?$`) + regex := 
regexp.MustCompile(`^[^/]+://[^/]+/(.*)/.+(.git)?/?$`) match := regex.FindStringSubmatch(repoUrl) if match != nil { return match[1], nil diff --git a/tests/load-tests/pkg/options/options.go b/tests/load-tests/pkg/options/options.go index 52c0a94499..6ae0fcae52 100644 --- a/tests/load-tests/pkg/options/options.go +++ b/tests/load-tests/pkg/options/options.go @@ -77,7 +77,7 @@ func (o *Opts) ProcessOptions() error { if o.PipelineRepoTemplatingSourceDir == "" { o.PipelineRepoTemplatingSourceDir = ".template/" } - if strings.HasSuffix(o.PipelineRepoTemplatingSourceDir, "/") != true { + if !strings.HasSuffix(o.PipelineRepoTemplatingSourceDir, "/") { o.PipelineRepoTemplatingSourceDir = o.PipelineRepoTemplatingSourceDir + "/" } } From cd5336d44f1548325af1c709c3566281ca15ce43 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Fri, 31 Oct 2025 17:33:54 +0100 Subject: [PATCH 319/321] fix(KONFLUX-10645): Fix two tests working with git branches Generated-by: Gemini --- tests/build/build.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/build/build.go b/tests/build/build.go index 9b2ca64bdd..23953b5c69 100644 --- a/tests/build/build.go +++ b/tests/build/build.go @@ -269,10 +269,13 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser It("PR branch should not exist in the repo", func() { timeout = time.Second * 60 interval = time.Second * 1 - Eventually(func() bool { + Eventually(func() (bool, error) { exists, err := gitClient.BranchExists(helloWorldRepository, customDefaultComponentBranch) - Expect(err).ShouldNot(HaveOccurred()) - return exists + if err != nil { + Expect(err.Error()).To(Or(ContainSubstring("Reference does not exist"), ContainSubstring("404"))) + return false, nil + } + return exists, nil }, timeout, interval).Should(BeFalse(), fmt.Sprintf("timed out when waiting for the branch %s to be deleted from %s repository", customDefaultComponentBranch, helloWorldComponentGitSourceRepoName)) }) @@ -1405,12 
+1408,13 @@ var _ = framework.BuildSuiteDescribe("Build service E2E tests", Label("build-ser println("deleting branch " + c.componentBranch) err = gitClient.DeleteBranch(repositories[i], c.componentBranch) if err != nil { - Expect(err.Error()).To(Or(ContainSubstring("Reference does not exist"), ContainSubstring("Branch Not Found"))) + Expect(err.Error()).To(Or(ContainSubstring("Reference does not exist"), ContainSubstring("404 Not Found"), ContainSubstring("Branch Not Found"))) } err = gitClient.DeleteBranch(repositories[i], c.pacBranchName) if err != nil { - Expect(err.Error()).To(Or(ContainSubstring("Reference does not exist"), ContainSubstring("Branch Not Found"))) + Expect(err.Error()).To(Or(ContainSubstring("Reference does not exist"), ContainSubstring("404 Not Found"), ContainSubstring("Branch Not Found"))) } + // Cleanup parent repo webhooks err = gitClient.CleanupWebhooks(componentDependenciesParentRepoName, f.ClusterAppDomain) if err != nil { From 82155defbeb93e61145b5fea0831e839cb0597c3 Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 3 Nov 2025 12:20:06 +0100 Subject: [PATCH 320/321] cleanup: Remove 'temporary logs' --- pkg/clients/has/components.go | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pkg/clients/has/components.go b/pkg/clients/has/components.go index 48a157a2d1..7ca6c2c6fe 100644 --- a/pkg/clients/has/components.go +++ b/pkg/clients/has/components.go @@ -367,10 +367,6 @@ func (h *HasController) ScaleComponentReplicas(component *appservice.Component, // DeleteComponent delete an has component from a given name and namespace func (h *HasController) DeleteComponent(name string, namespace string, reportErrorOnNotFound bool) error { - // temporary logs - start := time.Now() - GinkgoWriter.Printf("Start to delete component '%s' at %s\n", name, start.Format(time.RFC3339Nano)) - component := appservice.Component{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -386,19 +382,11 @@ func (h *HasController) 
DeleteComponent(name string, namespace string, reportErr // RHTAPBUGS-978: temporary timeout to 15min err := utils.WaitUntil(h.ComponentDeleted(&component), 15*time.Minute) - // temporary logs - deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete component '%s' at %s. It took '%f' minutes\n", name, time.Now().Format(time.RFC3339Nano), deletionTime) - return err } // DeleteAllComponentsInASpecificNamespace removes all component CRs from a specific namespace. Useful when creating a lot of resources and want to remove all of them func (h *HasController) DeleteAllComponentsInASpecificNamespace(namespace string, timeout time.Duration) error { - // temporary logs - start := time.Now() - GinkgoWriter.Printf("Start to delete all components in namespace '%s' at %s\n", namespace, start.Format(time.RFC3339Nano)) - if err := h.KubeRest().DeleteAllOf(context.Background(), &appservice.Component{}, rclient.InNamespace(namespace)); err != nil { return fmt.Errorf("error deleting components from the namespace %s: %+v", namespace, err) } @@ -412,10 +400,6 @@ func (h *HasController) DeleteAllComponentsInASpecificNamespace(namespace string return len(componentList.Items) == 0, nil }, timeout) - // temporary logs - deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete all components in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339Nano), deletionTime) - return err } @@ -571,10 +555,6 @@ func (h *HasController) CheckImageRepositoryExists(namespace, componentName stri // DeleteAllImageRepositoriesInASpecificNamespace removes all image repository CRs from a specific namespace. 
Useful when cleaning up a namespace and component cleanup did not cleaned it's image repository func (h *HasController) DeleteAllImageRepositoriesInASpecificNamespace(namespace string, timeout time.Duration) error { - // temporary logs - start := time.Now() - GinkgoWriter.Printf("Start to delete all image repositories in namespace '%s' at %s\n", namespace, start.Format(time.RFC3339Nano)) - if err := h.KubeRest().DeleteAllOf(context.Background(), &imagecontroller.ImageRepository{}, rclient.InNamespace(namespace)); err != nil { return fmt.Errorf("error deleting image repositories from the namespace %s: %+v", namespace, err) } @@ -588,10 +568,6 @@ func (h *HasController) DeleteAllImageRepositoriesInASpecificNamespace(namespace return len(imageRepositoryList.Items) == 0, nil }, timeout) - // temporary logs - deletionTime := time.Since(start).Minutes() - GinkgoWriter.Printf("Finish to delete all image repositories in namespace '%s' at %s. It took '%f' minutes\n", namespace, time.Now().Format(time.RFC3339Nano), deletionTime) - return err } From bb876049319988e82fa3e2c92098811864c0a46b Mon Sep 17 00:00:00 2001 From: Jan Hutar Date: Mon, 3 Nov 2025 15:00:49 +0100 Subject: [PATCH 321/321] feat: First run is in .../0/... 
now --- tests/load-tests/errors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load-tests/errors.py b/tests/load-tests/errors.py index 358c9b24de..23029c3594 100755 --- a/tests/load-tests/errors.py +++ b/tests/load-tests/errors.py @@ -332,7 +332,7 @@ def find_trs(plr): def check_failed_taskrun(data_dir, ns, tr_name): - datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") + datafile = os.path.join(data_dir, ns, "0", "collected-taskrun-" + tr_name + ".json") try: data = load(datafile) except FileNotFoundError as e: @@ -354,7 +354,7 @@ def check_failed_taskrun(data_dir, ns, tr_name): def find_failed_containers(data_dir, ns, tr_name): - datafile = os.path.join(data_dir, ns, "1", "collected-taskrun-" + tr_name + ".json") + datafile = os.path.join(data_dir, ns, "0", "collected-taskrun-" + tr_name + ".json") data = load(datafile) try: @@ -370,7 +370,7 @@ def find_failed_containers(data_dir, ns, tr_name): def load_container_log(data_dir, ns, pod_name, cont_name): - datafile = os.path.join(data_dir, ns, "1", "pod-" + pod_name + "-" + cont_name + ".log") + datafile = os.path.join(data_dir, ns, "0", "pod-" + pod_name + "-" + cont_name + ".log") print(f"Checking errors in {datafile}") with open(datafile, "r") as fd: return fd.read()