From 68bd62dfaac2c8258ce84bf101845ffab5d64ef4 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 6 Mar 2026 14:47:17 +0100 Subject: [PATCH 1/3] HDDS-14751. Add basic ZDU flow in acceptance tests --- .../dist/src/main/compose/upgrade/README.md | 2 +- .../upgrade/compose/ha/docker-compose.yaml | 7 + .../dist/src/main/compose/upgrade/test.sh | 3 + .../dist/src/main/compose/upgrade/testlib.sh | 18 +++ .../callbacks/common/callback.sh | 18 --- .../callbacks/common/callback.sh | 28 ++++ .../upgrades/rolling-upgrade/driver.sh | 134 ++++++++++++++++++ 7 files changed, 191 insertions(+), 19 deletions(-) create mode 100644 hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh create mode 100644 hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/README.md b/hadoop-ozone/dist/src/main/compose/upgrade/README.md index 05920533bcb3..3565d0e067ea 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/README.md +++ b/hadoop-ozone/dist/src/main/compose/upgrade/README.md @@ -91,7 +91,7 @@ Docker compose cluster definitions to be used in upgrade testing are defined in - Tests that should run for all upgrades, regardless of the version being tested, can be added to *compose/upgrade/\/common/callback.sh*. -- Tests that should run only for an upgrade to a specific version can be added to *compose/upgrade/\/\/callback.sh*. +- Tests that should run only for an upgrade to a specific version can be added to *compose/upgrade/upgrades/\/\/callback.sh*. - Add commands in the callback function when they should be run. Each callback file will have access to the following environment variables: - `OZONE_UPGRADE_FROM`: The version of ozone being upgraded from. diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml index 8235f2137498..20c93493a14d 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml @@ -22,6 +22,13 @@ x-common-config: - ../../../common/security.conf image: ${OZONE_TEST_IMAGE} dns_search: . + extra_hosts: + - "om1:10.9.0.11" + - "om2:10.9.0.12" + - "om3:10.9.0.13" + - "scm1.org:10.9.0.14" + - "scm2.org:10.9.0.15" + - "scm3.org:10.9.0.16" x-environment: &environment diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh index 8fdc98938eaf..5c5e27288591 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh @@ -42,6 +42,9 @@ run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" # run_test ha non-rolling-upgrade 1.2.1 "$OZONE_CURRENT_VERSION" # run_test om-ha non-rolling-upgrade 1.1.0 "$OZONE_CURRENT_VERSION" +# Rolling upgrade test, commented out for now +run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" + generate_report "upgrade" "$ALL_RESULT_DIR" exit "$RESULT" diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh b/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh index 60b6187e08f4..a418a96423a2 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh @@ -112,3 +112,21 @@ run_test() { copy_results "$execution_dir" "$ALL_RESULT_DIR" } + +### CALLBACK HELPER METHODS ### + +## @description Generates data on the cluster. +## @param The prefix to use for data generated. +## @param All parameters after the first one are passed directly to the robot command, +## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options +generate() { + execute_robot_test "$SCM" -N "${OUTPUT_NAME}-generate-${1}" -v PREFIX:"$1" ${@:2} upgrade/generate.robot +} + +## @description Validates that data exists on the cluster. +## @param The prefix of the data to be validated. +## @param All parameters after the first one are passed directly to the robot command, +## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options +validate() { + execute_robot_test "$SCM" -N "${OUTPUT_NAME}-validate-${1}" -v PREFIX:"$1" ${@:2} upgrade/validate.robot +} diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh index f205553bbec5..c2159ca0cd24 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh @@ -17,24 +17,6 @@ source "$TEST_DIR"/testlib.sh -### HELPER METHODS ### - -## @description Generates data on the cluster. -## @param The prefix to use for data generated. -## @param All parameters after the first one are passed directly to the robot command, -## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options -generate() { - execute_robot_test "$SCM" -N "${OUTPUT_NAME}-generate-${1}" -v PREFIX:"$1" ${@:2} upgrade/generate.robot -} - -## @description Validates that data exists on the cluster. -## @param The prefix of the data to be validated. -## @param All parameters after the first one are passed directly to the robot command, -## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options -validate() { - execute_robot_test "$SCM" -N "${OUTPUT_NAME}-validate-${1}" -v PREFIX:"$1" ${@:2} upgrade/validate.robot -} - ### CALLBACKS ### with_old_version() { diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh new file mode 100644 index 000000000000..5160b4ef384b --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source "$TEST_DIR"/testlib.sh + +### CALLBACKS ### + +before_service_restart() { + generate "generate-${SERVICE}" +} + +after_service_restart() { + validate "generate-${SERVICE}" +} diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh new file mode 100644 index 000000000000..37d896b3f1a7 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script tests upgrade from a previous release to the current +# binaries. Docker image with Ozone binaries is required for the +# initial version, while the snapshot version uses Ozone runner image. + +set -e -o pipefail + +# Fail if required vars are not set. +set -u +: "${OZONE_UPGRADE_FROM}" +: "${OZONE_UPGRADE_TO}" +: "${TEST_DIR}" +: "${SCM}" +: "${OZONE_CURRENT_VERSION}" +set +u + +echo "--- RUNNING ROLLING UPGRADE TEST FROM $OZONE_UPGRADE_FROM TO $OZONE_UPGRADE_TO ---" + +source "$TEST_DIR"/testlib.sh + +# Restart one service with the target image. +rolling_restart_service() { + SERVICE="$1" + + echo "--- RESTARTING ${SERVICE} WITH IMAGE ${OZONE_UPGRADE_TO} ---" + + # Stop service + stop_containers "${SERVICE}" + + # Check if this SCM container is running, as during a rolling upgrade it does stop-start one-by-one and + # we want to run write/read tests while one service is unavailable. Choose SCM (the container where the generate and + # validate robot tests are running) considering availability. + if [[ "$(docker inspect -f '{{.State.Running}}' "ha-${SCM}-1" 2>/dev/null)" != "true" ]]; then + local fallback_scm + fallback_scm="$(docker-compose --project-directory="$TEST_DIR/compose/ha" config --services | grep scm | grep -v "^${SCM}$" | head -n1)" + if [[ -n "$fallback_scm" ]]; then + export SCM="$fallback_scm" + fi + fi + + # The data generation/validation is doing S3 API tests, so skip it in case the S3 gateway is updated + # TODO find a better solution + if [[ ${SERVICE} != "s3g" ]]; then + callback before_service_restart + fi + + # Restart service with new image. + prepare_for_image "${OZONE_UPGRADE_TO}" + create_containers "${SERVICE}" + + # The data generation/validation is doing S3 API tests, so skip it in case the S3 gateway is updated + if [[ ${SERVICE} != "s3g" ]]; then + callback after_service_restart + fi + + # Service-specific readiness checks. + case "${SERVICE}" in + om*) + wait_for_port "${SERVICE}" 9862 120 + ;; + scm*) + # SCM hostnames in this compose are scmX.org + wait_for_port "${SERVICE}.org" 9876 120 + ;; + dn*) + wait_for_port "${SERVICE}" 9882 120 + ;; + esac +} + +echo "--- SETTING UP OLD VERSION $OZONE_UPGRADE_FROM ---" +OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-1-original" +export OM_HA_ARGS='--' +prepare_for_image "$OZONE_UPGRADE_FROM" + +echo "--- RUNNING WITH OLD VERSION $OZONE_UPGRADE_FROM ---" +start_docker_env + +# TODO Add old data generation + +echo "--- ROLLING UPGRADE TO $OZONE_UPGRADE_TO PRE-FINALIZED ---" + +# SCMs first +for s in scm2 scm1 scm3; do + OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-${s}" + rolling_restart_service "$s" "$OZONE_UPGRADE_TO" +done + +# Recon +OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-recon" +rolling_restart_service "recon" "$OZONE_UPGRADE_TO" + +# DNs +for s in dn1 dn2 dn3 dn4 dn5; do + OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-${s}" + rolling_restart_service "$s" "$OZONE_UPGRADE_TO" +done + +# OMs with upgrade arg +export OM_HA_ARGS='--upgrade' +for s in om1 om2 om3; do + OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-${s}" + rolling_restart_service "$s" "$OZONE_UPGRADE_TO" +done + +# S3 Gateway +OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-s3g" +rolling_restart_service "s3g" "$OZONE_UPGRADE_TO" + +# TODO Add downgrade scenario + +echo "--- RUNNING WITH NEW VERSION $OZONE_UPGRADE_TO FINALIZED ---" +OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-3-finalized" + +# TODO Add validation for pre-finalized state + +# Sends commands to finalize OM and SCM. +execute_robot_test "$SCM" -N "${OUTPUT_NAME}-finalize" upgrade/finalize.robot From a3bec3089c5a5d27cbe71700ed38442c96cc92d7 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Sat, 7 Mar 2026 00:04:17 +0100 Subject: [PATCH 2/3] Comment out rolling upgrade run_test --- hadoop-ozone/dist/src/main/compose/upgrade/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh index 5c5e27288591..014d1ba81e21 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh @@ -43,7 +43,7 @@ run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" # run_test om-ha non-rolling-upgrade 1.1.0 "$OZONE_CURRENT_VERSION" # Rolling upgrade test, commented out for now -run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" +# run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" generate_report "upgrade" "$ALL_RESULT_DIR" From f9b08229977a2bde392f59c96be38ece0144962f Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Wed, 25 Mar 2026 14:12:52 +0100 Subject: [PATCH 3/3] Address review comments --- .../src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh | 2 -- 1 file changed, 2 deletions(-) mode change 100644 => 100755 hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh old mode 100644 new mode 100755 index 37d896b3f1a7..73663dc53614 --- a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh @@ -112,8 +112,6 @@ for s in dn1 dn2 dn3 dn4 dn5; do rolling_restart_service "$s" "$OZONE_UPGRADE_TO" done -# OMs with upgrade arg -export OM_HA_ARGS='--upgrade' for s in om1 om2 om3; do OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-2-${s}" rolling_restart_service "$s" "$OZONE_UPGRADE_TO"